Bug Summary

File: /build/source/llvm/lib/Target/ARM/ARMISelLowering.cpp
Warning: line 7249, column 18
Division by zero
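
The flagged expression at line 7249, column 18 lies outside this excerpt. As a rough, hypothetical sketch of the kind of pattern the analyzer's core division-by-zero check reports (the function and variable names below are illustrative and not taken from ARMISelLowering.cpp):

  // Hypothetical, minimal illustration only -- not the code at line 7249.
  // The checker flags divisions where it can prove the divisor is zero on
  // some feasible path, for example:
  static int splitIntoChunks(int Total, int Parts) {
    int ChunkSize = 0;
    if (Parts > 0)
      ChunkSize = Parts;
    // On the path where Parts <= 0, ChunkSize is still 0 here, so the
    // division below is reported as a division by zero.
    return Total / ChunkSize;
  }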

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ARMISelLowering.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-17/lib/clang/17 -D _DEBUG -D _GLIBCXX_ASSERTIONS -D _GNU_SOURCE -D _LIBCPP_ENABLE_ASSERTIONS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Target/ARM -I /build/source/llvm/lib/Target/ARM -I include -I /build/source/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-17/lib/clang/17/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/source/= -fcoverage-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/source/= -source-date-epoch 1683717183 -O2 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/= -ferror-limit 19 -fvisibility=hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2023-05-10-133810-16478-1 -x c++ /build/source/llvm/lib/Target/ARM/ARMISelLowering.cpp

/build/source/llvm/lib/Target/ARM/ARMISelLowering.cpp

1//===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that ARM uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "ARMISelLowering.h"
15#include "ARMBaseInstrInfo.h"
16#include "ARMBaseRegisterInfo.h"
17#include "ARMCallingConv.h"
18#include "ARMConstantPoolValue.h"
19#include "ARMMachineFunctionInfo.h"
20#include "ARMPerfectShuffle.h"
21#include "ARMRegisterInfo.h"
22#include "ARMSelectionDAGInfo.h"
23#include "ARMSubtarget.h"
24#include "ARMTargetTransformInfo.h"
25#include "MCTargetDesc/ARMAddressingModes.h"
26#include "MCTargetDesc/ARMBaseInfo.h"
27#include "Utils/ARMBaseInfo.h"
28#include "llvm/ADT/APFloat.h"
29#include "llvm/ADT/APInt.h"
30#include "llvm/ADT/ArrayRef.h"
31#include "llvm/ADT/BitVector.h"
32#include "llvm/ADT/DenseMap.h"
33#include "llvm/ADT/STLExtras.h"
34#include "llvm/ADT/SmallPtrSet.h"
35#include "llvm/ADT/SmallVector.h"
36#include "llvm/ADT/Statistic.h"
37#include "llvm/ADT/StringExtras.h"
38#include "llvm/ADT/StringRef.h"
39#include "llvm/ADT/StringSwitch.h"
40#include "llvm/ADT/Twine.h"
41#include "llvm/Analysis/VectorUtils.h"
42#include "llvm/CodeGen/CallingConvLower.h"
43#include "llvm/CodeGen/ISDOpcodes.h"
44#include "llvm/CodeGen/IntrinsicLowering.h"
45#include "llvm/CodeGen/MachineBasicBlock.h"
46#include "llvm/CodeGen/MachineConstantPool.h"
47#include "llvm/CodeGen/MachineFrameInfo.h"
48#include "llvm/CodeGen/MachineFunction.h"
49#include "llvm/CodeGen/MachineInstr.h"
50#include "llvm/CodeGen/MachineInstrBuilder.h"
51#include "llvm/CodeGen/MachineJumpTableInfo.h"
52#include "llvm/CodeGen/MachineMemOperand.h"
53#include "llvm/CodeGen/MachineOperand.h"
54#include "llvm/CodeGen/MachineRegisterInfo.h"
55#include "llvm/CodeGen/MachineValueType.h"
56#include "llvm/CodeGen/RuntimeLibcalls.h"
57#include "llvm/CodeGen/SelectionDAG.h"
58#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
59#include "llvm/CodeGen/SelectionDAGNodes.h"
60#include "llvm/CodeGen/TargetInstrInfo.h"
61#include "llvm/CodeGen/TargetLowering.h"
62#include "llvm/CodeGen/TargetOpcodes.h"
63#include "llvm/CodeGen/TargetRegisterInfo.h"
64#include "llvm/CodeGen/TargetSubtargetInfo.h"
65#include "llvm/CodeGen/ValueTypes.h"
66#include "llvm/IR/Attributes.h"
67#include "llvm/IR/CallingConv.h"
68#include "llvm/IR/Constant.h"
69#include "llvm/IR/Constants.h"
70#include "llvm/IR/DataLayout.h"
71#include "llvm/IR/DebugLoc.h"
72#include "llvm/IR/DerivedTypes.h"
73#include "llvm/IR/Function.h"
74#include "llvm/IR/GlobalAlias.h"
75#include "llvm/IR/GlobalValue.h"
76#include "llvm/IR/GlobalVariable.h"
77#include "llvm/IR/IRBuilder.h"
78#include "llvm/IR/InlineAsm.h"
79#include "llvm/IR/Instruction.h"
80#include "llvm/IR/Instructions.h"
81#include "llvm/IR/IntrinsicInst.h"
82#include "llvm/IR/Intrinsics.h"
83#include "llvm/IR/IntrinsicsARM.h"
84#include "llvm/IR/Module.h"
85#include "llvm/IR/PatternMatch.h"
86#include "llvm/IR/Type.h"
87#include "llvm/IR/User.h"
88#include "llvm/IR/Value.h"
89#include "llvm/MC/MCInstrDesc.h"
90#include "llvm/MC/MCInstrItineraries.h"
91#include "llvm/MC/MCRegisterInfo.h"
92#include "llvm/MC/MCSchedule.h"
93#include "llvm/Support/AtomicOrdering.h"
94#include "llvm/Support/BranchProbability.h"
95#include "llvm/Support/Casting.h"
96#include "llvm/Support/CodeGen.h"
97#include "llvm/Support/CommandLine.h"
98#include "llvm/Support/Compiler.h"
99#include "llvm/Support/Debug.h"
100#include "llvm/Support/ErrorHandling.h"
101#include "llvm/Support/KnownBits.h"
102#include "llvm/Support/MathExtras.h"
103#include "llvm/Support/raw_ostream.h"
104#include "llvm/Target/TargetMachine.h"
105#include "llvm/Target/TargetOptions.h"
106#include "llvm/TargetParser/Triple.h"
107#include <algorithm>
108#include <cassert>
109#include <cstdint>
110#include <cstdlib>
111#include <iterator>
112#include <limits>
113#include <optional>
114#include <string>
115#include <tuple>
116#include <utility>
117#include <vector>
118
119using namespace llvm;
120using namespace llvm::PatternMatch;
121
122#define DEBUG_TYPE "arm-isel"
123
124STATISTIC(NumTailCalls, "Number of tail calls");
125STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
126STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
127STATISTIC(NumConstpoolPromoted,
128 "Number of constants with their storage promoted into constant pools");
129
130static cl::opt<bool>
131ARMInterworking("arm-interworking", cl::Hidden,
132 cl::desc("Enable / disable ARM interworking (for debugging only)"),
133 cl::init(true));
134
135static cl::opt<bool> EnableConstpoolPromotion(
136 "arm-promote-constant", cl::Hidden,
137 cl::desc("Enable / disable promotion of unnamed_addr constants into "
138 "constant pools"),
139 cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
140static cl::opt<unsigned> ConstpoolPromotionMaxSize(
141 "arm-promote-constant-max-size", cl::Hidden,
142 cl::desc("Maximum size of constant to promote into a constant pool"),
143 cl::init(64));
144static cl::opt<unsigned> ConstpoolPromotionMaxTotal(
145 "arm-promote-constant-max-total", cl::Hidden,
146 cl::desc("Maximum size of ALL constants to promote into a constant pool"),
147 cl::init(128));
148
149cl::opt<unsigned>
150MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
151 cl::desc("Maximum interleave factor for MVE VLDn to generate."),
152 cl::init(2));
153
154// The APCS parameter registers.
155static const MCPhysReg GPRArgRegs[] = {
156 ARM::R0, ARM::R1, ARM::R2, ARM::R3
157};
158
159void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT) {
160 if (VT != PromotedLdStVT) {
161 setOperationAction(ISD::LOAD, VT, Promote);
162 AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
163
164 setOperationAction(ISD::STORE, VT, Promote);
165 AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
166 }
167
168 MVT ElemTy = VT.getVectorElementType();
169 if (ElemTy != MVT::f64)
170 setOperationAction(ISD::SETCC, VT, Custom);
171 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
172 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
173 if (ElemTy == MVT::i32) {
174 setOperationAction(ISD::SINT_TO_FP, VT, Custom);
175 setOperationAction(ISD::UINT_TO_FP, VT, Custom);
176 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
177 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
178 } else {
179 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
180 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
181 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
182 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
183 }
184 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
185 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
186 setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
187 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
188 setOperationAction(ISD::SELECT, VT, Expand);
189 setOperationAction(ISD::SELECT_CC, VT, Expand);
190 setOperationAction(ISD::VSELECT, VT, Expand);
191 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
192 if (VT.isInteger()) {
193 setOperationAction(ISD::SHL, VT, Custom);
194 setOperationAction(ISD::SRA, VT, Custom);
195 setOperationAction(ISD::SRL, VT, Custom);
196 }
197
198 // Neon does not support vector divide/remainder operations.
199 setOperationAction(ISD::SDIV, VT, Expand);
200 setOperationAction(ISD::UDIV, VT, Expand);
201 setOperationAction(ISD::FDIV, VT, Expand);
202 setOperationAction(ISD::SREM, VT, Expand);
203 setOperationAction(ISD::UREM, VT, Expand);
204 setOperationAction(ISD::FREM, VT, Expand);
205 setOperationAction(ISD::SDIVREM, VT, Expand);
206 setOperationAction(ISD::UDIVREM, VT, Expand);
207
208 if (!VT.isFloatingPoint() &&
209 VT != MVT::v2i64 && VT != MVT::v1i64)
210 for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
211 setOperationAction(Opcode, VT, Legal);
212 if (!VT.isFloatingPoint())
213 for (auto Opcode : {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT})
214 setOperationAction(Opcode, VT, Legal);
215}
216
217void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
218 addRegisterClass(VT, &ARM::DPRRegClass);
219 addTypeForNEON(VT, MVT::f64);
220}
221
222void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
223 addRegisterClass(VT, &ARM::DPairRegClass);
224 addTypeForNEON(VT, MVT::v2f64);
225}
226
227void ARMTargetLowering::setAllExpand(MVT VT) {
228 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
229 setOperationAction(Opc, VT, Expand);
230
231 // We support these really simple operations even on types where all
232 // the actual arithmetic has to be broken down into simpler
233 // operations or turned into library calls.
234 setOperationAction(ISD::BITCAST, VT, Legal);
235 setOperationAction(ISD::LOAD, VT, Legal);
236 setOperationAction(ISD::STORE, VT, Legal);
237 setOperationAction(ISD::UNDEF, VT, Legal);
238}
239
240void ARMTargetLowering::addAllExtLoads(const MVT From, const MVT To,
241 LegalizeAction Action) {
242 setLoadExtAction(ISD::EXTLOAD, From, To, Action);
243 setLoadExtAction(ISD::ZEXTLOAD, From, To, Action);
244 setLoadExtAction(ISD::SEXTLOAD, From, To, Action);
245}
246
247void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
248 const MVT IntTypes[] = { MVT::v16i8, MVT::v8i16, MVT::v4i32 };
249
250 for (auto VT : IntTypes) {
251 addRegisterClass(VT, &ARM::MQPRRegClass);
252 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
253 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
254 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
255 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
256 setOperationAction(ISD::SHL, VT, Custom);
257 setOperationAction(ISD::SRA, VT, Custom);
258 setOperationAction(ISD::SRL, VT, Custom);
259 setOperationAction(ISD::SMIN, VT, Legal);
260 setOperationAction(ISD::SMAX, VT, Legal);
261 setOperationAction(ISD::UMIN, VT, Legal);
262 setOperationAction(ISD::UMAX, VT, Legal);
263 setOperationAction(ISD::ABS, VT, Legal);
264 setOperationAction(ISD::SETCC, VT, Custom);
265 setOperationAction(ISD::MLOAD, VT, Custom);
266 setOperationAction(ISD::MSTORE, VT, Legal);
267 setOperationAction(ISD::CTLZ, VT, Legal);
268 setOperationAction(ISD::CTTZ, VT, Custom);
269 setOperationAction(ISD::BITREVERSE, VT, Legal);
270 setOperationAction(ISD::BSWAP, VT, Legal);
271 setOperationAction(ISD::SADDSAT, VT, Legal);
272 setOperationAction(ISD::UADDSAT, VT, Legal);
273 setOperationAction(ISD::SSUBSAT, VT, Legal);
274 setOperationAction(ISD::USUBSAT, VT, Legal);
275 setOperationAction(ISD::ABDS, VT, Legal);
276 setOperationAction(ISD::ABDU, VT, Legal);
277 setOperationAction(ISD::AVGFLOORS, VT, Legal);
278 setOperationAction(ISD::AVGFLOORU, VT, Legal);
279 setOperationAction(ISD::AVGCEILS, VT, Legal);
280 setOperationAction(ISD::AVGCEILU, VT, Legal);
281
282 // No native support for these.
283 setOperationAction(ISD::UDIV, VT, Expand);
284 setOperationAction(ISD::SDIV, VT, Expand);
285 setOperationAction(ISD::UREM, VT, Expand);
286 setOperationAction(ISD::SREM, VT, Expand);
287 setOperationAction(ISD::UDIVREM, VT, Expand);
288 setOperationAction(ISD::SDIVREM, VT, Expand);
289 setOperationAction(ISD::CTPOP, VT, Expand);
290 setOperationAction(ISD::SELECT, VT, Expand);
291 setOperationAction(ISD::SELECT_CC, VT, Expand);
292
293 // Vector reductions
294 setOperationAction(ISD::VECREDUCE_ADD, VT, Legal);
295 setOperationAction(ISD::VECREDUCE_SMAX, VT, Legal);
296 setOperationAction(ISD::VECREDUCE_UMAX, VT, Legal);
297 setOperationAction(ISD::VECREDUCE_SMIN, VT, Legal);
298 setOperationAction(ISD::VECREDUCE_UMIN, VT, Legal);
299 setOperationAction(ISD::VECREDUCE_MUL, VT, Custom);
300 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
301 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
302 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
303
304 if (!HasMVEFP) {
305 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
306 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
307 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
308 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
309 } else {
310 setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
311 setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
312 }
313
314 // Pre and Post inc are supported on loads and stores
315 for (unsigned im = (unsigned)ISD::PRE_INC;
316 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
317 setIndexedLoadAction(im, VT, Legal);
318 setIndexedStoreAction(im, VT, Legal);
319 setIndexedMaskedLoadAction(im, VT, Legal);
320 setIndexedMaskedStoreAction(im, VT, Legal);
321 }
322 }
323
324 const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 };
325 for (auto VT : FloatTypes) {
326 addRegisterClass(VT, &ARM::MQPRRegClass);
327 if (!HasMVEFP)
328 setAllExpand(VT);
329
330 // These are legal or custom whether we have MVE.fp or not
331 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
332 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
333 setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getVectorElementType(), Custom);
334 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
335 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
336 setOperationAction(ISD::BUILD_VECTOR, VT.getVectorElementType(), Custom);
337 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
338 setOperationAction(ISD::SETCC, VT, Custom);
339 setOperationAction(ISD::MLOAD, VT, Custom);
340 setOperationAction(ISD::MSTORE, VT, Legal);
341 setOperationAction(ISD::SELECT, VT, Expand);
342 setOperationAction(ISD::SELECT_CC, VT, Expand);
343
344 // Pre and Post inc are supported on loads and stores
345 for (unsigned im = (unsigned)ISD::PRE_INC;
346 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
347 setIndexedLoadAction(im, VT, Legal);
348 setIndexedStoreAction(im, VT, Legal);
349 setIndexedMaskedLoadAction(im, VT, Legal);
350 setIndexedMaskedStoreAction(im, VT, Legal);
351 }
352
353 if (HasMVEFP) {
354 setOperationAction(ISD::FMINNUM, VT, Legal);
355 setOperationAction(ISD::FMAXNUM, VT, Legal);
356 setOperationAction(ISD::FROUND, VT, Legal);
357 setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
358 setOperationAction(ISD::VECREDUCE_FMUL, VT, Custom);
359 setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
360 setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
361
362 // No native support for these.
363 setOperationAction(ISD::FDIV, VT, Expand);
364 setOperationAction(ISD::FREM, VT, Expand);
365 setOperationAction(ISD::FSQRT, VT, Expand);
366 setOperationAction(ISD::FSIN, VT, Expand);
367 setOperationAction(ISD::FCOS, VT, Expand);
368 setOperationAction(ISD::FPOW, VT, Expand);
369 setOperationAction(ISD::FLOG, VT, Expand);
370 setOperationAction(ISD::FLOG2, VT, Expand);
371 setOperationAction(ISD::FLOG10, VT, Expand);
372 setOperationAction(ISD::FEXP, VT, Expand);
373 setOperationAction(ISD::FEXP2, VT, Expand);
374 setOperationAction(ISD::FNEARBYINT, VT, Expand);
375 }
376 }
377
378 // Custom Expand smaller than legal vector reductions to prevent false zero
379 // items being added.
380 setOperationAction(ISD::VECREDUCE_FADD, MVT::v4f16, Custom);
381 setOperationAction(ISD::VECREDUCE_FMUL, MVT::v4f16, Custom);
382 setOperationAction(ISD::VECREDUCE_FMIN, MVT::v4f16, Custom);
383 setOperationAction(ISD::VECREDUCE_FMAX, MVT::v4f16, Custom);
384 setOperationAction(ISD::VECREDUCE_FADD, MVT::v2f16, Custom);
385 setOperationAction(ISD::VECREDUCE_FMUL, MVT::v2f16, Custom);
386 setOperationAction(ISD::VECREDUCE_FMIN, MVT::v2f16, Custom);
387 setOperationAction(ISD::VECREDUCE_FMAX, MVT::v2f16, Custom);
388
389 // We 'support' these types up to bitcast/load/store level, regardless of
390 // MVE integer-only / float support. Only doing FP data processing on the FP
391 // vector types is inhibited at integer-only level.
392 const MVT LongTypes[] = { MVT::v2i64, MVT::v2f64 };
393 for (auto VT : LongTypes) {
394 addRegisterClass(VT, &ARM::MQPRRegClass);
395 setAllExpand(VT);
396 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
397 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
398 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
399 setOperationAction(ISD::VSELECT, VT, Legal);
400 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
401 }
402 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
403
404 // We can do bitwise operations on v2i64 vectors
405 setOperationAction(ISD::AND, MVT::v2i64, Legal);
406 setOperationAction(ISD::OR, MVT::v2i64, Legal);
407 setOperationAction(ISD::XOR, MVT::v2i64, Legal);
408
409 // It is legal to extload from v4i8 to v4i16 or v4i32.
410 addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal);
411 addAllExtLoads(MVT::v4i32, MVT::v4i16, Legal);
412 addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal);
413
414 // It is legal to sign extend from v4i8/v4i16 to v4i32 or v8i8 to v8i16.
415 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Legal);
416 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
417 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
418 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i8, Legal);
419 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i16, Legal);
420
421 // Some truncating stores are legal too.
422 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
423 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
424 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
425
426 // Pre and Post inc on these are legal, given the correct extends
427 for (unsigned im = (unsigned)ISD::PRE_INC;
428 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
429 for (auto VT : {MVT::v8i8, MVT::v4i8, MVT::v4i16}) {
430 setIndexedLoadAction(im, VT, Legal);
431 setIndexedStoreAction(im, VT, Legal);
432 setIndexedMaskedLoadAction(im, VT, Legal);
433 setIndexedMaskedStoreAction(im, VT, Legal);
434 }
435 }
436
437 // Predicate types
438 const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1, MVT::v2i1};
439 for (auto VT : pTypes) {
440 addRegisterClass(VT, &ARM::VCCRRegClass);
441 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
442 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
443 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
444 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
445 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
446 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
447 setOperationAction(ISD::SETCC, VT, Custom);
448 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
449 setOperationAction(ISD::LOAD, VT, Custom);
450 setOperationAction(ISD::STORE, VT, Custom);
451 setOperationAction(ISD::TRUNCATE, VT, Custom);
452 setOperationAction(ISD::VSELECT, VT, Expand);
453 setOperationAction(ISD::SELECT, VT, Expand);
454 setOperationAction(ISD::SELECT_CC, VT, Expand);
455
456 if (!HasMVEFP) {
457 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
458 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
459 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
460 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
461 }
462 }
463 setOperationAction(ISD::SETCC, MVT::v2i1, Expand);
464 setOperationAction(ISD::TRUNCATE, MVT::v2i1, Expand);
465 setOperationAction(ISD::AND, MVT::v2i1, Expand);
466 setOperationAction(ISD::OR, MVT::v2i1, Expand);
467 setOperationAction(ISD::XOR, MVT::v2i1, Expand);
468 setOperationAction(ISD::SINT_TO_FP, MVT::v2i1, Expand);
469 setOperationAction(ISD::UINT_TO_FP, MVT::v2i1, Expand);
470 setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Expand);
471 setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Expand);
472
473 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
474 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
475 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
476 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
477 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i16, Custom);
478 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
479 setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
480 setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
481}
482
483ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
484 const ARMSubtarget &STI)
485 : TargetLowering(TM), Subtarget(&STI) {
486 RegInfo = Subtarget->getRegisterInfo();
487 Itins = Subtarget->getInstrItineraryData();
488
489 setBooleanContents(ZeroOrOneBooleanContent);
490 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
491
492 if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
493 !Subtarget->isTargetWatchOS() && !Subtarget->isTargetDriverKit()) {
494 bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard;
495 for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
496 setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
497 IsHFTarget ? CallingConv::ARM_AAPCS_VFP
498 : CallingConv::ARM_AAPCS);
499 }
500
501 if (Subtarget->isTargetMachO()) {
502 // Uses VFP for Thumb libfuncs if available.
503 if (Subtarget->isThumb() && Subtarget->hasVFP2Base() &&
504 Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
505 static const struct {
506 const RTLIB::Libcall Op;
507 const char * const Name;
508 const ISD::CondCode Cond;
509 } LibraryCalls[] = {
510 // Single-precision floating-point arithmetic.
511 { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
512 { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
513 { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
514 { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
515
516 // Double-precision floating-point arithmetic.
517 { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
518 { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
519 { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
520 { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
521
522 // Single-precision comparisons.
523 { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
524 { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
525 { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
526 { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
527 { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
528 { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
529 { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
530
531 // Double-precision comparisons.
532 { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
533 { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
534 { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
535 { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
536 { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
537 { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
538 { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
539
540 // Floating-point to integer conversions.
541 // i64 conversions are done via library routines even when generating VFP
542 // instructions, so use the same ones.
543 { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
544 { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
545 { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
546 { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
547
548 // Conversions between floating types.
549 { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
550 { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
551
552 // Integer to floating-point conversions.
553 // i64 conversions are done via library routines even when generating VFP
554 // instructions, so use the same ones.
555 // FIXME: There appears to be some naming inconsistency in ARM libgcc:
556 // e.g., __floatunsidf vs. __floatunssidfvfp.
557 { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
558 { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
559 { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
560 { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
561 };
562
563 for (const auto &LC : LibraryCalls) {
564 setLibcallName(LC.Op, LC.Name);
565 if (LC.Cond != ISD::SETCC_INVALID)
566 setCmpLibcallCC(LC.Op, LC.Cond);
567 }
568 }
569 }
570
571 // These libcalls are not available in 32-bit.
572 setLibcallName(RTLIB::SHL_I128, nullptr);
573 setLibcallName(RTLIB::SRL_I128, nullptr);
574 setLibcallName(RTLIB::SRA_I128, nullptr);
575 setLibcallName(RTLIB::MUL_I128, nullptr);
576 setLibcallName(RTLIB::MULO_I64, nullptr);
577 setLibcallName(RTLIB::MULO_I128, nullptr);
578
579 // RTLIB
580 if (Subtarget->isAAPCS_ABI() &&
581 (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
582 Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
583 static const struct {
584 const RTLIB::Libcall Op;
585 const char * const Name;
586 const CallingConv::ID CC;
587 const ISD::CondCode Cond;
588 } LibraryCalls[] = {
589 // Double-precision floating-point arithmetic helper functions
590 // RTABI chapter 4.1.2, Table 2
591 { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
592 { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
593 { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
594 { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
595
596 // Double-precision floating-point comparison helper functions
597 // RTABI chapter 4.1.2, Table 3
598 { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
599 { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
600 { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
601 { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
602 { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
603 { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
604 { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
605
606 // Single-precision floating-point arithmetic helper functions
607 // RTABI chapter 4.1.2, Table 4
608 { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
609 { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
610 { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
611 { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
612
613 // Single-precision floating-point comparison helper functions
614 // RTABI chapter 4.1.2, Table 5
615 { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
616 { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
617 { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
618 { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
619 { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
620 { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
621 { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
622
623 // Floating-point to integer conversions.
624 // RTABI chapter 4.1.2, Table 6
625 { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
626 { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
627 { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
628 { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
629 { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
630 { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
631 { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
632 { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
633
634 // Conversions between floating types.
635 // RTABI chapter 4.1.2, Table 7
636 { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
637 { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
638 { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
639
640 // Integer to floating-point conversions.
641 // RTABI chapter 4.1.2, Table 8
642 { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
643 { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
644 { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
645 { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
646 { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
647 { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
648 { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
649 { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
650
651 // Long long helper functions
652 // RTABI chapter 4.2, Table 9
653 { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
654 { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
655 { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
656 { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
657
658 // Integer division functions
659 // RTABI chapter 4.3.1
660 { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
661 { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
662 { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
663 { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
664 { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
665 { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
666 { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
667 { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
668 };
669
670 for (const auto &LC : LibraryCalls) {
671 setLibcallName(LC.Op, LC.Name);
672 setLibcallCallingConv(LC.Op, LC.CC);
673 if (LC.Cond != ISD::SETCC_INVALID)
674 setCmpLibcallCC(LC.Op, LC.Cond);
675 }
676
677 // EABI dependent RTLIB
678 if (TM.Options.EABIVersion == EABI::EABI4 ||
679 TM.Options.EABIVersion == EABI::EABI5) {
680 static const struct {
681 const RTLIB::Libcall Op;
682 const char *const Name;
683 const CallingConv::ID CC;
684 const ISD::CondCode Cond;
685 } MemOpsLibraryCalls[] = {
686 // Memory operations
687 // RTABI chapter 4.3.4
688 { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
689 { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
690 { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
691 };
692
693 for (const auto &LC : MemOpsLibraryCalls) {
694 setLibcallName(LC.Op, LC.Name);
695 setLibcallCallingConv(LC.Op, LC.CC);
696 if (LC.Cond != ISD::SETCC_INVALID)
697 setCmpLibcallCC(LC.Op, LC.Cond);
698 }
699 }
700 }
701
702 if (Subtarget->isTargetWindows()) {
703 static const struct {
704 const RTLIB::Libcall Op;
705 const char * const Name;
706 const CallingConv::ID CC;
707 } LibraryCalls[] = {
708 { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
709 { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
710 { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
711 { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
712 { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
713 { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
714 { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
715 { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
716 };
717
718 for (const auto &LC : LibraryCalls) {
719 setLibcallName(LC.Op, LC.Name);
720 setLibcallCallingConv(LC.Op, LC.CC);
721 }
722 }
723
724 // Use divmod compiler-rt calls for iOS 5.0 and later.
725 if (Subtarget->isTargetMachO() &&
726 !(Subtarget->isTargetIOS() &&
727 Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
728 setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
729 setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
730 }
731
732 // The half <-> float conversion functions are always soft-float on
733 // non-watchos platforms, but are needed for some targets which use a
734 // hard-float calling convention by default.
735 if (!Subtarget->isTargetWatchABI()) {
736 if (Subtarget->isAAPCS_ABI()) {
737 setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
738 setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
739 setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
740 } else {
741 setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
742 setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
743 setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
744 }
745 }
746
747 // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
748 // a __gnu_ prefix (which is the default).
749 if (Subtarget->isTargetAEABI()) {
750 static const struct {
751 const RTLIB::Libcall Op;
752 const char * const Name;
753 const CallingConv::ID CC;
754 } LibraryCalls[] = {
755 { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
756 { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
757 { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
758 };
759
760 for (const auto &LC : LibraryCalls) {
761 setLibcallName(LC.Op, LC.Name);
762 setLibcallCallingConv(LC.Op, LC.CC);
763 }
764 }
765
766 if (Subtarget->isThumb1Only())
767 addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
768 else
769 addRegisterClass(MVT::i32, &ARM::GPRRegClass);
770
771 if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only() &&
772 Subtarget->hasFPRegs()) {
773 addRegisterClass(MVT::f32, &ARM::SPRRegClass);
774 addRegisterClass(MVT::f64, &ARM::DPRRegClass);
775
776 setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i32, Custom);
777 setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i32, Custom);
778 setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
779 setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
780
781 if (!Subtarget->hasVFP2Base())
782 setAllExpand(MVT::f32);
783 if (!Subtarget->hasFP64())
784 setAllExpand(MVT::f64);
785 }
786
787 if (Subtarget->hasFullFP16()) {
788 addRegisterClass(MVT::f16, &ARM::HPRRegClass);
789 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
790 setOperationAction(ISD::BITCAST, MVT::f16, Custom);
791
792 setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
793 setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
794 }
795
796 if (Subtarget->hasBF16()) {
797 addRegisterClass(MVT::bf16, &ARM::HPRRegClass);
798 setAllExpand(MVT::bf16);
799 if (!Subtarget->hasFullFP16())
800 setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
801 }
802
803 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
804 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
805 setTruncStoreAction(VT, InnerVT, Expand);
806 addAllExtLoads(VT, InnerVT, Expand);
807 }
808
809 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
810 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
811
812 setOperationAction(ISD::BSWAP, VT, Expand);
813 }
814
815 setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
816 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
817
818 setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
819 setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
820
821 if (Subtarget->hasMVEIntegerOps())
822 addMVEVectorTypes(Subtarget->hasMVEFloatOps());
823
824 // Combine low-overhead loop intrinsics so that we can lower i1 types.
825 if (Subtarget->hasLOB()) {
826 setTargetDAGCombine({ISD::BRCOND, ISD::BR_CC});
827 }
828
829 if (Subtarget->hasNEON()) {
830 addDRTypeForNEON(MVT::v2f32);
831 addDRTypeForNEON(MVT::v8i8);
832 addDRTypeForNEON(MVT::v4i16);
833 addDRTypeForNEON(MVT::v2i32);
834 addDRTypeForNEON(MVT::v1i64);
835
836 addQRTypeForNEON(MVT::v4f32);
837 addQRTypeForNEON(MVT::v2f64);
838 addQRTypeForNEON(MVT::v16i8);
839 addQRTypeForNEON(MVT::v8i16);
840 addQRTypeForNEON(MVT::v4i32);
841 addQRTypeForNEON(MVT::v2i64);
842
843 if (Subtarget->hasFullFP16()) {
844 addQRTypeForNEON(MVT::v8f16);
845 addDRTypeForNEON(MVT::v4f16);
846 }
847
848 if (Subtarget->hasBF16()) {
849 addQRTypeForNEON(MVT::v8bf16);
850 addDRTypeForNEON(MVT::v4bf16);
851 }
852 }
853
854 if (Subtarget->hasMVEIntegerOps() || Subtarget->hasNEON()) {
855 // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
856 // none of Neon, MVE or VFP supports any arithmetic operations on it.
857 setOperationAction(ISD::FADD, MVT::v2f64, Expand);
858 setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
859 setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
860 // FIXME: Code duplication: FDIV and FREM are expanded always, see
861 // ARMTargetLowering::addTypeForNEON method for details.
862 setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
863 setOperationAction(ISD::FREM, MVT::v2f64, Expand);
864 // FIXME: Create unittest.
865 // In other words, find a case where "copysign" appears in the DAG with vector
866 // operands.
867 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
868 // FIXME: Code duplication: SETCC has custom operation action, see
869 // ARMTargetLowering::addTypeForNEON method for details.
870 setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
871 // FIXME: Create unittest for FNEG and for FABS.
872 setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
873 setOperationAction(ISD::FABS, MVT::v2f64, Expand);
874 setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
875 setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
876 setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
877 setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
878 setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
879 setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
880 setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
881 setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
882 setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
883 // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
884 setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
885 setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
886 setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
887 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
888 setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
889 setOperationAction(ISD::FMA, MVT::v2f64, Expand);
890 }
891
892 if (Subtarget->hasNEON()) {
893 // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively
894 // supported for v4f32.
895 setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
896 setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
897 setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
898 setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
899 setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
900 setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
901 setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
902 setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
903 setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
904 setOperationAction(ISD::FCEIL, MVT::v4f32, Expand);
905 setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand);
906 setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
907 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
908 setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand);
909
910 // Mark v2f32 intrinsics.
911 setOperationAction(ISD::FSQRT, MVT::v2f32, Expand);
912 setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
913 setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
914 setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
915 setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
916 setOperationAction(ISD::FLOG2, MVT::v2f32, Expand);
917 setOperationAction(ISD::FLOG10, MVT::v2f32, Expand);
918 setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
919 setOperationAction(ISD::FEXP2, MVT::v2f32, Expand);
920 setOperationAction(ISD::FCEIL, MVT::v2f32, Expand);
921 setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand);
922 setOperationAction(ISD::FRINT, MVT::v2f32, Expand);
923 setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand);
924 setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand);
925
926 // Neon does not support some operations on v1i64 and v2i64 types.
927 setOperationAction(ISD::MUL, MVT::v1i64, Expand);
928 // Custom handling for some quad-vector types to detect VMULL.
929 setOperationAction(ISD::MUL, MVT::v8i16, Custom);
930 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
931 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
932 // Custom handling for some vector types to avoid expensive expansions
933 setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
934 setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
935 setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
936 setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
937 // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
938 // a destination type that is wider than the source, nor does
939 // it have a FP_TO_[SU]INT instruction with a narrower destination than
940 // source.
941 setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
942 setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom);
943 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
944 setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
945 setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
946 setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Custom);
947 setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
948 setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
949
950 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand);
951 setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
952
953 // NEON does not have single instruction CTPOP for vectors with element
954 // types wider than 8-bits. However, custom lowering can leverage the
955 // v8i8/v16i8 vcnt instruction.
956 setOperationAction(ISD::CTPOP, MVT::v2i32, Custom);
957 setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);
958 setOperationAction(ISD::CTPOP, MVT::v4i16, Custom);
959 setOperationAction(ISD::CTPOP, MVT::v8i16, Custom);
960 setOperationAction(ISD::CTPOP, MVT::v1i64, Custom);
961 setOperationAction(ISD::CTPOP, MVT::v2i64, Custom);
962
963 setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
964 setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
965
966 // NEON does not have single instruction CTTZ for vectors.
967 setOperationAction(ISD::CTTZ, MVT::v8i8, Custom);
968 setOperationAction(ISD::CTTZ, MVT::v4i16, Custom);
969 setOperationAction(ISD::CTTZ, MVT::v2i32, Custom);
970 setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
971
972 setOperationAction(ISD::CTTZ, MVT::v16i8, Custom);
973 setOperationAction(ISD::CTTZ, MVT::v8i16, Custom);
974 setOperationAction(ISD::CTTZ, MVT::v4i32, Custom);
975 setOperationAction(ISD::CTTZ, MVT::v2i64, Custom);
976
977 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i8, Custom);
978 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i16, Custom);
979 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i32, Custom);
980 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v1i64, Custom);
981
982 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i8, Custom);
983 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i16, Custom);
984 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom);
985 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom);
986
987 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
988 setOperationAction(ISD::MULHS, VT, Expand);
989 setOperationAction(ISD::MULHU, VT, Expand);
990 }
991
992 // NEON only has FMA instructions as of VFP4.
993 if (!Subtarget->hasVFP4Base()) {
994 setOperationAction(ISD::FMA, MVT::v2f32, Expand);
995 setOperationAction(ISD::FMA, MVT::v4f32, Expand);
996 }
997
998 setTargetDAGCombine({ISD::SHL, ISD::SRL, ISD::SRA, ISD::FP_TO_SINT,
999 ISD::FP_TO_UINT, ISD::FDIV, ISD::LOAD});
1000
1001 // It is legal to extload from v4i8 to v4i16 or v4i32.
1002 for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16,
1003 MVT::v2i32}) {
1004 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
1005 setLoadExtAction(ISD::EXTLOAD, VT, Ty, Legal);
1006 setLoadExtAction(ISD::ZEXTLOAD, VT, Ty, Legal);
1007 setLoadExtAction(ISD::SEXTLOAD, VT, Ty, Legal);
1008 }
1009 }
1010
1011 for (auto VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16,
1012 MVT::v4i32}) {
1013 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
1014 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
1015 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
1016 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
1017 }
1018 }
1019
1020 if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
1021 setTargetDAGCombine(
1022 {ISD::BUILD_VECTOR, ISD::VECTOR_SHUFFLE, ISD::INSERT_SUBVECTOR,
1023 ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT,
1024 ISD::SIGN_EXTEND_INREG, ISD::STORE, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND,
1025 ISD::ANY_EXTEND, ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN,
1026 ISD::INTRINSIC_VOID, ISD::VECREDUCE_ADD, ISD::ADD, ISD::BITCAST});
1027 }
1028 if (Subtarget->hasMVEIntegerOps()) {
1029 setTargetDAGCombine({ISD::SMIN, ISD::UMIN, ISD::SMAX, ISD::UMAX,
1030 ISD::FP_EXTEND, ISD::SELECT, ISD::SELECT_CC,
1031 ISD::SETCC});
1032 }
1033 if (Subtarget->hasMVEFloatOps()) {
1034 setTargetDAGCombine(ISD::FADD);
1035 }
1036
1037 if (!Subtarget->hasFP64()) {
1038 // When targeting a floating-point unit with only single-precision
1039 // operations, f64 is legal for the few double-precision instructions which
1040 // are present. However, no double-precision operations other than moves,
1041 // loads and stores are provided by the hardware.
1042 setOperationAction(ISD::FADD, MVT::f64, Expand);
1043 setOperationAction(ISD::FSUB, MVT::f64, Expand);
1044 setOperationAction(ISD::FMUL, MVT::f64, Expand);
1045 setOperationAction(ISD::FMA, MVT::f64, Expand);
1046 setOperationAction(ISD::FDIV, MVT::f64, Expand);
1047 setOperationAction(ISD::FREM, MVT::f64, Expand);
1048 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
1049 setOperationAction(ISD::FGETSIGN, MVT::f64, Expand);
1050 setOperationAction(ISD::FNEG, MVT::f64, Expand);
1051 setOperationAction(ISD::FABS, MVT::f64, Expand);
1052 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
1053 setOperationAction(ISD::FSIN, MVT::f64, Expand);
1054 setOperationAction(ISD::FCOS, MVT::f64, Expand);
1055 setOperationAction(ISD::FPOW, MVT::f64, Expand);
1056 setOperationAction(ISD::FLOG, MVT::f64, Expand);
1057 setOperationAction(ISD::FLOG2, MVT::f64, Expand);
1058 setOperationAction(ISD::FLOG10, MVT::f64, Expand);
1059 setOperationAction(ISD::FEXP, MVT::f64, Expand);
1060 setOperationAction(ISD::FEXP2, MVT::f64, Expand);
1061 setOperationAction(ISD::FCEIL, MVT::f64, Expand);
1062 setOperationAction(ISD::FTRUNC, MVT::f64, Expand);
1063 setOperationAction(ISD::FRINT, MVT::f64, Expand);
1064 setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
1065 setOperationAction(ISD::FFLOOR, MVT::f64, Expand);
1066 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
1067 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
1068 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
1069 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
1070 setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom);
1071 setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom);
1072 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
1073 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
1074 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
1075 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::f64, Custom);
1076 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::f64, Custom);
1077 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
1078 }
1079
1080 if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) {
1081 setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
1082 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Custom);
1083 if (Subtarget->hasFullFP16()) {
1084 setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
1085 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
1086 }
1087 }
1088
1089 if (!Subtarget->hasFP16()) {
1090 setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom);
1091 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Custom);
1092 }
1093
1094 computeRegisterProperties(Subtarget->getRegisterInfo());
1095
1096 // ARM does not have floating-point extending loads.
1097 for (MVT VT : MVT::fp_valuetypes()) {
1098 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
1099 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
1100 }
1101
1102 // ... or truncating stores
1103 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
1104 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
1105 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
1106
1107 // ARM does not have i1 sign extending load.
1108 for (MVT VT : MVT::integer_valuetypes())
1109 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
1110
1111 // ARM supports all 4 flavors of integer indexed load / store.
1112 if (!Subtarget->isThumb1Only()) {
1113 for (unsigned im = (unsigned)ISD::PRE_INC;
1114 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
1115 setIndexedLoadAction(im, MVT::i1, Legal);
1116 setIndexedLoadAction(im, MVT::i8, Legal);
1117 setIndexedLoadAction(im, MVT::i16, Legal);
1118 setIndexedLoadAction(im, MVT::i32, Legal);
1119 setIndexedStoreAction(im, MVT::i1, Legal);
1120 setIndexedStoreAction(im, MVT::i8, Legal);
1121 setIndexedStoreAction(im, MVT::i16, Legal);
1122 setIndexedStoreAction(im, MVT::i32, Legal);
1123 }
1124 } else {
1125 // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
1126 setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal);
1127 setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal);
1128 }
1129
1130 setOperationAction(ISD::SADDO, MVT::i32, Custom);
1131 setOperationAction(ISD::UADDO, MVT::i32, Custom);
1132 setOperationAction(ISD::SSUBO, MVT::i32, Custom);
1133 setOperationAction(ISD::USUBO, MVT::i32, Custom);
1134
1135 setOperationAction(ISD::UADDO_CARRY, MVT::i32, Custom);
1136 setOperationAction(ISD::USUBO_CARRY, MVT::i32, Custom);
1137 if (Subtarget->hasDSP()) {
1138 setOperationAction(ISD::SADDSAT, MVT::i8, Custom);
1139 setOperationAction(ISD::SSUBSAT, MVT::i8, Custom);
1140 setOperationAction(ISD::SADDSAT, MVT::i16, Custom);
1141 setOperationAction(ISD::SSUBSAT, MVT::i16, Custom);
1142 setOperationAction(ISD::UADDSAT, MVT::i8, Custom);
1143 setOperationAction(ISD::USUBSAT, MVT::i8, Custom);
1144 setOperationAction(ISD::UADDSAT, MVT::i16, Custom);
1145 setOperationAction(ISD::USUBSAT, MVT::i16, Custom);
1146 }
1147 if (Subtarget->hasBaseDSP()) {
1148 setOperationAction(ISD::SADDSAT, MVT::i32, Legal);
1149 setOperationAction(ISD::SSUBSAT, MVT::i32, Legal);
1150 }
1151
1152 // i64 operation support.
1153 setOperationAction(ISD::MUL, MVT::i64, Expand);
1154 setOperationAction(ISD::MULHU, MVT::i32, Expand);
1155 if (Subtarget->isThumb1Only()) {
1156 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
1157 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
1158 }
1159 if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
1160 || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
1161 setOperationAction(ISD::MULHS, MVT::i32, Expand);
1162
1163 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
1164 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
1165 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
1166 setOperationAction(ISD::SRL, MVT::i64, Custom);
1167 setOperationAction(ISD::SRA, MVT::i64, Custom);
1168 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
1169 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
1170 setOperationAction(ISD::LOAD, MVT::i64, Custom);
1171 setOperationAction(ISD::STORE, MVT::i64, Custom);
1172
1173 // MVE lowers 64 bit shifts to lsll and lsrl
1174 // assuming that ISD::SRL and SRA of i64 are already marked custom
1175 if (Subtarget->hasMVEIntegerOps())
1176 setOperationAction(ISD::SHL, MVT::i64, Custom);
1177
1178 // Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1.
1179 if (Subtarget->isThumb1Only()) {
1180 setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
1181 setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
1182 setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
1183 }
1184
1185 if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
1186 setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
1187
1188 // ARM does not have ROTL.
1189 setOperationAction(ISD::ROTL, MVT::i32, Expand);
1190 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
1191 setOperationAction(ISD::ROTL, VT, Expand);
1192 setOperationAction(ISD::ROTR, VT, Expand);
1193 }
1194 setOperationAction(ISD::CTTZ, MVT::i32, Custom);
1195 setOperationAction(ISD::CTPOP, MVT::i32, Expand);
1196 if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
1197 setOperationAction(ISD::CTLZ, MVT::i32, Expand);
1198 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, LibCall);
1199 }
1200
1201 // @llvm.readcyclecounter requires the Performance Monitors extension.
1202 // Default to the 0 expansion on unsupported platforms.
1203 // FIXME: Technically there are older ARM CPUs that have
1204 // implementation-specific ways of obtaining this information.
1205 if (Subtarget->hasPerfMon())
1206 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
1207
1208 // Only ARMv6 and later have BSWAP (REV).
1209 if (!Subtarget->hasV6Ops())
1210 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
1211
1212 bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
1213 : Subtarget->hasDivideInARMMode();
1214 if (!hasDivide) {
1215 // These are expanded into libcalls if the cpu doesn't have HW divider.
1216 setOperationAction(ISD::SDIV, MVT::i32, LibCall);
1217 setOperationAction(ISD::UDIV, MVT::i32, LibCall);
1218 }
1219
1220 if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
1221 setOperationAction(ISD::SDIV, MVT::i32, Custom);
1222 setOperationAction(ISD::UDIV, MVT::i32, Custom);
1223
1224 setOperationAction(ISD::SDIV, MVT::i64, Custom);
1225 setOperationAction(ISD::UDIV, MVT::i64, Custom);
1226 }
1227
1228 setOperationAction(ISD::SREM, MVT::i32, Expand);
1229 setOperationAction(ISD::UREM, MVT::i32, Expand);
1230
1231 // Register based DivRem for AEABI (RTABI 4.2)
1232 if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
1233 Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
1234 Subtarget->isTargetWindows()) {
1235 setOperationAction(ISD::SREM, MVT::i64, Custom);
1236 setOperationAction(ISD::UREM, MVT::i64, Custom);
1237 HasStandaloneRem = false;
1238
1239 if (Subtarget->isTargetWindows()) {
1240 const struct {
1241 const RTLIB::Libcall Op;
1242 const char * const Name;
1243 const CallingConv::ID CC;
1244 } LibraryCalls[] = {
1245 { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS },
1246 { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS },
1247 { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS },
1248 { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },
1249
1250 { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS },
1251 { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS },
1252 { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS },
1253 { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
1254 };
1255
1256 for (const auto &LC : LibraryCalls) {
1257 setLibcallName(LC.Op, LC.Name);
1258 setLibcallCallingConv(LC.Op, LC.CC);
1259 }
1260 } else {
1261 const struct {
1262 const RTLIB::Libcall Op;
1263 const char * const Name;
1264 const CallingConv::ID CC;
1265 } LibraryCalls[] = {
1266 { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1267 { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1268 { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1269 { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS },
1270
1271 { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1272 { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1273 { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1274 { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
1275 };
1276
1277 for (const auto &LC : LibraryCalls) {
1278 setLibcallName(LC.Op, LC.Name);
1279 setLibcallCallingConv(LC.Op, LC.CC);
1280 }
1281 }
1282
1283 setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
1284 setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
1285 setOperationAction(ISD::SDIVREM, MVT::i64, Custom);
1286 setOperationAction(ISD::UDIVREM, MVT::i64, Custom);
1287 } else {
1288 setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
1289 setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
1290 }
1291
1292 if (Subtarget->getTargetTriple().isOSMSVCRT()) {
1293 // MSVCRT doesn't have powi; fall back to pow
1294 setLibcallName(RTLIB::POWI_F32, nullptr);
1295 setLibcallName(RTLIB::POWI_F64, nullptr);
1296 }
1297
1298 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
1299 setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
1300 setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
1301 setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
1302
1303 setOperationAction(ISD::TRAP, MVT::Other, Legal);
1304 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
1305
1306 // Use the default implementation.
1307 setOperationAction(ISD::VASTART, MVT::Other, Custom);
1308 setOperationAction(ISD::VAARG, MVT::Other, Expand);
1309 setOperationAction(ISD::VACOPY, MVT::Other, Expand);
1310 setOperationAction(ISD::VAEND, MVT::Other, Expand);
1311 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
1312 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
1313
1314 if (Subtarget->isTargetWindows())
1315 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
1316 else
1317 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
1318
1319 // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
1320 // the default expansion.
1321 InsertFencesForAtomic = false;
1322 if (Subtarget->hasAnyDataBarrier() &&
1323 (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
1324 // ATOMIC_FENCE needs custom lowering; the others should have been expanded
1325 // to ldrex/strex loops already.
1326 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
1327 if (!Subtarget->isThumb() || !Subtarget->isMClass())
1328 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
1329
1330 // On v8, we have particularly efficient implementations of atomic fences
1331 // if they can be combined with nearby atomic loads and stores.
1332 if (!Subtarget->hasAcquireRelease() ||
1333 getTargetMachine().getOptLevel() == 0) {
1334 // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
1335 InsertFencesForAtomic = true;
1336 }
1337 } else {
1338 // If there's anything we can use as a barrier, go through custom lowering
1339 // for ATOMIC_FENCE.
1340 // If target has DMB in thumb, Fences can be inserted.
1341 if (Subtarget->hasDataBarrier())
1342 InsertFencesForAtomic = true;
1343
1344 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other,
1345 Subtarget->hasAnyDataBarrier() ? Custom : Expand);
1346
1347 // Set them all for expansion, which will force libcalls.
1348 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
1349 setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
1350 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
1351 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
1352 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
1353 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
1354 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
1355 setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
1356 setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
1357 setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
1358 setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
1359 setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
1360 // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
1361 // Unordered/Monotonic case.
1362 if (!InsertFencesForAtomic) {
1363 setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
1364 setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
1365 }
1366 }
1367
1368 // Compute supported atomic widths.
1369 if (Subtarget->isTargetLinux() ||
1370 (!Subtarget->isMClass() && Subtarget->hasV6Ops())) {
1371 // For targets where __sync_* routines are reliably available, we use them
1372 // if necessary.
1373 //
1374 // ARM Linux always supports 64-bit atomics through kernel-assisted atomic
1375 // routines (kernel 3.1 or later). FIXME: Not with compiler-rt?
1376 //
1377 // ARMv6 targets have native instructions in ARM mode. For Thumb mode,
1378 // such targets should provide __sync_* routines, which use the ARM mode
1379 // instructions. (ARMv6 doesn't have dmb, but it has an equivalent
1380 // encoding; see ARMISD::MEMBARRIER_MCR.)
1381 setMaxAtomicSizeInBitsSupported(64);
1382 } else if ((Subtarget->isMClass() && Subtarget->hasV8MBaselineOps()) ||
1383 Subtarget->hasForced32BitAtomics()) {
1384 // Cortex-M cores (besides Cortex-M0) have 32-bit atomics.
1385 setMaxAtomicSizeInBitsSupported(32);
1386 } else {
1387 // We can't assume anything about other targets; just use libatomic
1388 // routines.
1389 setMaxAtomicSizeInBitsSupported(0);
1390 }
1391
1392 setMaxDivRemBitWidthSupported(64);
1393
1394 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
1395
1396 // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
1397 if (!Subtarget->hasV6Ops()) {
1398 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
1399 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
1400 }
1401 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
1402
1403 if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() &&
1404 !Subtarget->isThumb1Only()) {
1405 // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
1406 // iff target supports vfp2.
1407 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
1408 setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom);
1409 setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
1410 }
1411
1412 // We want to custom lower some of our intrinsics.
1413 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
1414 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
1415 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
1416 setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
1417 if (Subtarget->useSjLjEH())
1418 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
1419
1420 setOperationAction(ISD::SETCC, MVT::i32, Expand);
1421 setOperationAction(ISD::SETCC, MVT::f32, Expand);
1422 setOperationAction(ISD::SETCC, MVT::f64, Expand);
1423 setOperationAction(ISD::SELECT, MVT::i32, Custom);
1424 setOperationAction(ISD::SELECT, MVT::f32, Custom);
1425 setOperationAction(ISD::SELECT, MVT::f64, Custom);
1426 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
1427 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
1428 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
1429 if (Subtarget->hasFullFP16()) {
1430 setOperationAction(ISD::SETCC, MVT::f16, Expand);
1431 setOperationAction(ISD::SELECT, MVT::f16, Custom);
1432 setOperationAction(ISD::SELECT_CC, MVT::f16, Custom);
1433 }
1434
1435 setOperationAction(ISD::SETCCCARRY, MVT::i32, Custom);
1436
1437 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
1438 setOperationAction(ISD::BR_CC, MVT::i32, Custom);
1439 if (Subtarget->hasFullFP16())
1440 setOperationAction(ISD::BR_CC, MVT::f16, Custom);
1441 setOperationAction(ISD::BR_CC, MVT::f32, Custom);
1442 setOperationAction(ISD::BR_CC, MVT::f64, Custom);
1443 setOperationAction(ISD::BR_JT, MVT::Other, Custom);
1444
1445 // We don't support sin/cos/fmod/copysign/pow
1446 setOperationAction(ISD::FSIN, MVT::f64, Expand);
1447 setOperationAction(ISD::FSIN, MVT::f32, Expand);
1448 setOperationAction(ISD::FCOS, MVT::f32, Expand);
1449 setOperationAction(ISD::FCOS, MVT::f64, Expand);
1450 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
1451 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
1452 setOperationAction(ISD::FREM, MVT::f64, Expand);
1453 setOperationAction(ISD::FREM, MVT::f32, Expand);
1454 if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&
1455 !Subtarget->isThumb1Only()) {
1456 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
1457 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
1458 }
1459 setOperationAction(ISD::FPOW, MVT::f64, Expand);
1460 setOperationAction(ISD::FPOW, MVT::f32, Expand);
1461
1462 if (!Subtarget->hasVFP4Base()) {
1463 setOperationAction(ISD::FMA, MVT::f64, Expand);
1464 setOperationAction(ISD::FMA, MVT::f32, Expand);
1465 }
1466
1467 // Various VFP goodness
1468 if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
1469 // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
1470 if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) {
1471 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
1472 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
1473 }
1474
1475 // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
1476 if (!Subtarget->hasFP16()) {
1477 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
1478 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
1479 }
1480
1481 // Strict floating-point comparisons need custom lowering.
1482 setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom);
1483 setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom);
1484 setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Custom);
1485 setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Custom);
1486 setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Custom);
1487 setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Custom);
1488 }
1489
1490 // Use __sincos_stret if available.
1491 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1492 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1493 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
1494 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
1495 }
1496
1497 // FP-ARMv8 implements a lot of rounding-like FP operations.
1498 if (Subtarget->hasFPARMv8Base()) {
1499 setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
1500 setOperationAction(ISD::FCEIL, MVT::f32, Legal);
1501 setOperationAction(ISD::FROUND, MVT::f32, Legal);
1502 setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
1503 setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
1504 setOperationAction(ISD::FRINT, MVT::f32, Legal);
1505 setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
1506 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
1507 if (Subtarget->hasNEON()) {
1508 setOperationAction(ISD::FMINNUM, MVT::v2f32, Legal);
1509 setOperationAction(ISD::FMAXNUM, MVT::v2f32, Legal);
1510 setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
1511 setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
1512 }
1513
1514 if (Subtarget->hasFP64()) {
1515 setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
1516 setOperationAction(ISD::FCEIL, MVT::f64, Legal);
1517 setOperationAction(ISD::FROUND, MVT::f64, Legal);
1518 setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
1519 setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
1520 setOperationAction(ISD::FRINT, MVT::f64, Legal);
1521 setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
1522 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
1523 }
1524 }
1525
1526 // FP16 operations often need to be promoted to call library functions.
1527 if (Subtarget->hasFullFP16()) {
1528 setOperationAction(ISD::FREM, MVT::f16, Promote);
1529 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
1530 setOperationAction(ISD::FSIN, MVT::f16, Promote);
1531 setOperationAction(ISD::FCOS, MVT::f16, Promote);
1532 setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
1533 setOperationAction(ISD::FPOWI, MVT::f16, Promote);
1534 setOperationAction(ISD::FPOW, MVT::f16, Promote);
1535 setOperationAction(ISD::FEXP, MVT::f16, Promote);
1536 setOperationAction(ISD::FEXP2, MVT::f16, Promote);
1537 setOperationAction(ISD::FLOG, MVT::f16, Promote);
1538 setOperationAction(ISD::FLOG10, MVT::f16, Promote);
1539 setOperationAction(ISD::FLOG2, MVT::f16, Promote);
1540
1541 setOperationAction(ISD::FROUND, MVT::f16, Legal);
1542 }
1543
1544 if (Subtarget->hasNEON()) {
1545 // vmin and vmax aren't available in a scalar form, so we can use
1546 // a NEON instruction with an undef lane instead. This has a performance
1547 // penalty on some cores, so we don't do this unless we have been
1548 // asked to by the core tuning model.
1549 if (Subtarget->useNEONForSinglePrecisionFP()) {
1550 setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);
1551 setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
1552 setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
1553 setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);
1554 }
1555 setOperationAction(ISD::FMINIMUM, MVT::v2f32, Legal);
1556 setOperationAction(ISD::FMAXIMUM, MVT::v2f32, Legal);
1557 setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal);
1558 setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal);
1559
1560 if (Subtarget->hasFullFP16()) {
1561 setOperationAction(ISD::FMINNUM, MVT::v4f16, Legal);
1562 setOperationAction(ISD::FMAXNUM, MVT::v4f16, Legal);
1563 setOperationAction(ISD::FMINNUM, MVT::v8f16, Legal);
1564 setOperationAction(ISD::FMAXNUM, MVT::v8f16, Legal);
1565
1566 setOperationAction(ISD::FMINIMUM, MVT::v4f16, Legal);
1567 setOperationAction(ISD::FMAXIMUM, MVT::v4f16, Legal);
1568 setOperationAction(ISD::FMINIMUM, MVT::v8f16, Legal);
1569 setOperationAction(ISD::FMAXIMUM, MVT::v8f16, Legal);
1570 }
1571 }
1572
1573 // We have target-specific dag combine patterns for the following nodes:
1574 // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
1575 setTargetDAGCombine(
1576 {ISD::ADD, ISD::SUB, ISD::MUL, ISD::AND, ISD::OR, ISD::XOR});
1577
1578 if (Subtarget->hasMVEIntegerOps())
1579 setTargetDAGCombine(ISD::VSELECT);
1580
1581 if (Subtarget->hasV6Ops())
1582 setTargetDAGCombine(ISD::SRL);
1583 if (Subtarget->isThumb1Only())
1584 setTargetDAGCombine(ISD::SHL);
1585 // Attempt to lower smin/smax to ssat/usat
1586 if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) ||
1587 Subtarget->isThumb2()) {
1588 setTargetDAGCombine({ISD::SMIN, ISD::SMAX});
1589 }
1590
1591 setStackPointerRegisterToSaveRestore(ARM::SP);
1592
1593 if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
1594 !Subtarget->hasVFP2Base() || Subtarget->hasMinSize())
1595 setSchedulingPreference(Sched::RegPressure);
1596 else
1597 setSchedulingPreference(Sched::Hybrid);
1598
1599 //// temporary - rewrite interface to use type
1600 MaxStoresPerMemset = 8;
1601 MaxStoresPerMemsetOptSize = 4;
1602 MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
1603 MaxStoresPerMemcpyOptSize = 2;
1604 MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
1605 MaxStoresPerMemmoveOptSize = 2;
1606
1607 // On ARM arguments smaller than 4 bytes are extended, so all arguments
1608 // are at least 4 bytes aligned.
1609 setMinStackArgumentAlignment(Align(4));
1610
1611 // Prefer likely predicted branches to selects on out-of-order cores.
1612 PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
1613
1614 setPrefLoopAlignment(Align(1ULL << Subtarget->getPrefLoopLogAlignment()));
1615
1616 setMinFunctionAlignment(Subtarget->isThumb() ? Align(2) : Align(4));
1617
1618 if (Subtarget->isThumb() || Subtarget->isThumb2())
1619 setTargetDAGCombine(ISD::ABS);
1620}
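
Editor's note (not part of the source): the constructor above is essentially a table of legalization actions. Legal means the node is selected directly, Expand lets the generic legalizer rewrite it in terms of other operations, LibCall forces a runtime library call, Custom routes the node through the target's LowerOperation hook, and Promote widens the type first. A minimal, hypothetical sketch of such a per-(opcode, type) table, with names that do not come from LLVM:

#include <map>
#include <utility>

// Hypothetical illustration only.
enum class Action { Legal, Expand, LibCall, Custom, Promote };

struct ActionTable {
  std::map<std::pair<int, int>, Action> Table;   // (opcode, value type) -> action
  void set(int Op, int VT, Action A) { Table[{Op, VT}] = A; }
  Action get(int Op, int VT) const {
    auto It = Table.find({Op, VT});
    return It == Table.end() ? Action::Legal : It->second;  // default to Legal
  }
};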
1621
1622bool ARMTargetLowering::useSoftFloat() const {
1623 return Subtarget->useSoftFloat();
1624}
1625
1626// FIXME: It might make sense to define the representative register class as the
1627// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
1628 // a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
1629// SPR's representative would be DPR_VFP2. This should work well if register
1630// pressure tracking were modified such that a register use would increment the
1631 // pressure of the register class's representative and all of its super
1632// classes' representatives transitively. We have not implemented this because
1633// of the difficulty prior to coalescing of modeling operand register classes
1634// due to the common occurrence of cross class copies and subregister insertions
1635// and extractions.
1636std::pair<const TargetRegisterClass *, uint8_t>
1637ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
1638 MVT VT) const {
1639 const TargetRegisterClass *RRC = nullptr;
1640 uint8_t Cost = 1;
1641 switch (VT.SimpleTy) {
1642 default:
1643 return TargetLowering::findRepresentativeClass(TRI, VT);
1644 // Use DPR as representative register class for all floating point
1645 // and vector types. Since there are 32 SPR registers and 32 DPR registers,
1646 // the cost is 1 for both f32 and f64.
1647 case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
1648 case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
1649 RRC = &ARM::DPRRegClass;
1650 // When NEON is used for SP, only half of the register file is available
1651 // because operations that define both SP and DP results will be constrained
1652 // to the VFP2 class (D0-D15). We currently model this constraint prior to
1653 // coalescing by double-counting the SP regs. See the FIXME above.
1654 if (Subtarget->useNEONForSinglePrecisionFP())
1655 Cost = 2;
1656 break;
1657 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1658 case MVT::v4f32: case MVT::v2f64:
1659 RRC = &ARM::DPRRegClass;
1660 Cost = 2;
1661 break;
1662 case MVT::v4i64:
1663 RRC = &ARM::DPRRegClass;
1664 Cost = 4;
1665 break;
1666 case MVT::v8i64:
1667 RRC = &ARM::DPRRegClass;
1668 Cost = 8;
1669 break;
1670 }
1671 return std::make_pair(RRC, Cost);
1672}
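
The cost values above follow a simple pattern: DPR (a 64-bit D register) is the representative class, so the pressure weight roughly equals the number of D registers the type spans, with the NEON-for-single-precision case double-counted as the comment explains. An illustrative sketch of that arithmetic (not code from the file):

// 1 for f32/f64 and 64-bit vectors, 2 for 128-bit vectors, 4 for v4i64,
// 8 for v8i64; the NEON-for-SP doubling is handled separately above.
unsigned dprCost(unsigned TypeSizeInBits) {
  unsigned DRegs = (TypeSizeInBits + 63) / 64;
  return DRegs ? DRegs : 1;
}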
1673
1674const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
1675#define MAKE_CASE(V) \
1676 case V: \
1677 return #V;
1678 switch ((ARMISD::NodeType)Opcode) {
1679 case ARMISD::FIRST_NUMBER:
1680 break;
1681 MAKE_CASE(ARMISD::Wrapper)
1682 MAKE_CASE(ARMISD::WrapperPIC)
1683 MAKE_CASE(ARMISD::WrapperJT)
1684 MAKE_CASE(ARMISD::COPY_STRUCT_BYVAL)
1685 MAKE_CASE(ARMISD::CALL)
1686 MAKE_CASE(ARMISD::CALL_PRED)
1687 MAKE_CASE(ARMISD::CALL_NOLINK)
1688 MAKE_CASE(ARMISD::tSECALL)
1689 MAKE_CASE(ARMISD::t2CALL_BTI)
1690 MAKE_CASE(ARMISD::BRCOND)
1691 MAKE_CASE(ARMISD::BR_JT)
1692 MAKE_CASE(ARMISD::BR2_JT)
1693 MAKE_CASE(ARMISD::RET_GLUE)
1694 MAKE_CASE(ARMISD::SERET_GLUE)
1695 MAKE_CASE(ARMISD::INTRET_GLUE)
1696 MAKE_CASE(ARMISD::PIC_ADD)
1697 MAKE_CASE(ARMISD::CMP)
1698 MAKE_CASE(ARMISD::CMN)
1699 MAKE_CASE(ARMISD::CMPZ)
1700 MAKE_CASE(ARMISD::CMPFP)
1701 MAKE_CASE(ARMISD::CMPFPE)
1702 MAKE_CASE(ARMISD::CMPFPw0)
1703 MAKE_CASE(ARMISD::CMPFPEw0)
1704 MAKE_CASE(ARMISD::BCC_i64)
1705 MAKE_CASE(ARMISD::FMSTAT)
1706 MAKE_CASE(ARMISD::CMOV)
1707 MAKE_CASE(ARMISD::SUBS)
1708 MAKE_CASE(ARMISD::SSAT)
1709 MAKE_CASE(ARMISD::USAT)
1710 MAKE_CASE(ARMISD::ASRL)
1711 MAKE_CASE(ARMISD::LSRL)
1712 MAKE_CASE(ARMISD::LSLL)
1713 MAKE_CASE(ARMISD::SRL_GLUE)
1714 MAKE_CASE(ARMISD::SRA_GLUE)
1715 MAKE_CASE(ARMISD::RRX)
1716 MAKE_CASE(ARMISD::ADDC)
1717 MAKE_CASE(ARMISD::ADDE)
1718 MAKE_CASE(ARMISD::SUBC)
1719 MAKE_CASE(ARMISD::SUBE)
1720 MAKE_CASE(ARMISD::LSLS)
1721 MAKE_CASE(ARMISD::VMOVRRD)
1722 MAKE_CASE(ARMISD::VMOVDRR)
1723 MAKE_CASE(ARMISD::VMOVhr)
1724 MAKE_CASE(ARMISD::VMOVrh)
1725 MAKE_CASE(ARMISD::VMOVSR)
1726 MAKE_CASE(ARMISD::EH_SJLJ_SETJMP)
1727 MAKE_CASE(ARMISD::EH_SJLJ_LONGJMP)
1728 MAKE_CASE(ARMISD::EH_SJLJ_SETUP_DISPATCH)
1729 MAKE_CASE(ARMISD::TC_RETURN)
1730 MAKE_CASE(ARMISD::THREAD_POINTER)
1731 MAKE_CASE(ARMISD::DYN_ALLOC)
1732 MAKE_CASE(ARMISD::MEMBARRIER_MCR)
1733 MAKE_CASE(ARMISD::PRELOAD)
1734 MAKE_CASE(ARMISD::LDRD)
1735 MAKE_CASE(ARMISD::STRD)
1736 MAKE_CASE(ARMISD::WIN__CHKSTK)
1737 MAKE_CASE(ARMISD::WIN__DBZCHK)
1738 MAKE_CASE(ARMISD::PREDICATE_CAST)
1739 MAKE_CASE(ARMISD::VECTOR_REG_CAST)
1740 MAKE_CASE(ARMISD::MVESEXT)
1741 MAKE_CASE(ARMISD::MVEZEXT)
1742 MAKE_CASE(ARMISD::MVETRUNC)
1743 MAKE_CASE(ARMISD::VCMP)
1744 MAKE_CASE(ARMISD::VCMPZ)
1745 MAKE_CASE(ARMISD::VTST)
1746 MAKE_CASE(ARMISD::VSHLs)
1747 MAKE_CASE(ARMISD::VSHLu)
1748 MAKE_CASE(ARMISD::VSHLIMM)
1749 MAKE_CASE(ARMISD::VSHRsIMM)
1750 MAKE_CASE(ARMISD::VSHRuIMM)
1751 MAKE_CASE(ARMISD::VRSHRsIMM)
1752 MAKE_CASE(ARMISD::VRSHRuIMM)
1753 MAKE_CASE(ARMISD::VRSHRNIMM)
1754 MAKE_CASE(ARMISD::VQSHLsIMM)
1755 MAKE_CASE(ARMISD::VQSHLuIMM)
1756 MAKE_CASE(ARMISD::VQSHLsuIMM)
1757 MAKE_CASE(ARMISD::VQSHRNsIMM)
1758 MAKE_CASE(ARMISD::VQSHRNuIMM)
1759 MAKE_CASE(ARMISD::VQSHRNsuIMM)
1760 MAKE_CASE(ARMISD::VQRSHRNsIMM)
1761 MAKE_CASE(ARMISD::VQRSHRNuIMM)
1762 MAKE_CASE(ARMISD::VQRSHRNsuIMM)
1763 MAKE_CASE(ARMISD::VSLIIMM)
1764 MAKE_CASE(ARMISD::VSRIIMM)
1765 MAKE_CASE(ARMISD::VGETLANEu)
1766 MAKE_CASE(ARMISD::VGETLANEs)
1767 MAKE_CASE(ARMISD::VMOVIMM)
1768 MAKE_CASE(ARMISD::VMVNIMM)
1769 MAKE_CASE(ARMISD::VMOVFPIMM)
1770 MAKE_CASE(ARMISD::VDUP)
1771 MAKE_CASE(ARMISD::VDUPLANE)
1772 MAKE_CASE(ARMISD::VEXT)
1773 MAKE_CASE(ARMISD::VREV64)
1774 MAKE_CASE(ARMISD::VREV32)
1775 MAKE_CASE(ARMISD::VREV16)
1776 MAKE_CASE(ARMISD::VZIP)
1777 MAKE_CASE(ARMISD::VUZP)
1778 MAKE_CASE(ARMISD::VTRN)
1779 MAKE_CASE(ARMISD::VTBL1)
1780 MAKE_CASE(ARMISD::VTBL2)
1781 MAKE_CASE(ARMISD::VMOVN)
1782 MAKE_CASE(ARMISD::VQMOVNs)
1783 MAKE_CASE(ARMISD::VQMOVNu)
1784 MAKE_CASE(ARMISD::VCVTN)
1785 MAKE_CASE(ARMISD::VCVTL)
1786 MAKE_CASE(ARMISD::VIDUP)
1787 MAKE_CASE(ARMISD::VMULLs)
1788 MAKE_CASE(ARMISD::VMULLu)
1789 MAKE_CASE(ARMISD::VQDMULH)
1790 MAKE_CASE(ARMISD::VADDVs)
1791 MAKE_CASE(ARMISD::VADDVu)
1792 MAKE_CASE(ARMISD::VADDVps)
1793 MAKE_CASE(ARMISD::VADDVpu)
1794 MAKE_CASE(ARMISD::VADDLVs)
1795 MAKE_CASE(ARMISD::VADDLVu)
1796 MAKE_CASE(ARMISD::VADDLVAs)
1797 MAKE_CASE(ARMISD::VADDLVAu)
1798 MAKE_CASE(ARMISD::VADDLVps)
1799 MAKE_CASE(ARMISD::VADDLVpu)
1800 MAKE_CASE(ARMISD::VADDLVAps)
1801 MAKE_CASE(ARMISD::VADDLVApu)
1802 MAKE_CASE(ARMISD::VMLAVs)
1803 MAKE_CASE(ARMISD::VMLAVu)
1804 MAKE_CASE(ARMISD::VMLAVps)
1805 MAKE_CASE(ARMISD::VMLAVpu)
1806 MAKE_CASE(ARMISD::VMLALVs)
1807 MAKE_CASE(ARMISD::VMLALVu)
1808 MAKE_CASE(ARMISD::VMLALVps)
1809 MAKE_CASE(ARMISD::VMLALVpu)
1810 MAKE_CASE(ARMISD::VMLALVAs)
1811 MAKE_CASE(ARMISD::VMLALVAu)
1812 MAKE_CASE(ARMISD::VMLALVAps)
1813 MAKE_CASE(ARMISD::VMLALVApu)
1814 MAKE_CASE(ARMISD::VMINVu)
1815 MAKE_CASE(ARMISD::VMINVs)
1816 MAKE_CASE(ARMISD::VMAXVu)
1817 MAKE_CASE(ARMISD::VMAXVs)
1818 MAKE_CASE(ARMISD::UMAAL)
1819 MAKE_CASE(ARMISD::UMLAL)
1820 MAKE_CASE(ARMISD::SMLAL)
1821 MAKE_CASE(ARMISD::SMLALBB)
1822 MAKE_CASE(ARMISD::SMLALBT)
1823 MAKE_CASE(ARMISD::SMLALTB)
1824 MAKE_CASE(ARMISD::SMLALTT)
1825 MAKE_CASE(ARMISD::SMULWB)
1826 MAKE_CASE(ARMISD::SMULWT)
1827 MAKE_CASE(ARMISD::SMLALD)
1828 MAKE_CASE(ARMISD::SMLALDX)
1829 MAKE_CASE(ARMISD::SMLSLD)
1830 MAKE_CASE(ARMISD::SMLSLDX)
1831 MAKE_CASE(ARMISD::SMMLAR)
1832 MAKE_CASE(ARMISD::SMMLSR)
1833 MAKE_CASE(ARMISD::QADD16b)
1834 MAKE_CASE(ARMISD::QSUB16b)
1835 MAKE_CASE(ARMISD::QADD8b)
1836 MAKE_CASE(ARMISD::QSUB8b)
1837 MAKE_CASE(ARMISD::UQADD16b)
1838 MAKE_CASE(ARMISD::UQSUB16b)
1839 MAKE_CASE(ARMISD::UQADD8b)
1840 MAKE_CASE(ARMISD::UQSUB8b)
1841 MAKE_CASE(ARMISD::BUILD_VECTOR)
1842 MAKE_CASE(ARMISD::BFI)
1843 MAKE_CASE(ARMISD::VORRIMM)
1844 MAKE_CASE(ARMISD::VBICIMM)
1845 MAKE_CASE(ARMISD::VBSP)
1846 MAKE_CASE(ARMISD::MEMCPY)
1847 MAKE_CASE(ARMISD::VLD1DUP)
1848 MAKE_CASE(ARMISD::VLD2DUP)
1849 MAKE_CASE(ARMISD::VLD3DUP)
1850 MAKE_CASE(ARMISD::VLD4DUP)
1851 MAKE_CASE(ARMISD::VLD1_UPD)
1852 MAKE_CASE(ARMISD::VLD2_UPD)
1853 MAKE_CASE(ARMISD::VLD3_UPD)
1854 MAKE_CASE(ARMISD::VLD4_UPD)
1855 MAKE_CASE(ARMISD::VLD1x2_UPD)
1856 MAKE_CASE(ARMISD::VLD1x3_UPD)
1857 MAKE_CASE(ARMISD::VLD1x4_UPD)
1858 MAKE_CASE(ARMISD::VLD2LN_UPD)
1859 MAKE_CASE(ARMISD::VLD3LN_UPD)
1860 MAKE_CASE(ARMISD::VLD4LN_UPD)
1861 MAKE_CASE(ARMISD::VLD1DUP_UPD)
1862 MAKE_CASE(ARMISD::VLD2DUP_UPD)
1863 MAKE_CASE(ARMISD::VLD3DUP_UPD)
1864 MAKE_CASE(ARMISD::VLD4DUP_UPD)
1865 MAKE_CASE(ARMISD::VST1_UPD)
1866 MAKE_CASE(ARMISD::VST2_UPD)
1867 MAKE_CASE(ARMISD::VST3_UPD)
1868 MAKE_CASE(ARMISD::VST4_UPD)
1869 MAKE_CASE(ARMISD::VST1x2_UPD)
1870 MAKE_CASE(ARMISD::VST1x3_UPD)
1871 MAKE_CASE(ARMISD::VST1x4_UPD)
1872 MAKE_CASE(ARMISD::VST2LN_UPD)
1873 MAKE_CASE(ARMISD::VST3LN_UPD)
1874 MAKE_CASE(ARMISD::VST4LN_UPD)
1875 MAKE_CASE(ARMISD::WLS)
1876 MAKE_CASE(ARMISD::WLSSETUP)
1877 MAKE_CASE(ARMISD::LE)
1878 MAKE_CASE(ARMISD::LOOP_DEC)
1879 MAKE_CASE(ARMISD::CSINV)
1880 MAKE_CASE(ARMISD::CSNEG)
1881 MAKE_CASE(ARMISD::CSINC)
1882 MAKE_CASE(ARMISD::MEMCPYLOOP)
1883 MAKE_CASE(ARMISD::MEMSETLOOP)
1884#undef MAKE_CASE
1885 }
1886 return nullptr;
1887}
1888
1889EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
1890 EVT VT) const {
1891 if (!VT.isVector())
1892 return getPointerTy(DL);
1893
1894 // MVE has a predicate register.
1895 if ((Subtarget->hasMVEIntegerOps() &&
1896 (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
1897 VT == MVT::v16i8)) ||
1898 (Subtarget->hasMVEFloatOps() &&
1899 (VT == MVT::v2f64 || VT == MVT::v4f32 || VT == MVT::v8f16)))
1900 return MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
1901 return VT.changeVectorElementTypeToInteger();
1902}
1903
1904/// getRegClassFor - Return the register class that should be used for the
1905/// specified value type.
1906const TargetRegisterClass *
1907ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
1908 (void)isDivergent;
1909 // Map v4i64 to QQ registers but do not make the type legal. Similarly map
1910 // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
1911 // load / store 4 to 8 consecutive NEON D registers, or 2 to 4 consecutive
1912 // MVE Q registers.
1913 if (Subtarget->hasNEON()) {
1914 if (VT == MVT::v4i64)
1915 return &ARM::QQPRRegClass;
1916 if (VT == MVT::v8i64)
1917 return &ARM::QQQQPRRegClass;
1918 }
1919 if (Subtarget->hasMVEIntegerOps()) {
1920 if (VT == MVT::v4i64)
1921 return &ARM::MQQPRRegClass;
1922 if (VT == MVT::v8i64)
1923 return &ARM::MQQQQPRRegClass;
1924 }
1925 return TargetLowering::getRegClassFor(VT);
1926}
1927
1928 // memcpy, and other memory intrinsics, typically try to use LDM/STM if the
1929// source/dest is aligned and the copy size is large enough. We therefore want
1930// to align such objects passed to memory intrinsics.
1931bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
1932 Align &PrefAlign) const {
1933 if (!isa<MemIntrinsic>(CI))
1934 return false;
1935 MinSize = 8;
1936 // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
1937 // cycle faster than 4-byte aligned LDM.
1938 PrefAlign =
1939 (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? Align(8) : Align(4));
1940 return true;
1941}
1942
1943// Create a fast isel object.
1944FastISel *
1945ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
1946 const TargetLibraryInfo *libInfo) const {
1947 return ARM::createFastISel(funcInfo, libInfo);
1948}
1949
1950Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
1951 unsigned NumVals = N->getNumValues();
1952 if (!NumVals)
1953 return Sched::RegPressure;
1954
1955 for (unsigned i = 0; i != NumVals; ++i) {
1956 EVT VT = N->getValueType(i);
1957 if (VT == MVT::Glue || VT == MVT::Other)
1958 continue;
1959 if (VT.isFloatingPoint() || VT.isVector())
1960 return Sched::ILP;
1961 }
1962
1963 if (!N->isMachineOpcode())
1964 return Sched::RegPressure;
1965
1966 // Loads are scheduled for latency even if the instruction itinerary
1967 // is not available.
1968 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1969 const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1970
1971 if (MCID.getNumDefs() == 0)
1972 return Sched::RegPressure;
1973 if (!Itins->isEmpty() &&
1974 Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
1975 return Sched::ILP;
1976
1977 return Sched::RegPressure;
1978}
1979
1980//===----------------------------------------------------------------------===//
1981// Lowering Code
1982//===----------------------------------------------------------------------===//
1983
1984static bool isSRL16(const SDValue &Op) {
1985 if (Op.getOpcode() != ISD::SRL)
1986 return false;
1987 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1988 return Const->getZExtValue() == 16;
1989 return false;
1990}
1991
1992static bool isSRA16(const SDValue &Op) {
1993 if (Op.getOpcode() != ISD::SRA)
1994 return false;
1995 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1996 return Const->getZExtValue() == 16;
1997 return false;
1998}
1999
2000static bool isSHL16(const SDValue &Op) {
2001 if (Op.getOpcode() != ISD::SHL)
2002 return false;
2003 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
2004 return Const->getZExtValue() == 16;
2005 return false;
2006}
2007
2008 // Check for a signed 16-bit value. We special-case SRA because it makes it
2009 // simpler when also looking for SRAs that aren't sign extending a
2010// smaller value. Without the check, we'd need to take extra care with
2011// checking order for some operations.
2012static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
2013 if (isSRA16(Op))
2014 return isSHL16(Op.getOperand(0));
2015 return DAG.ComputeNumSignBits(Op) == 17;
2016}
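
To make the ComputeNumSignBits(Op) == 17 check above concrete: a value that is really a sign-extended 16-bit quantity held in an i32 has at least 17 leading bits equal to its sign bit (the sign bit plus the 16 copies the extension produced), and the conservative DAG analysis reports exactly 17 for a plain sign_extend from i16. A standalone sketch of the underlying bit count (illustrative, not LLVM code):

#include <cstdint>

// Counts how many leading bits of a 32-bit value match its sign bit.
unsigned numSignBits(int32_t V) {
  uint32_t U = uint32_t(V);
  uint32_t Sign = U >> 31;
  unsigned N = 1;                                   // the sign bit itself
  for (int Bit = 30; Bit >= 0 && ((U >> Bit) & 1) == Sign; --Bit)
    ++N;
  return N;
}
// numSignBits(int32_t(int16_t(-1234))) == 21 (>= 17), so it fits in 16 bits;
// numSignBits(0x12345678) == 3, so it does not.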
2017
2018/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
2019static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
2020 switch (CC) {
2021 default: llvm_unreachable("Unknown condition code!");
2022 case ISD::SETNE: return ARMCC::NE;
2023 case ISD::SETEQ: return ARMCC::EQ;
2024 case ISD::SETGT: return ARMCC::GT;
2025 case ISD::SETGE: return ARMCC::GE;
2026 case ISD::SETLT: return ARMCC::LT;
2027 case ISD::SETLE: return ARMCC::LE;
2028 case ISD::SETUGT: return ARMCC::HI;
2029 case ISD::SETUGE: return ARMCC::HS;
2030 case ISD::SETULT: return ARMCC::LO;
2031 case ISD::SETULE: return ARMCC::LS;
2032 }
2033}
2034
2035/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
2036static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
2037 ARMCC::CondCodes &CondCode2) {
2038 CondCode2 = ARMCC::AL;
2039 switch (CC) {
2040 default: llvm_unreachable("Unknown FP condition!");
2041 case ISD::SETEQ:
2042 case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
2043 case ISD::SETGT:
2044 case ISD::SETOGT: CondCode = ARMCC::GT; break;
2045 case ISD::SETGE:
2046 case ISD::SETOGE: CondCode = ARMCC::GE; break;
2047 case ISD::SETOLT: CondCode = ARMCC::MI; break;
2048 case ISD::SETOLE: CondCode = ARMCC::LS; break;
2049 case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
2050 case ISD::SETO: CondCode = ARMCC::VC; break;
2051 case ISD::SETUO: CondCode = ARMCC::VS; break;
2052 case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
2053 case ISD::SETUGT: CondCode = ARMCC::HI; break;
2054 case ISD::SETUGE: CondCode = ARMCC::PL; break;
2055 case ISD::SETLT:
2056 case ISD::SETULT: CondCode = ARMCC::LT; break;
2057 case ISD::SETLE:
2058 case ISD::SETULE: CondCode = ARMCC::LE; break;
2059 case ISD::SETNE:
2060 case ISD::SETUNE: CondCode = ARMCC::NE; break;
2061 }
2062}
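
Most FP predicates map onto a single ARM condition code, but the unordered-aware ones need two: SETONE, for example, becomes MI followed by GT, and SETUEQ becomes EQ followed by VS. A standalone sketch of the SETONE case (illustrative semantics only, not the emitted code):

// SETONE = "ordered and not equal". With a NaN operand both checks below fail,
// which is the behaviour the MI-then-GT pair above encodes after an FP compare.
bool setONE(double A, double B) {
  bool LessThan = A < B;      // analogous to the ARMCC::MI check
  bool GreaterThan = A > B;   // analogous to the ARMCC::GT check
  return LessThan || GreaterThan;
}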
2063
2064//===----------------------------------------------------------------------===//
2065// Calling Convention Implementation
2066//===----------------------------------------------------------------------===//
2067
2068/// getEffectiveCallingConv - Get the effective calling convention, taking into
2069/// account presence of floating point hardware and calling convention
2070/// limitations, such as support for variadic functions.
2071CallingConv::ID
2072ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
2073 bool isVarArg) const {
2074 switch (CC) {
2075 default:
2076 report_fatal_error("Unsupported calling convention");
2077 case CallingConv::ARM_AAPCS:
2078 case CallingConv::ARM_APCS:
2079 case CallingConv::GHC:
2080 case CallingConv::CFGuard_Check:
2081 return CC;
2082 case CallingConv::PreserveMost:
2083 return CallingConv::PreserveMost;
2084 case CallingConv::PreserveAll:
2085 return CallingConv::PreserveAll;
2086 case CallingConv::ARM_AAPCS_VFP:
2087 case CallingConv::Swift:
2088 case CallingConv::SwiftTail:
2089 return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP;
2090 case CallingConv::C:
2091 case CallingConv::Tail:
2092 if (!Subtarget->isAAPCS_ABI())
2093 return CallingConv::ARM_APCS;
2094 else if (Subtarget->hasFPRegs() && !Subtarget->isThumb1Only() &&
2095 getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
2096 !isVarArg)
2097 return CallingConv::ARM_AAPCS_VFP;
2098 else
2099 return CallingConv::ARM_AAPCS;
2100 case CallingConv::Fast:
2101 case CallingConv::CXX_FAST_TLS:
2102 if (!Subtarget->isAAPCS_ABI()) {
2103 if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && !isVarArg)
2104 return CallingConv::Fast;
2105 return CallingConv::ARM_APCS;
2106 } else if (Subtarget->hasVFP2Base() &&
2107 !Subtarget->isThumb1Only() && !isVarArg)
2108 return CallingConv::ARM_AAPCS_VFP;
2109 else
2110 return CallingConv::ARM_AAPCS;
2111 }
2112}
2113
2114CCAssignFn *ARMTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
2115 bool isVarArg) const {
2116 return CCAssignFnForNode(CC, false, isVarArg);
2117}
2118
2119CCAssignFn *ARMTargetLowering::CCAssignFnForReturn(CallingConv::ID CC,
2120 bool isVarArg) const {
2121 return CCAssignFnForNode(CC, true, isVarArg);
2122}
2123
2124/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
2125/// CallingConvention.
2126CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
2127 bool Return,
2128 bool isVarArg) const {
2129 switch (getEffectiveCallingConv(CC, isVarArg)) {
2130 default:
2131 report_fatal_error("Unsupported calling convention");
2132 case CallingConv::ARM_APCS:
2133 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
2134 case CallingConv::ARM_AAPCS:
2135 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2136 case CallingConv::ARM_AAPCS_VFP:
2137 return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
2138 case CallingConv::Fast:
2139 return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
2140 case CallingConv::GHC:
2141 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
2142 case CallingConv::PreserveMost:
2143 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2144 case CallingConv::PreserveAll:
2145 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2146 case CallingConv::CFGuard_Check:
2147 return (Return ? RetCC_ARM_AAPCS : CC_ARM_Win32_CFGuard_Check);
2148 }
2149}
2150
2151SDValue ARMTargetLowering::MoveToHPR(const SDLoc &dl, SelectionDAG &DAG,
2152 MVT LocVT, MVT ValVT, SDValue Val) const {
2153 Val = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocVT.getSizeInBits()),
2154 Val);
2155 if (Subtarget->hasFullFP16()) {
2156 Val = DAG.getNode(ARMISD::VMOVhr, dl, ValVT, Val);
2157 } else {
2158 Val = DAG.getNode(ISD::TRUNCATE, dl,
2159 MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
2160 Val = DAG.getNode(ISD::BITCAST, dl, ValVT, Val);
2161 }
2162 return Val;
2163}
2164
2165SDValue ARMTargetLowering::MoveFromHPR(const SDLoc &dl, SelectionDAG &DAG,
2166 MVT LocVT, MVT ValVT,
2167 SDValue Val) const {
2168 if (Subtarget->hasFullFP16()) {
2169 Val = DAG.getNode(ARMISD::VMOVrh, dl,
2170 MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
2171 } else {
2172 Val = DAG.getNode(ISD::BITCAST, dl,
2173 MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
2174 Val = DAG.getNode(ISD::ZERO_EXTEND, dl,
2175 MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
2176 }
2177 return DAG.getNode(ISD::BITCAST, dl, LocVT, Val);
2178}
2179
2180/// LowerCallResult - Lower the result values of a call into the
2181/// appropriate copies out of appropriate physical registers.
2182SDValue ARMTargetLowering::LowerCallResult(
2183 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
2184 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2185 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
2186 SDValue ThisVal) const {
2187 // Assign locations to each value returned by this call.
2188 SmallVector<CCValAssign, 16> RVLocs;
2189 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2190 *DAG.getContext());
2191 CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
2192
2193 // Copy all of the result registers out of their specified physreg.
2194 for (unsigned i = 0; i != RVLocs.size(); ++i) {
2195 CCValAssign VA = RVLocs[i];
2196
2197 // Pass 'this' value directly from the argument to return value, to avoid
2198 // reg unit interference
2199 if (i == 0 && isThisReturn) {
2200 assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
2201 "unexpected return calling convention register assignment");
2202 InVals.push_back(ThisVal);
2203 continue;
2204 }
2205
2206 SDValue Val;
2207 if (VA.needsCustom() &&
2208 (VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2f64)) {
2209 // Handle f64 or half of a v2f64.
2210 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
2211 InGlue);
2212 Chain = Lo.getValue(1);
2213 InGlue = Lo.getValue(2);
2214 VA = RVLocs[++i]; // skip ahead to next loc
2215 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
2216 InGlue);
2217 Chain = Hi.getValue(1);
2218 InGlue = Hi.getValue(2);
2219 if (!Subtarget->isLittle())
2220 std::swap (Lo, Hi);
2221 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
2222
2223 if (VA.getLocVT() == MVT::v2f64) {
2224 SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
2225 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
2226 DAG.getConstant(0, dl, MVT::i32));
2227
2228 VA = RVLocs[++i]; // skip ahead to next loc
2229 Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InGlue);
2230 Chain = Lo.getValue(1);
2231 InGlue = Lo.getValue(2);
2232 VA = RVLocs[++i]; // skip ahead to next loc
2233 Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InGlue);
2234 Chain = Hi.getValue(1);
2235 InGlue = Hi.getValue(2);
2236 if (!Subtarget->isLittle())
2237 std::swap (Lo, Hi);
2238 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
2239 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
2240 DAG.getConstant(1, dl, MVT::i32));
2241 }
2242 } else {
2243 Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
2244 InGlue);
2245 Chain = Val.getValue(1);
2246 InGlue = Val.getValue(2);
2247 }
2248
2249 switch (VA.getLocInfo()) {
2250 default: llvm_unreachable("Unknown loc info!");
2251 case CCValAssign::Full: break;
2252 case CCValAssign::BCvt:
2253 Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
2254 break;
2255 }
2256
2257 // f16 arguments have their size extended to 4 bytes and passed as if they
2258 // had been copied to the LSBs of a 32-bit register.
2259 // For that, they are passed extended to i32 (soft ABI) or to f32 (hard ABI).
2260 if (VA.needsCustom() &&
2261 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
2262 Val = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Val);
2263
2264 InVals.push_back(Val);
2265 }
2266
2267 return Chain;
2268}
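
For the f64-in-two-GPRs case above, the two copies out of the return registers are glued together and rebuilt into one double via ARMISD::VMOVDRR, with the halves swapped on big-endian subtargets. A rough bit-level sketch of what that reassembly amounts to (illustrative assumptions about which half is "low"; not the DAG code):

#include <cstdint>
#include <cstring>

// Rebuild an f64 from the two 32-bit halves it was returned in. Which physical
// register holds which half depends on endianness, hence the swap above.
double rebuildF64(uint32_t Lo, uint32_t Hi) {
  uint64_t Bits = (uint64_t(Hi) << 32) | Lo;
  double D;
  std::memcpy(&D, &Bits, sizeof D);       // bit-for-bit reinterpretation
  return D;
}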
2269
2270std::pair<SDValue, MachinePointerInfo> ARMTargetLowering::computeAddrForCallArg(
2271 const SDLoc &dl, SelectionDAG &DAG, const CCValAssign &VA, SDValue StackPtr,
2272 bool IsTailCall, int SPDiff) const {
2273 SDValue DstAddr;
2274 MachinePointerInfo DstInfo;
2275 int32_t Offset = VA.getLocMemOffset();
2276 MachineFunction &MF = DAG.getMachineFunction();
2277
2278 if (IsTailCall) {
2279 Offset += SPDiff;
2280 auto PtrVT = getPointerTy(DAG.getDataLayout());
2281 int Size = VA.getLocVT().getFixedSizeInBits() / 8;
2282 int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
2283 DstAddr = DAG.getFrameIndex(FI, PtrVT);
2284 DstInfo =
2285 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
2286 } else {
2287 SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl);
2288 DstAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
2289 StackPtr, PtrOff);
2290 DstInfo =
2291 MachinePointerInfo::getStack(DAG.getMachineFunction(), Offset);
2292 }
2293
2294 return std::make_pair(DstAddr, DstInfo);
2295}
2296
2297void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
2298 SDValue Chain, SDValue &Arg,
2299 RegsToPassVector &RegsToPass,
2300 CCValAssign &VA, CCValAssign &NextVA,
2301 SDValue &StackPtr,
2302 SmallVectorImpl<SDValue> &MemOpChains,
2303 bool IsTailCall,
2304 int SPDiff) const {
2305 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2306 DAG.getVTList(MVT::i32, MVT::i32), Arg);
2307 unsigned id = Subtarget->isLittle() ? 0 : 1;
2308 RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
2309
2310 if (NextVA.isRegLoc())
2311 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
2312 else {
2313 assert(NextVA.isMemLoc());
2314 if (!StackPtr.getNode())
2315 StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
2316 getPointerTy(DAG.getDataLayout()));
2317
2318 SDValue DstAddr;
2319 MachinePointerInfo DstInfo;
2320 std::tie(DstAddr, DstInfo) =
2321 computeAddrForCallArg(dl, DAG, NextVA, StackPtr, IsTailCall, SPDiff);
2322 MemOpChains.push_back(
2323 DAG.getStore(Chain, dl, fmrrd.getValue(1 - id), DstAddr, DstInfo));
2324 }
2325}
2326
2327static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
2328 return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
2329 CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
2330}
2331
2332/// LowerCall - Lowering a call into a callseq_start <-
2333/// ARMISD:CALL <- callseq_end chain. Also add input and output parameter
2334/// nodes.
2335SDValue
2336ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
2337 SmallVectorImpl<SDValue> &InVals) const {
2338 SelectionDAG &DAG = CLI.DAG;
2339 SDLoc &dl = CLI.DL;
2340 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2341 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2342 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2343 SDValue Chain = CLI.Chain;
2344 SDValue Callee = CLI.Callee;
2345 bool &isTailCall = CLI.IsTailCall;
2346 CallingConv::ID CallConv = CLI.CallConv;
2347 bool doesNotRet = CLI.DoesNotReturn;
2348 bool isVarArg = CLI.IsVarArg;
2349
2350 MachineFunction &MF = DAG.getMachineFunction();
2351 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2352 MachineFunction::CallSiteInfo CSInfo;
2353 bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
2354 bool isThisReturn = false;
2355 bool isCmseNSCall = false;
2356 bool isSibCall = false;
2357 bool PreferIndirect = false;
2358 bool GuardWithBTI = false;
2359
2360 // Lower 'returns_twice' calls to a pseudo-instruction.
2361 if (CLI.CB && CLI.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) &&
2362 !Subtarget->noBTIAtReturnTwice())
2363 GuardWithBTI = AFI->branchTargetEnforcement();
2364
2365 // Determine whether this is a non-secure function call.
2366 if (CLI.CB && CLI.CB->getAttributes().hasFnAttr("cmse_nonsecure_call"))
2367 isCmseNSCall = true;
2368
2369 // Disable tail calls if they're not supported.
2370 if (!Subtarget->supportsTailCall())
2371 isTailCall = false;
2372
2373 // For both the non-secure calls and the returns from a CMSE entry function,
2374 // the function needs to do some extra work after the call, or before the
2375 // return, respectively, thus it cannot end with a tail call.
2376 if (isCmseNSCall || AFI->isCmseNSEntryFunction())
2377 isTailCall = false;
2378
2379 if (isa<GlobalAddressSDNode>(Callee)) {
2380 // If we're optimizing for minimum size and the function is called three or
2381 // more times in this block, we can improve codesize by calling indirectly
2382 // as BLXr has a 16-bit encoding.
2383 auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
2384 if (CLI.CB) {
2385 auto *BB = CLI.CB->getParent();
2386 PreferIndirect = Subtarget->isThumb() && Subtarget->hasMinSize() &&
2387 count_if(GV->users(), [&BB](const User *U) {
2388 return isa<Instruction>(U) &&
2389 cast<Instruction>(U)->getParent() == BB;
2390 }) > 2;
2391 }
2392 }
2393 if (isTailCall) {
2394 // Check if it's really possible to do a tail call.
2395 isTailCall = IsEligibleForTailCallOptimization(
2396 Callee, CallConv, isVarArg, isStructRet,
2397 MF.getFunction().hasStructRetAttr(), Outs, OutVals, Ins, DAG,
2398 PreferIndirect);
2399
2400 if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt &&
2401 CallConv != CallingConv::Tail && CallConv != CallingConv::SwiftTail)
2402 isSibCall = true;
2403
2404 // We don't support GuaranteedTailCallOpt for ARM, only automatically
2405 // detected sibcalls.
2406 if (isTailCall)
2407 ++NumTailCalls;
2408 }
2409
2410 if (!isTailCall && CLI.CB && CLI.CB->isMustTailCall())
2411 report_fatal_error("failed to perform tail call elimination on a call "
2412 "site marked musttail");
2413 // Analyze operands of the call, assigning locations to each operand.
2414 SmallVector<CCValAssign, 16> ArgLocs;
2415 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
2416 *DAG.getContext());
2417 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
2418
2419 // Get a count of how many bytes are to be pushed on the stack.
2420 unsigned NumBytes = CCInfo.getNextStackOffset();
2421
2422 // SPDiff is the byte offset of the call's argument area from the callee's.
2423 // Stores to callee stack arguments will be placed in FixedStackSlots offset
2424 // by this amount for a tail call. In a sibling call it must be 0 because the
2425 // caller will deallocate the entire stack and the callee still expects its
2426 // arguments to begin at SP+0. Completely unused for non-tail calls.
2427 int SPDiff = 0;
2428
2429 if (isTailCall && !isSibCall) {
2430 auto FuncInfo = MF.getInfo<ARMFunctionInfo>();
2431 unsigned NumReusableBytes = FuncInfo->getArgumentStackSize();
2432
2433 // Since the callee will pop the argument stack as a tail call, we must keep
2434 // the popped size 16-byte aligned.
2435 Align StackAlign = DAG.getDataLayout().getStackAlignment();
2436 NumBytes = alignTo(NumBytes, StackAlign);
2437
2438 // SPDiff will be negative if this tail call requires more space than we
2439 // would automatically have in our incoming argument space. Positive if we
2440 // can actually shrink the stack.
2441 SPDiff = NumReusableBytes - NumBytes;
2442
2443 // If this call requires more stack than we have available from
2444 // LowerFormalArguments, tell FrameLowering to reserve space for it.
2445 if (SPDiff < 0 && AFI->getArgRegsSaveSize() < (unsigned)-SPDiff)
2446 AFI->setArgRegsSaveSize(-SPDiff);
2447 }
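// Worked example of the SPDiff arithmetic above (illustrative numbers, not from
// the source): with a 16-byte stack alignment, a tail call needing NumBytes = 20
// is rounded up to 32; if the caller's reusable argument area is
// NumReusableBytes = 16, then SPDiff = 16 - 32 = -16, so an extra 16 bytes are
// reserved via setArgRegsSaveSize(16).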
2448
2449 if (isSibCall) {
2450 // For sibling tail calls, memory operands are available in our caller's stack.
2451 NumBytes = 0;
2452 } else {
2453 // Adjust the stack pointer for the new arguments...
2454 // These operations are automatically eliminated by the prolog/epilog pass
2455 Chain = DAG.getCALLSEQ_START(Chain, isTailCall ? 0 : NumBytes, 0, dl);
2456 }
2457
2458 SDValue StackPtr =
2459 DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
2460
2461 RegsToPassVector RegsToPass;
2462 SmallVector<SDValue, 8> MemOpChains;
2463
2464 // During a tail call, stores to the argument area must happen after all of
2465 // the function's incoming arguments have been loaded because they may alias.
2466 // This is done by folding in a TokenFactor from LowerFormalArguments, but
2467 // there's no point in doing so repeatedly so this tracks whether that's
2468 // happened yet.
2469 bool AfterFormalArgLoads = false;
2470
2471 // Walk the register/memloc assignments, inserting copies/loads. In the case
2472 // of tail call optimization, arguments are handled later.
2473 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2474 i != e;
2475 ++i, ++realArgIdx) {
2476 CCValAssign &VA = ArgLocs[i];
2477 SDValue Arg = OutVals[realArgIdx];
2478 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2479 bool isByVal = Flags.isByVal();
2480
2481 // Promote the value if needed.
2482 switch (VA.getLocInfo()) {
2483 default: llvm_unreachable("Unknown loc info!");
2484 case CCValAssign::Full: break;
2485 case CCValAssign::SExt:
2486 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
2487 break;
2488 case CCValAssign::ZExt:
2489 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
2490 break;
2491 case CCValAssign::AExt:
2492 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
2493 break;
2494 case CCValAssign::BCvt:
2495 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2496 break;
2497 }
2498
2499 if (isTailCall && VA.isMemLoc() && !AfterFormalArgLoads) {
2500 Chain = DAG.getStackArgumentTokenFactor(Chain);
2501 AfterFormalArgLoads = true;
2502 }
2503
2504 // f16 arguments have their size extended to 4 bytes and passed as if they
2505 // had been copied to the LSBs of a 32-bit register.
2506 // For that, they are passed extended to i32 (soft ABI) or to f32 (hard ABI).
2507 if (VA.needsCustom() &&
2508 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
2509 Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
2510 } else {
2511 // f16 arguments could have been extended prior to argument lowering.
2512 // Mask such arguments if this is a CMSE nonsecure call.
2513 auto ArgVT = Outs[realArgIdx].ArgVT;
2514 if (isCmseNSCall && (ArgVT == MVT::f16)) {
2515 auto LocBits = VA.getLocVT().getSizeInBits();
2516 auto MaskValue = APInt::getLowBitsSet(LocBits, ArgVT.getSizeInBits());
2517 SDValue Mask =
2518 DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
2519 Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
2520 Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
2521 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2522 }
2523 }
2524
2525 // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
2526 if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
2527 SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2528 DAG.getConstant(0, dl, MVT::i32));
2529 SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2530 DAG.getConstant(1, dl, MVT::i32));
2531
2532 PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, VA, ArgLocs[++i],
2533 StackPtr, MemOpChains, isTailCall, SPDiff);
2534
2535 VA = ArgLocs[++i]; // skip ahead to next loc
2536 if (VA.isRegLoc()) {
2537 PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, VA, ArgLocs[++i],
2538 StackPtr, MemOpChains, isTailCall, SPDiff);
2539 } else {
2540 assert(VA.isMemLoc());
2541 SDValue DstAddr;
2542 MachinePointerInfo DstInfo;
2543 std::tie(DstAddr, DstInfo) =
2544 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2545 MemOpChains.push_back(DAG.getStore(Chain, dl, Op1, DstAddr, DstInfo));
2546 }
2547 } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {
2548 PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
2549 StackPtr, MemOpChains, isTailCall, SPDiff);
2550 } else if (VA.isRegLoc()) {
2551 if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
2552 Outs[0].VT == MVT::i32) {
2553 assert(VA.getLocVT() == MVT::i32 &&
2554 "unexpected calling convention register assignment");
2555 assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
2556 "unexpected use of 'returned'");
2557 isThisReturn = true;
2558 }
2559 const TargetOptions &Options = DAG.getTarget().Options;
2560 if (Options.EmitCallSiteInfo)
2561 CSInfo.emplace_back(VA.getLocReg(), i);
2562 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2563 } else if (isByVal) {
2564      assert(VA.isMemLoc());
2565 unsigned offset = 0;
2566
2567 // True if this byval aggregate will be split between registers
2568 // and memory.
2569 unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
2570 unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
2571
2572 if (CurByValIdx < ByValArgsCount) {
2573
2574 unsigned RegBegin, RegEnd;
2575 CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
2576
2577 EVT PtrVT =
2578 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
2579 unsigned int i, j;
2580 for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
2581 SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
2582 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
2583 SDValue Load =
2584 DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo(),
2585 DAG.InferPtrAlign(AddArg));
2586 MemOpChains.push_back(Load.getValue(1));
2587 RegsToPass.push_back(std::make_pair(j, Load));
2588 }
2589
2590        // If the parameter size exceeds the register area, the "offset" value
2591        // helps us calculate the stack slot for the remaining part properly.
2592 offset = RegEnd - RegBegin;
2593
2594 CCInfo.nextInRegsParam();
2595 }
2596
2597 if (Flags.getByValSize() > 4*offset) {
2598 auto PtrVT = getPointerTy(DAG.getDataLayout());
2599 SDValue Dst;
2600 MachinePointerInfo DstInfo;
2601 std::tie(Dst, DstInfo) =
2602 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2603 SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
2604 SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
2605 SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
2606 MVT::i32);
2607 SDValue AlignNode =
2608 DAG.getConstant(Flags.getNonZeroByValAlign().value(), dl, MVT::i32);
2609
2610 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
2611 SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
2612 MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
2613 Ops));
2614 }
2615 } else {
2616      assert(VA.isMemLoc());
2617 SDValue DstAddr;
2618 MachinePointerInfo DstInfo;
2619 std::tie(DstAddr, DstInfo) =
2620 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2621
2622 SDValue Store = DAG.getStore(Chain, dl, Arg, DstAddr, DstInfo);
2623 MemOpChains.push_back(Store);
2624 }
2625 }
2626
2627 if (!MemOpChains.empty())
2628 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2629
2630 // Build a sequence of copy-to-reg nodes chained together with token chain
2631 // and flag operands which copy the outgoing args into the appropriate regs.
2632 SDValue InGlue;
2633 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
2634 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
2635 RegsToPass[i].second, InGlue);
2636 InGlue = Chain.getValue(1);
2637 }
2638
2639 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
2640 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
2641 // node so that legalize doesn't hack it.
2642 bool isDirect = false;
2643
2644 const TargetMachine &TM = getTargetMachine();
2645 const Module *Mod = MF.getFunction().getParent();
2646 const GlobalValue *GVal = nullptr;
2647 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
2648 GVal = G->getGlobal();
2649 bool isStub =
2650 !TM.shouldAssumeDSOLocal(*Mod, GVal) && Subtarget->isTargetMachO();
2651
2652 bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
2653 bool isLocalARMFunc = false;
2654 auto PtrVt = getPointerTy(DAG.getDataLayout());
2655
2656 if (Subtarget->genLongCalls()) {
2657    assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
2658           "long-calls codegen is not position independent!");
2659 // Handle a global address or an external symbol. If it's not one of
2660 // those, the target's already in a register, so we don't need to do
2661 // anything extra.
2662 if (isa<GlobalAddressSDNode>(Callee)) {
2663      // When generating execute-only code we use a movw/movt pair.
2664 // Currently execute-only is only available for architectures that
2665 // support movw movt, so we are safe to assume that.
2666 if (Subtarget->genExecuteOnly()) {
2667        assert(Subtarget->useMovt() &&
2668               "long-calls with execute-only requires movt and movw!");
2669 ++NumMovwMovt;
2670 Callee = DAG.getNode(ARMISD::Wrapper, dl, PtrVt,
2671 DAG.getTargetGlobalAddress(GVal, dl, PtrVt));
2672 } else {
2673 // Create a constant pool entry for the callee address
2674 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2675 ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(
2676 GVal, ARMPCLabelIndex, ARMCP::CPValue, 0);
2677
2678 // Get the address of the callee into a register
2679 SDValue Addr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2680 Addr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Addr);
2681 Callee = DAG.getLoad(
2682 PtrVt, dl, DAG.getEntryNode(), Addr,
2683 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2684 }
2685 } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
2686 const char *Sym = S->getSymbol();
2687
2688      // When generating execute-only code we use a movw/movt pair.
2689 // Currently execute-only is only available for architectures that
2690 // support movw movt, so we are safe to assume that.
2691 if (Subtarget->genExecuteOnly()) {
2692        assert(Subtarget->useMovt() &&
2693               "long-calls with execute-only requires movt and movw!");
2694 ++NumMovwMovt;
2695 Callee = DAG.getNode(ARMISD::Wrapper, dl, PtrVt,
2696 DAG.getTargetGlobalAddress(GVal, dl, PtrVt));
2697 } else {
2698 // Create a constant pool entry for the callee address
2699 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2700 ARMConstantPoolValue *CPV = ARMConstantPoolSymbol::Create(
2701 *DAG.getContext(), Sym, ARMPCLabelIndex, 0);
2702
2703 // Get the address of the callee into a register
2704 SDValue Addr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2705 Addr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Addr);
2706 Callee = DAG.getLoad(
2707 PtrVt, dl, DAG.getEntryNode(), Addr,
2708 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2709 }
2710 }
2711 } else if (isa<GlobalAddressSDNode>(Callee)) {
2712 if (!PreferIndirect) {
2713 isDirect = true;
2714 bool isDef = GVal->isStrongDefinitionForLinker();
2715
2716 // ARM call to a local ARM function is predicable.
2717 isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
2718 // tBX takes a register source operand.
2719 if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2720        assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
2721 Callee = DAG.getNode(
2722 ARMISD::WrapperPIC, dl, PtrVt,
2723 DAG.getTargetGlobalAddress(GVal, dl, PtrVt, 0, ARMII::MO_NONLAZY));
2724 Callee = DAG.getLoad(
2725 PtrVt, dl, DAG.getEntryNode(), Callee,
2726 MachinePointerInfo::getGOT(DAG.getMachineFunction()), MaybeAlign(),
2727 MachineMemOperand::MODereferenceable |
2728 MachineMemOperand::MOInvariant);
2729 } else if (Subtarget->isTargetCOFF()) {
2730        assert(Subtarget->isTargetWindows() &&
2731               "Windows is the only supported COFF target");
2732 unsigned TargetFlags = ARMII::MO_NO_FLAG;
2733 if (GVal->hasDLLImportStorageClass())
2734 TargetFlags = ARMII::MO_DLLIMPORT;
2735 else if (!TM.shouldAssumeDSOLocal(*GVal->getParent(), GVal))
2736 TargetFlags = ARMII::MO_COFFSTUB;
2737 Callee = DAG.getTargetGlobalAddress(GVal, dl, PtrVt, /*offset=*/0,
2738 TargetFlags);
2739 if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
2740 Callee =
2741 DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
2742 DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
2743 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
2744 } else {
2745 Callee = DAG.getTargetGlobalAddress(GVal, dl, PtrVt, 0, 0);
2746 }
2747 }
2748 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2749 isDirect = true;
2750 // tBX takes a register source operand.
2751 const char *Sym = S->getSymbol();
2752 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2753 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2754 ARMConstantPoolValue *CPV =
2755 ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2756 ARMPCLabelIndex, 4);
2757 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2758 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2759 Callee = DAG.getLoad(
2760 PtrVt, dl, DAG.getEntryNode(), CPAddr,
2761 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2762 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2763 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
2764 } else {
2765 Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
2766 }
2767 }
2768
2769 if (isCmseNSCall) {
2770    assert(!isARMFunc && !isDirect &&
2771           "Cannot handle call to ARM function or direct call");
2772 if (NumBytes > 0) {
2773 DiagnosticInfoUnsupported Diag(DAG.getMachineFunction().getFunction(),
2774 "call to non-secure function would "
2775 "require passing arguments on stack",
2776 dl.getDebugLoc());
2777 DAG.getContext()->diagnose(Diag);
2778 }
2779 if (isStructRet) {
2780 DiagnosticInfoUnsupported Diag(
2781 DAG.getMachineFunction().getFunction(),
2782 "call to non-secure function would return value through pointer",
2783 dl.getDebugLoc());
2784 DAG.getContext()->diagnose(Diag);
2785 }
2786 }
2787
2788 // FIXME: handle tail calls differently.
2789 unsigned CallOpc;
2790 if (Subtarget->isThumb()) {
2791 if (GuardWithBTI)
2792 CallOpc = ARMISD::t2CALL_BTI;
2793 else if (isCmseNSCall)
2794 CallOpc = ARMISD::tSECALL;
2795 else if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2796 CallOpc = ARMISD::CALL_NOLINK;
2797 else
2798 CallOpc = ARMISD::CALL;
2799 } else {
2800 if (!isDirect && !Subtarget->hasV5TOps())
2801 CallOpc = ARMISD::CALL_NOLINK;
2802 else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2803 // Emit regular call when code size is the priority
2804 !Subtarget->hasMinSize())
2805 // "mov lr, pc; b _foo" to avoid confusing the RSP
2806 CallOpc = ARMISD::CALL_NOLINK;
2807 else
2808 CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
2809 }
2810
2811 // We don't usually want to end the call-sequence here because we would tidy
2812  // the frame up *after* the call; however, in the ABI-changing tail-call case
2813 // we've carefully laid out the parameters so that when sp is reset they'll be
2814 // in the correct location.
2815 if (isTailCall && !isSibCall) {
2816 Chain = DAG.getCALLSEQ_END(Chain, 0, 0, InGlue, dl);
2817 InGlue = Chain.getValue(1);
2818 }
2819
2820 std::vector<SDValue> Ops;
2821 Ops.push_back(Chain);
2822 Ops.push_back(Callee);
2823
2824 if (isTailCall) {
2825 Ops.push_back(DAG.getTargetConstant(SPDiff, dl, MVT::i32));
2826 }
2827
2828 // Add argument registers to the end of the list so that they are known live
2829 // into the call.
2830 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2831 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2832 RegsToPass[i].second.getValueType()));
2833
2834 // Add a register mask operand representing the call-preserved registers.
2835 const uint32_t *Mask;
2836 const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2837 if (isThisReturn) {
2838 // For 'this' returns, use the R0-preserving mask if applicable
2839 Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
2840 if (!Mask) {
2841 // Set isThisReturn to false if the calling convention is not one that
2842 // allows 'returned' to be modeled in this way, so LowerCallResult does
2843 // not try to pass 'this' straight through
2844 isThisReturn = false;
2845 Mask = ARI->getCallPreservedMask(MF, CallConv);
2846 }
2847 } else
2848 Mask = ARI->getCallPreservedMask(MF, CallConv);
2849
2850  assert(Mask && "Missing call preserved mask for calling convention");
2851 Ops.push_back(DAG.getRegisterMask(Mask));
2852
2853 if (InGlue.getNode())
2854 Ops.push_back(InGlue);
2855
2856 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2857 if (isTailCall) {
2858 MF.getFrameInfo().setHasTailCall();
2859 SDValue Ret = DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
2860 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
2861 return Ret;
2862 }
2863
2864 // Returns a chain and a flag for retval copy to use.
2865 Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
2866 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2867 InGlue = Chain.getValue(1);
2868 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
2869
2870 // If we're guaranteeing tail-calls will be honoured, the callee must
2871 // pop its own argument stack on return. But this call is *not* a tail call so
2872 // we need to undo that after it returns to restore the status-quo.
2873 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
2874 uint64_t CalleePopBytes =
2875 canGuaranteeTCO(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : -1ULL;
2876
2877 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, CalleePopBytes, InGlue, dl);
2878 if (!Ins.empty())
2879 InGlue = Chain.getValue(1);
2880
2881 // Handle result values, copying them out of physregs into vregs that we
2882 // return.
2883 return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, dl, DAG,
2884 InVals, isThisReturn,
2885 isThisReturn ? OutVals[0] : SDValue());
2886}
2887
2888/// HandleByVal - Every parameter *after* a byval parameter is passed
2889/// on the stack. Remember the next parameter register to allocate,
2890/// and then confiscate the rest of the parameter registers to ensure
2891/// this.
2892void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
2893 Align Alignment) const {
2894 // Byval (as with any stack) slots are always at least 4 byte aligned.
2895 Alignment = std::max(Alignment, Align(4));
2896
2897 unsigned Reg = State->AllocateReg(GPRArgRegs);
2898 if (!Reg)
2899 return;
2900
2901 unsigned AlignInRegs = Alignment.value() / 4;
2902 unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
2903 for (unsigned i = 0; i < Waste; ++i)
2904 Reg = State->AllocateReg(GPRArgRegs);
2905
2906 if (!Reg)
2907 return;
2908
2909 unsigned Excess = 4 * (ARM::R4 - Reg);
2910
2911  // Special case when NSAA != SP and the parameter size is greater than the
2912  // size of all remaining GPR regs. In that case we can't split the parameter;
2913  // we must send it to the stack. We also must set NCRN to R4, so we waste all
2914  // remaining registers.
2915 const unsigned NSAAOffset = State->getNextStackOffset();
2916 if (NSAAOffset != 0 && Size > Excess) {
2917 while (State->AllocateReg(GPRArgRegs))
2918 ;
2919 return;
2920 }
2921
2922  // The first register for the byval parameter is the first register that
2923  // wasn't allocated before this method call, so it would be "reg".
2924  // If the parameter is small enough to be saved in the range [reg, r4), then
2925  // the end (first after last) register would be reg + param-size-in-regs;
2926  // otherwise the parameter would be split between registers and stack, and
2927  // the end register would be r4 in that case.
2928 unsigned ByValRegBegin = Reg;
2929 unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
2930 State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
2931  // Note, the first register was already allocated at the beginning of this
2932  // method, so allocate the remaining registers we need.
2933 for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
2934 State->AllocateReg(GPRArgRegs);
2935 // A byval parameter that is split between registers and memory needs its
2936 // size truncated here.
2937 // In the case where the entire structure fits in registers, we set the
2938 // size in memory to zero.
2939 Size = std::max<int>(Size - Excess, 0);
2940}
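// ---- Illustrative sketch (editor's addition; not part of ARMISelLowering.cpp) ----
// A minimal, stand-alone model of the GPR-splitting arithmetic in HandleByVal
// above, with r0-r3 modelled as indices 0-3 and "no register" as index 4. The
// names below are hypothetical, and the NSAA != SP special case is omitted.
#include <algorithm>

struct ByValSplit {
  unsigned FirstReg;     // first GPR index holding part of the byval argument
  unsigned EndReg;       // one past the last GPR index used
  unsigned BytesOnStack; // remainder that is passed in memory
};

static ByValSplit sketchSplitByVal(unsigned FirstFreeReg, unsigned SizeInBytes,
                                   unsigned AlignInBytes) {
  const unsigned R4 = 4;                              // first non-argument GPR
  unsigned AlignInRegs = std::max(AlignInBytes, 4u) / 4;
  unsigned Waste = (R4 - FirstFreeReg) % AlignInRegs; // regs skipped for alignment
  unsigned Reg = std::min(FirstFreeReg + Waste, R4);
  unsigned Excess = 4 * (R4 - Reg);                   // bytes that fit in GPRs
  unsigned EndReg = std::min(Reg + SizeInBytes / 4, R4);
  unsigned OnStack = SizeInBytes > Excess ? SizeInBytes - Excess : 0;
  return {Reg, EndReg, OnStack};
}
// E.g. a 12-byte struct with 8-byte alignment arriving when r1 is the next free
// register: r1 is wasted, r2-r3 take the first 8 bytes, 4 bytes go to the stack.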
2941
2942/// MatchingStackOffset - Return true if the given stack call argument is
2943/// already available in the same position (relatively) of the caller's
2944/// incoming argument stack.
2945static
2946bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
2947 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
2948 const TargetInstrInfo *TII) {
2949 unsigned Bytes = Arg.getValueSizeInBits() / 8;
2950 int FI = std::numeric_limits<int>::max();
2951 if (Arg.getOpcode() == ISD::CopyFromReg) {
2952 Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2953 if (!VR.isVirtual())
2954 return false;
2955 MachineInstr *Def = MRI->getVRegDef(VR);
2956 if (!Def)
2957 return false;
2958 if (!Flags.isByVal()) {
2959 if (!TII->isLoadFromStackSlot(*Def, FI))
2960 return false;
2961 } else {
2962 return false;
2963 }
2964 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2965 if (Flags.isByVal())
2966 // ByVal argument is passed in as a pointer but it's now being
2967 // dereferenced. e.g.
2968 // define @foo(%struct.X* %A) {
2969 // tail call @bar(%struct.X* byval %A)
2970 // }
2971 return false;
2972 SDValue Ptr = Ld->getBasePtr();
2973 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2974 if (!FINode)
2975 return false;
2976 FI = FINode->getIndex();
2977 } else
2978 return false;
2979
2980  assert(FI != std::numeric_limits<int>::max());
2981 if (!MFI.isFixedObjectIndex(FI))
2982 return false;
2983 return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
2984}
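// ---- Illustrative note (editor's addition; not part of ARMISelLowering.cpp) ----
// Example of a match: if the caller itself received an i32 at incoming stack
// offset 0 and forwards it unchanged as the callee's first stack argument, the
// load feeding the call comes from a fixed frame object with the same offset
// (0) and size (4), so MatchingStackOffset returns true and no copy is needed.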
2985
2986/// IsEligibleForTailCallOptimization - Check whether the call is eligible
2987/// for tail call optimization. Targets which want to do tail call
2988/// optimization should implement this function.
2989bool ARMTargetLowering::IsEligibleForTailCallOptimization(
2990 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
2991 bool isCalleeStructRet, bool isCallerStructRet,
2992 const SmallVectorImpl<ISD::OutputArg> &Outs,
2993 const SmallVectorImpl<SDValue> &OutVals,
2994 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG,
2995 const bool isIndirect) const {
2996 MachineFunction &MF = DAG.getMachineFunction();
2997 const Function &CallerF = MF.getFunction();
2998 CallingConv::ID CallerCC = CallerF.getCallingConv();
2999
3000  assert(Subtarget->supportsTailCall());
3001
3002 // Indirect tail calls cannot be optimized for Thumb1 if the args
3003 // to the call take up r0-r3. The reason is that there are no legal registers
3004 // left to hold the pointer to the function to be called.
3005 // Similarly, if the function uses return address sign and authentication,
3006 // r12 is needed to hold the PAC and is not available to hold the callee
3007 // address.
3008 if (Outs.size() >= 4 &&
3009 (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect)) {
3010 if (Subtarget->isThumb1Only())
3011 return false;
3012 // Conservatively assume the function spills LR.
3013 if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(true))
3014 return false;
3015 }
3016
3017 // Look for obvious safe cases to perform tail call optimization that do not
3018 // require ABI changes. This is what gcc calls sibcall.
3019
3020 // Exception-handling functions need a special set of instructions to indicate
3021 // a return to the hardware. Tail-calling another function would probably
3022 // break this.
3023 if (CallerF.hasFnAttribute("interrupt"))
3024 return false;
3025
3026 if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt))
3027 return CalleeCC == CallerCC;
3028
3029 // Also avoid sibcall optimization if either caller or callee uses struct
3030 // return semantics.
3031 if (isCalleeStructRet || isCallerStructRet)
3032 return false;
3033
3034 // Externally-defined functions with weak linkage should not be
3035 // tail-called on ARM when the OS does not support dynamic
3036 // pre-emption of symbols, as the AAELF spec requires normal calls
3037 // to undefined weak functions to be replaced with a NOP or jump to the
3038 // next instruction. The behaviour of branch instructions in this
3039 // situation (as used for tail calls) is implementation-defined, so we
3040 // cannot rely on the linker replacing the tail call with a return.
3041 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3042 const GlobalValue *GV = G->getGlobal();
3043 const Triple &TT = getTargetMachine().getTargetTriple();
3044 if (GV->hasExternalWeakLinkage() &&
3045 (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
3046 return false;
3047 }
3048
3049 // Check that the call results are passed in the same way.
3050 LLVMContext &C = *DAG.getContext();
3051 if (!CCState::resultsCompatible(
3052 getEffectiveCallingConv(CalleeCC, isVarArg),
3053 getEffectiveCallingConv(CallerCC, CallerF.isVarArg()), MF, C, Ins,
3054 CCAssignFnForReturn(CalleeCC, isVarArg),
3055 CCAssignFnForReturn(CallerCC, CallerF.isVarArg())))
3056 return false;
3057 // The callee has to preserve all registers the caller needs to preserve.
3058 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
3059 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
3060 if (CalleeCC != CallerCC) {
3061 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
3062 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
3063 return false;
3064 }
3065
3066 // If Caller's vararg or byval argument has been split between registers and
3067 // stack, do not perform tail call, since part of the argument is in caller's
3068 // local frame.
3069 const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
3070 if (AFI_Caller->getArgRegsSaveSize())
3071 return false;
3072
3073 // If the callee takes no arguments then go on to check the results of the
3074 // call.
3075 if (!Outs.empty()) {
3076 // Check if stack adjustment is needed. For now, do not do this if any
3077 // argument is passed on the stack.
3078 SmallVector<CCValAssign, 16> ArgLocs;
3079 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
3080 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
3081 if (CCInfo.getNextStackOffset()) {
3082 // Check if the arguments are already laid out in the right way as
3083 // the caller's fixed stack objects.
3084 MachineFrameInfo &MFI = MF.getFrameInfo();
3085 const MachineRegisterInfo *MRI = &MF.getRegInfo();
3086 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3087 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
3088 i != e;
3089 ++i, ++realArgIdx) {
3090 CCValAssign &VA = ArgLocs[i];
3091 EVT RegVT = VA.getLocVT();
3092 SDValue Arg = OutVals[realArgIdx];
3093 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
3094 if (VA.getLocInfo() == CCValAssign::Indirect)
3095 return false;
3096 if (VA.needsCustom() && (RegVT == MVT::f64 || RegVT == MVT::v2f64)) {
3097 // f64 and vector types are split into multiple registers or
3098 // register/stack-slot combinations. The types will not match
3099 // the registers; give up on memory f64 refs until we figure
3100 // out what to do about this.
3101 if (!VA.isRegLoc())
3102 return false;
3103 if (!ArgLocs[++i].isRegLoc())
3104 return false;
3105 if (RegVT == MVT::v2f64) {
3106 if (!ArgLocs[++i].isRegLoc())
3107 return false;
3108 if (!ArgLocs[++i].isRegLoc())
3109 return false;
3110 }
3111 } else if (!VA.isRegLoc()) {
3112 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
3113 MFI, MRI, TII))
3114 return false;
3115 }
3116 }
3117 }
3118
3119 const MachineRegisterInfo &MRI = MF.getRegInfo();
3120 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
3121 return false;
3122 }
3123
3124 return true;
3125}
3126
3127bool
3128ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
3129 MachineFunction &MF, bool isVarArg,
3130 const SmallVectorImpl<ISD::OutputArg> &Outs,
3131 LLVMContext &Context) const {
3132 SmallVector<CCValAssign, 16> RVLocs;
3133 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
3134 return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
3135}
3136
3137static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
3138 const SDLoc &DL, SelectionDAG &DAG) {
3139 const MachineFunction &MF = DAG.getMachineFunction();
3140 const Function &F = MF.getFunction();
3141
3142 StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString();
3143
3144 // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
3145 // version of the "preferred return address". These offsets affect the return
3146 // instruction if this is a return from PL1 without hypervisor extensions.
3147 // IRQ/FIQ: +4 "subs pc, lr, #4"
3148 // SWI: 0 "subs pc, lr, #0"
3149 // ABORT: +4 "subs pc, lr, #4"
3150 // UNDEF: +4/+2 "subs pc, lr, #0"
3151  // UNDEF varies depending on whether the exception came from ARM or Thumb
3152 // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
3153
3154 int64_t LROffset;
3155 if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
3156 IntKind == "ABORT")
3157 LROffset = 4;
3158 else if (IntKind == "SWI" || IntKind == "UNDEF")
3159 LROffset = 0;
3160 else
3161 report_fatal_error("Unsupported interrupt attribute. If present, value "
3162 "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
3163
3164 RetOps.insert(RetOps.begin() + 1,
3165 DAG.getConstant(LROffset, DL, MVT::i32, false));
3166
3167 return DAG.getNode(ARMISD::INTRET_GLUE, DL, MVT::Other, RetOps);
3168}
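// ---- Illustrative note (editor's addition; not part of ARMISelLowering.cpp) ----
// How the "interrupt" attribute typically reaches this lowering from C, shown
// as a sketch for a non-M-class target such as armv7-a; the handler below is
// hypothetical and is expected to return via the offset LR sequence chosen
// above ("subs pc, lr, #4" for IRQ):
//
//   __attribute__((interrupt("IRQ"))) void irq_handler(void) {
//     /* acknowledge and dispatch the interrupt */
//   }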
3169
3170SDValue
3171ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
3172 bool isVarArg,
3173 const SmallVectorImpl<ISD::OutputArg> &Outs,
3174 const SmallVectorImpl<SDValue> &OutVals,
3175 const SDLoc &dl, SelectionDAG &DAG) const {
3176 // CCValAssign - represent the assignment of the return value to a location.
3177 SmallVector<CCValAssign, 16> RVLocs;
3178
3179 // CCState - Info about the registers and stack slots.
3180 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3181 *DAG.getContext());
3182
3183 // Analyze outgoing return values.
3184 CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
3185
3186 SDValue Glue;
3187 SmallVector<SDValue, 4> RetOps;
3188 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
3189 bool isLittleEndian = Subtarget->isLittle();
3190
3191 MachineFunction &MF = DAG.getMachineFunction();
3192 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3193 AFI->setReturnRegsCount(RVLocs.size());
3194
3195 // Report error if cmse entry function returns structure through first ptr arg.
3196 if (AFI->isCmseNSEntryFunction() && MF.getFunction().hasStructRetAttr()) {
3197 // Note: using an empty SDLoc(), as the first line of the function is a
3198 // better place to report than the last line.
3199 DiagnosticInfoUnsupported Diag(
3200 DAG.getMachineFunction().getFunction(),
3201 "secure entry function would return value through pointer",
3202 SDLoc().getDebugLoc());
3203 DAG.getContext()->diagnose(Diag);
3204 }
3205
3206 // Copy the result values into the output registers.
3207 for (unsigned i = 0, realRVLocIdx = 0;
3208 i != RVLocs.size();
3209 ++i, ++realRVLocIdx) {
3210 CCValAssign &VA = RVLocs[i];
3211    assert(VA.isRegLoc() && "Can only return in registers!");
3212
3213 SDValue Arg = OutVals[realRVLocIdx];
3214 bool ReturnF16 = false;
3215
3216 if (Subtarget->hasFullFP16() && Subtarget->isTargetHardFloat()) {
3217 // Half-precision return values can be returned like this:
3218 //
3219 // t11 f16 = fadd ...
3220 // t12: i16 = bitcast t11
3221 // t13: i32 = zero_extend t12
3222 // t14: f32 = bitcast t13 <~~~~~~~ Arg
3223 //
3224 // to avoid code generation for bitcasts, we simply set Arg to the node
3225 // that produces the f16 value, t11 in this case.
3226 //
3227 if (Arg.getValueType() == MVT::f32 && Arg.getOpcode() == ISD::BITCAST) {
3228 SDValue ZE = Arg.getOperand(0);
3229 if (ZE.getOpcode() == ISD::ZERO_EXTEND && ZE.getValueType() == MVT::i32) {
3230 SDValue BC = ZE.getOperand(0);
3231 if (BC.getOpcode() == ISD::BITCAST && BC.getValueType() == MVT::i16) {
3232 Arg = BC.getOperand(0);
3233 ReturnF16 = true;
3234 }
3235 }
3236 }
3237 }
3238
3239 switch (VA.getLocInfo()) {
3240    default: llvm_unreachable("Unknown loc info!");
3241 case CCValAssign::Full: break;
3242 case CCValAssign::BCvt:
3243 if (!ReturnF16)
3244 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
3245 break;
3246 }
3247
3248 // Mask f16 arguments if this is a CMSE nonsecure entry.
3249 auto RetVT = Outs[realRVLocIdx].ArgVT;
3250 if (AFI->isCmseNSEntryFunction() && (RetVT == MVT::f16)) {
3251 if (VA.needsCustom() && VA.getValVT() == MVT::f16) {
3252 Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
3253 } else {
3254 auto LocBits = VA.getLocVT().getSizeInBits();
3255 auto MaskValue = APInt::getLowBitsSet(LocBits, RetVT.getSizeInBits());
3256 SDValue Mask =
3257 DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
3258 Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
3259 Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
3260 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
3261 }
3262 }
3263
3264 if (VA.needsCustom() &&
3265 (VA.getLocVT() == MVT::v2f64 || VA.getLocVT() == MVT::f64)) {
3266 if (VA.getLocVT() == MVT::v2f64) {
3267 // Extract the first half and return it in two registers.
3268 SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
3269 DAG.getConstant(0, dl, MVT::i32));
3270 SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
3271 DAG.getVTList(MVT::i32, MVT::i32), Half);
3272
3273 Chain =
3274 DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3275 HalfGPRs.getValue(isLittleEndian ? 0 : 1), Glue);
3276 Glue = Chain.getValue(1);
3277 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3278 VA = RVLocs[++i]; // skip ahead to next loc
3279 Chain =
3280 DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3281 HalfGPRs.getValue(isLittleEndian ? 1 : 0), Glue);
3282 Glue = Chain.getValue(1);
3283 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3284 VA = RVLocs[++i]; // skip ahead to next loc
3285
3286 // Extract the 2nd half and fall through to handle it as an f64 value.
3287 Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
3288 DAG.getConstant(1, dl, MVT::i32));
3289 }
3290 // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
3291 // available.
3292 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
3293 DAG.getVTList(MVT::i32, MVT::i32), Arg);
3294 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3295 fmrrd.getValue(isLittleEndian ? 0 : 1), Glue);
3296 Glue = Chain.getValue(1);
3297 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3298 VA = RVLocs[++i]; // skip ahead to next loc
3299 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3300 fmrrd.getValue(isLittleEndian ? 1 : 0), Glue);
3301 } else
3302 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
3303
3304 // Guarantee that all emitted copies are
3305 // stuck together, avoiding something bad.
3306 Glue = Chain.getValue(1);
3307 RetOps.push_back(DAG.getRegister(
3308 VA.getLocReg(), ReturnF16 ? Arg.getValueType() : VA.getLocVT()));
3309 }
3310 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
3311 const MCPhysReg *I =
3312 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
3313 if (I) {
3314 for (; *I; ++I) {
3315 if (ARM::GPRRegClass.contains(*I))
3316 RetOps.push_back(DAG.getRegister(*I, MVT::i32));
3317 else if (ARM::DPRRegClass.contains(*I))
3318 RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
3319 else
3320        llvm_unreachable("Unexpected register class in CSRsViaCopy!");
3321 }
3322 }
3323
3324 // Update chain and glue.
3325 RetOps[0] = Chain;
3326 if (Glue.getNode())
3327 RetOps.push_back(Glue);
3328
3329 // CPUs which aren't M-class use a special sequence to return from
3330 // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
3331 // though we use "subs pc, lr, #N").
3332 //
3333 // M-class CPUs actually use a normal return sequence with a special
3334 // (hardware-provided) value in LR, so the normal code path works.
3335 if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt") &&
3336 !Subtarget->isMClass()) {
3337 if (Subtarget->isThumb1Only())
3338 report_fatal_error("interrupt attribute is not supported in Thumb1");
3339 return LowerInterruptReturn(RetOps, dl, DAG);
3340 }
3341
3342 ARMISD::NodeType RetNode = AFI->isCmseNSEntryFunction() ? ARMISD::SERET_GLUE :
3343 ARMISD::RET_GLUE;
3344 return DAG.getNode(RetNode, dl, MVT::Other, RetOps);
3345}
3346
3347bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
3348 if (N->getNumValues() != 1)
3349 return false;
3350 if (!N->hasNUsesOfValue(1, 0))
3351 return false;
3352
3353 SDValue TCChain = Chain;
3354 SDNode *Copy = *N->use_begin();
3355 if (Copy->getOpcode() == ISD::CopyToReg) {
3356 // If the copy has a glue operand, we conservatively assume it isn't safe to
3357 // perform a tail call.
3358 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3359 return false;
3360 TCChain = Copy->getOperand(0);
3361 } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
3362 SDNode *VMov = Copy;
3363 // f64 returned in a pair of GPRs.
3364 SmallPtrSet<SDNode*, 2> Copies;
3365 for (SDNode *U : VMov->uses()) {
3366 if (U->getOpcode() != ISD::CopyToReg)
3367 return false;
3368 Copies.insert(U);
3369 }
3370 if (Copies.size() > 2)
3371 return false;
3372
3373 for (SDNode *U : VMov->uses()) {
3374 SDValue UseChain = U->getOperand(0);
3375 if (Copies.count(UseChain.getNode()))
3376 // Second CopyToReg
3377 Copy = U;
3378 else {
3379 // We are at the top of this chain.
3380 // If the copy has a glue operand, we conservatively assume it
3381 // isn't safe to perform a tail call.
3382 if (U->getOperand(U->getNumOperands() - 1).getValueType() == MVT::Glue)
3383 return false;
3384 // First CopyToReg
3385 TCChain = UseChain;
3386 }
3387 }
3388 } else if (Copy->getOpcode() == ISD::BITCAST) {
3389 // f32 returned in a single GPR.
3390 if (!Copy->hasOneUse())
3391 return false;
3392 Copy = *Copy->use_begin();
3393 if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
3394 return false;
3395 // If the copy has a glue operand, we conservatively assume it isn't safe to
3396 // perform a tail call.
3397 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3398 return false;
3399 TCChain = Copy->getOperand(0);
3400 } else {
3401 return false;
3402 }
3403
3404 bool HasRet = false;
3405 for (const SDNode *U : Copy->uses()) {
3406 if (U->getOpcode() != ARMISD::RET_GLUE &&
3407 U->getOpcode() != ARMISD::INTRET_GLUE)
3408 return false;
3409 HasRet = true;
3410 }
3411
3412 if (!HasRet)
3413 return false;
3414
3415 Chain = TCChain;
3416 return true;
3417}
3418
3419bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3420 if (!Subtarget->supportsTailCall())
3421 return false;
3422
3423 if (!CI->isTailCall())
3424 return false;
3425
3426 return true;
3427}
3428
3429// Trying to write a 64-bit value, so we need to split it into two 32-bit values
3430// first, and pass the low and high parts through.
3431static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
3432 SDLoc DL(Op);
3433 SDValue WriteValue = Op->getOperand(2);
3434
3435 // This function is only supposed to be called for i64 type argument.
3436  assert(WriteValue.getValueType() == MVT::i64
3437         && "LowerWRITE_REGISTER called for non-i64 type argument.");
3438
3439 SDValue Lo, Hi;
3440 std::tie(Lo, Hi) = DAG.SplitScalar(WriteValue, DL, MVT::i32, MVT::i32);
3441 SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
3442 return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
3443}
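// ---- Illustrative sketch (editor's addition; not part of ARMISelLowering.cpp) ----
// The DAG.SplitScalar call above performs, for the i64 payload, roughly the
// scalar computation below; the two i32 halves become the last two operands of
// the rebuilt ISD::WRITE_REGISTER node. The helper name is hypothetical.
#include <cstdint>
#include <utility>

static std::pair<uint32_t, uint32_t> sketchSplitI64(uint64_t WriteValue) {
  uint32_t Lo = static_cast<uint32_t>(WriteValue);       // bits [31:0]
  uint32_t Hi = static_cast<uint32_t>(WriteValue >> 32); // bits [63:32]
  return {Lo, Hi};
}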
3444
3445// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
3446// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
3447// one of the above mentioned nodes. It has to be wrapped because otherwise
3448// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
3449// be used to form addressing mode. These wrapped nodes will be selected
3450// into MOVi.
3451SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
3452 SelectionDAG &DAG) const {
3453 EVT PtrVT = Op.getValueType();
3454 // FIXME there is no actual debug info here
3455 SDLoc dl(Op);
3456 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3457 SDValue Res;
3458
3459 // When generating execute-only code Constant Pools must be promoted to the
3460 // global data section. It's a bit ugly that we can't share them across basic
3461  // blocks, but this way we guarantee that execute-only behaves correctly with
3462 // position-independent addressing modes.
3463 if (Subtarget->genExecuteOnly()) {
3464 auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
3465 auto T = const_cast<Type*>(CP->getType());
3466 auto C = const_cast<Constant*>(CP->getConstVal());
3467 auto M = const_cast<Module*>(DAG.getMachineFunction().
3468 getFunction().getParent());
3469 auto GV = new GlobalVariable(
3470 *M, T, /*isConstant=*/true, GlobalVariable::InternalLinkage, C,
3471 Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" +
3472 Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" +
3473 Twine(AFI->createPICLabelUId())
3474 );
3475 SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV),
3476 dl, PtrVT);
3477 return LowerGlobalAddress(GA, DAG);
3478 }
3479
3480 // The 16-bit ADR instruction can only encode offsets that are multiples of 4,
3481 // so we need to align to at least 4 bytes when we don't have 32-bit ADR.
3482 Align CPAlign = CP->getAlign();
3483 if (Subtarget->isThumb1Only())
3484 CPAlign = std::max(CPAlign, Align(4));
3485 if (CP->isMachineConstantPoolEntry())
3486 Res =
3487 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CPAlign);
3488 else
3489 Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CPAlign);
3490 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
3491}
3492
3493unsigned ARMTargetLowering::getJumpTableEncoding() const {
3494 return MachineJumpTableInfo::EK_Inline;
3495}
3496
3497SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
3498 SelectionDAG &DAG) const {
3499 MachineFunction &MF = DAG.getMachineFunction();
3500 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3501 unsigned ARMPCLabelIndex = 0;
3502 SDLoc DL(Op);
3503 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3504 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
3505 SDValue CPAddr;
3506 bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
3507 if (!IsPositionIndependent) {
3508 CPAddr = DAG.getTargetConstantPool(BA, PtrVT, Align(4));
3509 } else {
3510 unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
3511 ARMPCLabelIndex = AFI->createPICLabelUId();
3512 ARMConstantPoolValue *CPV =
3513 ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
3514 ARMCP::CPBlockAddress, PCAdj);
3515 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3516 }
3517 CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
3518 SDValue Result = DAG.getLoad(
3519 PtrVT, DL, DAG.getEntryNode(), CPAddr,
3520 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3521 if (!IsPositionIndependent)
3522 return Result;
3523 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
3524 return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
3525}
3526
3527/// Convert a TLS address reference into the correct sequence of loads
3528/// and calls to compute the variable's address for Darwin, and return an
3529/// SDValue containing the final node.
3530
3531/// Darwin only has one TLS scheme which must be capable of dealing with the
3532/// fully general situation, in the worst case. This means:
3533/// + "extern __thread" declaration.
3534/// + Defined in a possibly unknown dynamic library.
3535///
3536/// The general system is that each __thread variable has a [3 x i32] descriptor
3537/// which contains information used by the runtime to calculate the address. The
3538/// only part of this the compiler needs to know about is the first word, which
3539/// contains a function pointer that must be called with the address of the
3540/// entire descriptor in "r0".
3541///
3542/// Since this descriptor may be in a different unit, in general access must
3543/// proceed along the usual ARM rules. A common sequence to produce is:
3544///
3545/// movw rT1, :lower16:_var$non_lazy_ptr
3546/// movt rT1, :upper16:_var$non_lazy_ptr
3547/// ldr r0, [rT1]
3548/// ldr rT2, [r0]
3549/// blx rT2
3550/// [...address now in r0...]
3551SDValue
3552ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
3553 SelectionDAG &DAG) const {
3554  assert(Subtarget->isTargetDarwin() &&
3555         "This function expects a Darwin target");
3556 SDLoc DL(Op);
3557
3558  // The first step is to get the address of the actual global symbol. This is
3559 // the TLS descriptor lives.
3560 SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
3561
3562 // The first entry in the descriptor is a function pointer that we must call
3563 // to obtain the address of the variable.
3564 SDValue Chain = DAG.getEntryNode();
3565 SDValue FuncTLVGet = DAG.getLoad(
3566 MVT::i32, DL, Chain, DescAddr,
3567 MachinePointerInfo::getGOT(DAG.getMachineFunction()), Align(4),
3568 MachineMemOperand::MONonTemporal | MachineMemOperand::MODereferenceable |
3569 MachineMemOperand::MOInvariant);
3570 Chain = FuncTLVGet.getValue(1);
3571
3572 MachineFunction &F = DAG.getMachineFunction();
3573 MachineFrameInfo &MFI = F.getFrameInfo();
3574 MFI.setAdjustsStack(true);
3575
3576 // TLS calls preserve all registers except those that absolutely must be
3577 // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
3578 // silly).
3579 auto TRI =
3580 getTargetMachine().getSubtargetImpl(F.getFunction())->getRegisterInfo();
3581 auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
3582 const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
3583
3584 // Finally, we can make the call. This is just a degenerate version of a
3585  // normal ARM call node: r0 takes the address of the descriptor, and
3586 // returns the address of the variable in this thread.
3587 Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
3588 Chain =
3589 DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
3590 Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
3591 DAG.getRegisterMask(Mask), Chain.getValue(1));
3592 return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
3593}
3594
3595SDValue
3596ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
3597 SelectionDAG &DAG) const {
3598  assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
3599
3600 SDValue Chain = DAG.getEntryNode();
3601 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3602 SDLoc DL(Op);
3603
3604 // Load the current TEB (thread environment block)
3605 SDValue Ops[] = {Chain,
3606 DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
3607 DAG.getTargetConstant(15, DL, MVT::i32),
3608 DAG.getTargetConstant(0, DL, MVT::i32),
3609 DAG.getTargetConstant(13, DL, MVT::i32),
3610 DAG.getTargetConstant(0, DL, MVT::i32),
3611 DAG.getTargetConstant(2, DL, MVT::i32)};
3612 SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
3613 DAG.getVTList(MVT::i32, MVT::Other), Ops);
3614
3615 SDValue TEB = CurrentTEB.getValue(0);
3616 Chain = CurrentTEB.getValue(1);
3617
3618 // Load the ThreadLocalStoragePointer from the TEB
3619 // A pointer to the TLS array is located at offset 0x2c from the TEB.
3620 SDValue TLSArray =
3621 DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
3622 TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
3623
3624 // The pointer to the thread's TLS data area is at the TLS Index scaled by 4
3625 // offset into the TLSArray.
3626
3627 // Load the TLS index from the C runtime
3628 SDValue TLSIndex =
3629 DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
3630 TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
3631 TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());
3632
3633 SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
3634 DAG.getConstant(2, DL, MVT::i32));
3635 SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
3636 DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
3637 MachinePointerInfo());
3638
3639 // Get the offset of the start of the .tls section (section base)
3640 const auto *GA = cast<GlobalAddressSDNode>(Op);
3641 auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
3642 SDValue Offset = DAG.getLoad(
3643 PtrVT, DL, Chain,
3644 DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
3645 DAG.getTargetConstantPool(CPV, PtrVT, Align(4))),
3646 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3647
3648 return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
3649}
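// ---- Illustrative sketch (editor's addition; not part of ARMISelLowering.cpp) ----
// In pseudo-C, the load chain built above computes the following address; the
// variable names are hypothetical, while the offsets and symbols are the ones
// used in the code:
//
//   char  *TEB     = read_cp15(c13, c0, 2);        // thread environment block
//   char **TLSArr  = *(char ***)(TEB + 0x2c);      // ThreadLocalStoragePointer
//   char  *TLSBase = TLSArr[_tls_index];           // this module's TLS block
//   void  *VarAddr = TLSBase + SECREL32(var);      // section-relative offset
//
// i.e. the final ISD::ADD adds the constant pool's SECREL offset for the global
// to the per-thread base of the module's .tls section.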
3650
3651// Lower ISD::GlobalTLSAddress using the "general dynamic" model
3652SDValue
3653ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
3654 SelectionDAG &DAG) const {
3655 SDLoc dl(GA);
3656 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3657 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3658 MachineFunction &MF = DAG.getMachineFunction();
3659 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3660 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3661 ARMConstantPoolValue *CPV =
3662 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3663 ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
3664 SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3665 Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
3666 Argument = DAG.getLoad(
3667 PtrVT, dl, DAG.getEntryNode(), Argument,
3668 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3669 SDValue Chain = Argument.getValue(1);
3670
3671 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3672 Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
3673
3674 // call __tls_get_addr.
3675 ArgListTy Args;
3676 ArgListEntry Entry;
3677 Entry.Node = Argument;
3678 Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
3679 Args.push_back(Entry);
3680
3681 // FIXME: is there useful debug info available here?
3682 TargetLowering::CallLoweringInfo CLI(DAG);
3683 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3684 CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
3685 DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
3686
3687 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3688 return CallResult.first;
3689}
3690
3691// Lower ISD::GlobalTLSAddress using the "initial exec" or
3692// "local exec" model.
3693SDValue
3694ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
3695 SelectionDAG &DAG,
3696 TLSModel::Model model) const {
3697 const GlobalValue *GV = GA->getGlobal();
3698 SDLoc dl(GA);
3699 SDValue Offset;
3700 SDValue Chain = DAG.getEntryNode();
3701 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3702 // Get the Thread Pointer
3703 SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
3704
3705 if (model == TLSModel::InitialExec) {
3706 MachineFunction &MF = DAG.getMachineFunction();
3707 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3708 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3709 // Initial exec model.
3710 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3711 ARMConstantPoolValue *CPV =
3712 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3713 ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
3714 true);
3715 Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3716 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3717 Offset = DAG.getLoad(
3718 PtrVT, dl, Chain, Offset,
3719 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3720 Chain = Offset.getValue(1);
3721
3722 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3723 Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
3724
3725 Offset = DAG.getLoad(
3726 PtrVT, dl, Chain, Offset,
3727 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3728 } else {
3729 // local exec model
3730    assert(model == TLSModel::LocalExec);
3731 ARMConstantPoolValue *CPV =
3732 ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
3733 Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3734 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3735 Offset = DAG.getLoad(
3736 PtrVT, dl, Chain, Offset,
3737 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3738 }
3739
3740 // The address of the thread local variable is the add of the thread
3741 // pointer with the offset of the variable.
3742 return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
3743}
3744
3745SDValue
3746ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
3747 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3748 if (DAG.getTarget().useEmulatedTLS())
3749 return LowerToTLSEmulatedModel(GA, DAG);
3750
3751 if (Subtarget->isTargetDarwin())
3752 return LowerGlobalTLSAddressDarwin(Op, DAG);
3753
3754 if (Subtarget->isTargetWindows())
3755 return LowerGlobalTLSAddressWindows(Op, DAG);
3756
3757 // TODO: implement the "local dynamic" model
3758  assert(Subtarget->isTargetELF() && "Only ELF implemented here");
3759 TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
3760
3761 switch (model) {
3762 case TLSModel::GeneralDynamic:
3763 case TLSModel::LocalDynamic:
3764 return LowerToTLSGeneralDynamicModel(GA, DAG);
3765 case TLSModel::InitialExec:
3766 case TLSModel::LocalExec:
3767 return LowerToTLSExecModels(GA, DAG, model);
3768 }
3769 llvm_unreachable("bogus TLS model");
3770}
3771
3772/// Return true if all users of V are within function F, looking through
3773/// ConstantExprs.
3774static bool allUsersAreInFunction(const Value *V, const Function *F) {
3775 SmallVector<const User*,4> Worklist(V->users());
3776 while (!Worklist.empty()) {
3777 auto *U = Worklist.pop_back_val();
3778 if (isa<ConstantExpr>(U)) {
3779 append_range(Worklist, U->users());
3780 continue;
3781 }
3782
3783 auto *I = dyn_cast<Instruction>(U);
3784 if (!I || I->getParent()->getParent() != F)
3785 return false;
3786 }
3787 return true;
3788}
3789
3790static SDValue promoteToConstantPool(const ARMTargetLowering *TLI,
3791 const GlobalValue *GV, SelectionDAG &DAG,
3792 EVT PtrVT, const SDLoc &dl) {
3793 // If we're creating a pool entry for a constant global with unnamed address,
3794 // and the global is small enough, we can emit it inline into the constant pool
3795 // to save ourselves an indirection.
3796 //
3797 // This is a win if the constant is only used in one function (so it doesn't
3798 // need to be duplicated) or duplicating the constant wouldn't increase code
3799 // size (implying the constant is no larger than 4 bytes).
3800 const Function &F = DAG.getMachineFunction().getFunction();
3801
3802 // We rely on this decision to inline being idempotent and unrelated to the
3803 // use-site. We know that if we inline a variable at one use site, we'll
3804 // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
3805 // doesn't know about this optimization, so bail out if it's enabled;
3806 // otherwise we could decide to inline here (and thus never emit the GV)
3807 // while fast-isel generated code still requires the GV.
3808 if (!EnableConstpoolPromotion ||
3809 DAG.getMachineFunction().getTarget().Options.EnableFastISel)
3810 return SDValue();
3811
3812 auto *GVar = dyn_cast<GlobalVariable>(GV);
3813 if (!GVar || !GVar->hasInitializer() ||
3814 !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
3815 !GVar->hasLocalLinkage())
3816 return SDValue();
3817
3818 // If we inline a value that contains relocations, we move the relocations
3819 // from .data to .text. This is not allowed in position-independent code.
3820 auto *Init = GVar->getInitializer();
3821 if ((TLI->isPositionIndependent() || TLI->getSubtarget()->isROPI()) &&
3822 Init->needsDynamicRelocation())
3823 return SDValue();
3824
3825 // The constant islands pass can only really deal with alignment requests
3826 // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
3827 // any type requiring alignment greater than 4 bytes. We also
3828 // can only promote constants that are multiples of 4 bytes in size or
3829 // are paddable to a multiple of 4. Currently we only try to pad constants
3830 // that are strings, for simplicity.
3831 auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
3832 unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
3833 Align PrefAlign = DAG.getDataLayout().getPreferredAlign(GVar);
3834 unsigned RequiredPadding = 4 - (Size % 4);
3835 bool PaddingPossible =
3836 RequiredPadding == 4 || (CDAInit && CDAInit->isString());
3837 if (!PaddingPossible || PrefAlign > 4 || Size > ConstpoolPromotionMaxSize ||
3838 Size == 0)
3839 return SDValue();
3840
3841 unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
3842 MachineFunction &MF = DAG.getMachineFunction();
3843 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3844
3845 // We can't bloat the constant pool too much, else the ConstantIslands pass
3846 // may fail to converge. If we haven't promoted this global yet (it may have
3847 // multiple uses), and promoting it would increase the constant pool size (Sz
3848 // > 4), ensure we have space to do so up to MaxTotal.
3849 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
3850 if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
3851 ConstpoolPromotionMaxTotal)
3852 return SDValue();
3853
3854 // This is only valid if all users are in a single function; we can't clone
3855 // the constant in general. The LLVM IR unnamed_addr allows merging
3856 // constants, but not cloning them.
3857 //
3858 // We could potentially allow cloning if we could prove all uses of the
3859 // constant in the current function don't care about the address, like
3860 // printf format strings. But that isn't implemented for now.
3861 if (!allUsersAreInFunction(GVar, &F))
3862 return SDValue();
3863
3864 // We're going to inline this global. Pad it out if needed.
3865 if (RequiredPadding != 4) {
3866 StringRef S = CDAInit->getAsString();
3867
3868 SmallVector<uint8_t,16> V(S.size());
3869 std::copy(S.bytes_begin(), S.bytes_end(), V.begin());
3870 while (RequiredPadding--)
3871 V.push_back(0);
3872 Init = ConstantDataArray::get(*DAG.getContext(), V);
3873 }
3874
3875 auto CPVal = ARMConstantPoolConstant::Create(GVar, Init);
3876 SDValue CPAddr = DAG.getTargetConstantPool(CPVal, PtrVT, Align(4));
3877 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
3878 AFI->markGlobalAsPromotedToConstantPool(GVar);
3879 AFI->setPromotedConstpoolIncrease(AFI->getPromotedConstpoolIncrease() +
3880 PaddedSize - 4);
3881 }
3882 ++NumConstpoolPromoted;
3883 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3884}
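// [Editor's note: illustrative sketch, not part of ARMISelLowering.cpp. It
// restates the size/padding bookkeeping above: a 5-byte string initializer
// gets RequiredPadding = 4 - (5 % 4) = 3, is emitted as 8 bytes, and charges
// PaddedSize - 4 = 4 bytes against the ConstpoolPromotionMaxTotal budget.]
struct PromotionSizing {
  unsigned PaddedSize;     // bytes actually emitted into the constant pool
  unsigned BudgetIncrease; // amount counted against the promotion budget
};

inline PromotionSizing sizeForPromotion(unsigned Size) {
  unsigned RequiredPadding = 4 - (Size % 4); // 4 means "already a multiple of 4"
  unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
  return {PaddedSize, PaddedSize - 4};
}
// sizeForPromotion(5) == {8, 4}; sizeForPromotion(4) == {4, 0}.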
3885
3886bool ARMTargetLowering::isReadOnly(const GlobalValue *GV) const {
3887 if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
3888 if (!(GV = GA->getAliaseeObject()))
3889 return false;
3890 if (const auto *V = dyn_cast<GlobalVariable>(GV))
3891 return V->isConstant();
3892 return isa<Function>(GV);
3893}
3894
3895SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op,
3896 SelectionDAG &DAG) const {
3897 switch (Subtarget->getTargetTriple().getObjectFormat()) {
3898 default: llvm_unreachable("unknown object format");
3899 case Triple::COFF:
3900 return LowerGlobalAddressWindows(Op, DAG);
3901 case Triple::ELF:
3902 return LowerGlobalAddressELF(Op, DAG);
3903 case Triple::MachO:
3904 return LowerGlobalAddressDarwin(Op, DAG);
3905 }
3906}
3907
3908SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
3909 SelectionDAG &DAG) const {
3910 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3911 SDLoc dl(Op);
3912 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3913 const TargetMachine &TM = getTargetMachine();
3914 bool IsRO = isReadOnly(GV);
3915
3916 // promoteToConstantPool only if not generating XO text section
3917 if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly())
3918 if (SDValue V = promoteToConstantPool(this, GV, DAG, PtrVT, dl))
3919 return V;
3920
3921 if (isPositionIndependent()) {
3922 bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
3923 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3924 UseGOT_PREL ? ARMII::MO_GOT : 0);
3925 SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3926 if (UseGOT_PREL)
3927 Result =
3928 DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3929 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3930 return Result;
3931 } else if (Subtarget->isROPI() && IsRO) {
3932 // PC-relative.
3933 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
3934 SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3935 return Result;
3936 } else if (Subtarget->isRWPI() && !IsRO) {
3937 // SB-relative.
3938 SDValue RelAddr;
3939 if (Subtarget->useMovt()) {
3940 ++NumMovwMovt;
3941 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
3942 RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
3943 } else { // use literal pool for address constant
3944 ARMConstantPoolValue *CPV =
3945 ARMConstantPoolConstant::Create(GV, ARMCP::SBREL);
3946 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3947 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3948 RelAddr = DAG.getLoad(
3949 PtrVT, dl, DAG.getEntryNode(), CPAddr,
3950 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3951 }
3952 SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);
3953 SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr);
3954 return Result;
3955 }
3956
3957 // If we have T2 ops, we can materialize the address directly via movt/movw
3958 // pair. This is always cheaper.
3959 if (Subtarget->useMovt()) {
3960 ++NumMovwMovt;
3961 // FIXME: Once remat is capable of dealing with instructions with register
3962 // operands, expand this into two nodes.
3963 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
3964 DAG.getTargetGlobalAddress(GV, dl, PtrVT));
3965 } else {
3966 SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, Align(4));
3967 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3968 return DAG.getLoad(
3969 PtrVT, dl, DAG.getEntryNode(), CPAddr,
3970 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3971 }
3972}
3973
3974SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
3975 SelectionDAG &DAG) const {
3976 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3977        "ROPI/RWPI not currently supported for Darwin");
3978 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3979 SDLoc dl(Op);
3980 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3981
3982 if (Subtarget->useMovt())
3983 ++NumMovwMovt;
3984
3985 // FIXME: Once remat is capable of dealing with instructions with register
3986 // operands, expand this into multiple nodes
3987 unsigned Wrapper =
3988 isPositionIndependent() ? ARMISD::WrapperPIC : ARMISD::Wrapper;
3989
3990 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
3991 SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
3992
3993 if (Subtarget->isGVIndirectSymbol(GV))
3994 Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3995 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3996 return Result;
3997}
3998
3999SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
4000 SelectionDAG &DAG) const {
4001 assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
4002 assert(Subtarget->useMovt() &&
4003        "Windows on ARM expects to use movw/movt");
4004 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
4005        "ROPI/RWPI not currently supported for Windows");
4006
4007 const TargetMachine &TM = getTargetMachine();
4008 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
4009 ARMII::TOF TargetFlags = ARMII::MO_NO_FLAG;
4010 if (GV->hasDLLImportStorageClass())
4011 TargetFlags = ARMII::MO_DLLIMPORT;
4012 else if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
4013 TargetFlags = ARMII::MO_COFFSTUB;
4014 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4015 SDValue Result;
4016 SDLoc DL(Op);
4017
4018 ++NumMovwMovt;
4019
4020 // FIXME: Once remat is capable of dealing with instructions with register
4021 // operands, expand this into two nodes.
4022 Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
4023 DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*offset=*/0,
4024 TargetFlags));
4025 if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
4026 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
4027 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
4028 return Result;
4029}
4030
4031SDValue
4032ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
4033 SDLoc dl(Op);
4034 SDValue Val = DAG.getConstant(0, dl, MVT::i32);
4035 return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
4036 DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
4037 Op.getOperand(1), Val);
4038}
4039
4040SDValue
4041ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
4042 SDLoc dl(Op);
4043 return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
4044 Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
4045}
4046
4047SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
4048 SelectionDAG &DAG) const {
4049 SDLoc dl(Op);
4050 return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,
4051 Op.getOperand(0));
4052}
4053
4054SDValue ARMTargetLowering::LowerINTRINSIC_VOID(
4055 SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const {
4056 unsigned IntNo =
4057 cast<ConstantSDNode>(
4058 Op.getOperand(Op.getOperand(0).getValueType() == MVT::Other))
4059 ->getZExtValue();
4060 switch (IntNo) {
4061 default:
4062 return SDValue(); // Don't custom lower most intrinsics.
4063 case Intrinsic::arm_gnu_eabi_mcount: {
4064 MachineFunction &MF = DAG.getMachineFunction();
4065 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4066 SDLoc dl(Op);
4067 SDValue Chain = Op.getOperand(0);
4068 // call "\01__gnu_mcount_nc"
4069 const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
4070 const uint32_t *Mask =
4071 ARI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
4072 assert(Mask && "Missing call preserved mask for calling convention");
4073 // Mark LR an implicit live-in.
4074 Register Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
4075 SDValue ReturnAddress =
4076 DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, PtrVT);
4077 constexpr EVT ResultTys[] = {MVT::Other, MVT::Glue};
4078 SDValue Callee =
4079 DAG.getTargetExternalSymbol("\01__gnu_mcount_nc", PtrVT, 0);
4080 SDValue RegisterMask = DAG.getRegisterMask(Mask);
4081 if (Subtarget->isThumb())
4082 return SDValue(
4083 DAG.getMachineNode(
4084 ARM::tBL_PUSHLR, dl, ResultTys,
4085 {ReturnAddress, DAG.getTargetConstant(ARMCC::AL, dl, PtrVT),
4086 DAG.getRegister(0, PtrVT), Callee, RegisterMask, Chain}),
4087 0);
4088 return SDValue(
4089 DAG.getMachineNode(ARM::BL_PUSHLR, dl, ResultTys,
4090 {ReturnAddress, Callee, RegisterMask, Chain}),
4091 0);
4092 }
4093 }
4094}
4095
4096SDValue
4097ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
4098 const ARMSubtarget *Subtarget) const {
4099 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4100 SDLoc dl(Op);
4101 switch (IntNo) {
4102 default: return SDValue(); // Don't custom lower most intrinsics.
4103 case Intrinsic::thread_pointer: {
4104 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4105 return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
4106 }
4107 case Intrinsic::arm_cls: {
4108 const SDValue &Operand = Op.getOperand(1);
4109 const EVT VTy = Op.getValueType();
4110 SDValue SRA =
4111 DAG.getNode(ISD::SRA, dl, VTy, Operand, DAG.getConstant(31, dl, VTy));
4112 SDValue XOR = DAG.getNode(ISD::XOR, dl, VTy, SRA, Operand);
4113 SDValue SHL =
4114 DAG.getNode(ISD::SHL, dl, VTy, XOR, DAG.getConstant(1, dl, VTy));
4115 SDValue OR =
4116 DAG.getNode(ISD::OR, dl, VTy, SHL, DAG.getConstant(1, dl, VTy));
4117 SDValue Result = DAG.getNode(ISD::CTLZ, dl, VTy, OR);
4118 return Result;
4119 }
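// [Editor's note: illustrative sketch, not part of ARMISelLowering.cpp. A
// plain C++ rendering of the DAG built above for arm_cls: cls(x), the number
// of leading bits that match the sign bit (not counting the sign bit itself),
// is computed as ctlz(((x ^ (x >> 31)) << 1) | 1). Requires C++20 for
// std::countl_zero; x >> 31 is assumed to be an arithmetic shift.]
#include <bit>
#include <cstdint>

inline unsigned cls32(int32_t x) {
  uint32_t folded = static_cast<uint32_t>(x ^ (x >> 31)); // XOR away the sign bits
  uint32_t guarded = (folded << 1) | 1u;                   // the |1 makes ctlz well defined
  return std::countl_zero(guarded);
}
// cls32(0) == 31, cls32(-1) == 31, cls32(1) == 30, cls32(0x40000000) == 0.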
4120 case Intrinsic::arm_cls64: {
4121 // cls(x) = if cls(hi(x)) != 31 then cls(hi(x))
4122 // else 31 + clz(if hi(x) == 0 then lo(x) else not(lo(x)))
4123 const SDValue &Operand = Op.getOperand(1);
4124 const EVT VTy = Op.getValueType();
4125 SDValue Lo, Hi;
4126 std::tie(Lo, Hi) = DAG.SplitScalar(Operand, dl, VTy, VTy);
4127 SDValue Constant0 = DAG.getConstant(0, dl, VTy);
4128 SDValue Constant1 = DAG.getConstant(1, dl, VTy);
4129 SDValue Constant31 = DAG.getConstant(31, dl, VTy);
4130 SDValue SRAHi = DAG.getNode(ISD::SRA, dl, VTy, Hi, Constant31);
4131 SDValue XORHi = DAG.getNode(ISD::XOR, dl, VTy, SRAHi, Hi);
4132 SDValue SHLHi = DAG.getNode(ISD::SHL, dl, VTy, XORHi, Constant1);
4133 SDValue ORHi = DAG.getNode(ISD::OR, dl, VTy, SHLHi, Constant1);
4134 SDValue CLSHi = DAG.getNode(ISD::CTLZ, dl, VTy, ORHi);
4135 SDValue CheckLo =
4136 DAG.getSetCC(dl, MVT::i1, CLSHi, Constant31, ISD::CondCode::SETEQ);
4137 SDValue HiIsZero =
4138 DAG.getSetCC(dl, MVT::i1, Hi, Constant0, ISD::CondCode::SETEQ);
4139 SDValue AdjustedLo =
4140 DAG.getSelect(dl, VTy, HiIsZero, Lo, DAG.getNOT(dl, Lo, VTy));
4141 SDValue CLZAdjustedLo = DAG.getNode(ISD::CTLZ, dl, VTy, AdjustedLo);
4142 SDValue Result =
4143 DAG.getSelect(dl, VTy, CheckLo,
4144 DAG.getNode(ISD::ADD, dl, VTy, CLZAdjustedLo, Constant31), CLSHi);
4145 return Result;
4146 }
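// [Editor's note: illustrative sketch, not part of ARMISelLowering.cpp. A
// direct transcription of the cls64 formula in the comment above, reusing
// cls32() and <bit> from the previous sketch: when the high word is not
// entirely sign bits the answer is cls(hi); otherwise the count continues
// into the low word, inverted when the high word was all ones.]
inline unsigned cls64(int64_t x) {
  uint32_t lo = static_cast<uint32_t>(x);
  int32_t hi = static_cast<int32_t>(x >> 32);
  unsigned clsHi = cls32(hi);
  if (clsHi != 31)
    return clsHi;
  uint32_t adjustedLo = (hi == 0) ? lo : ~lo;
  return 31 + std::countl_zero(adjustedLo);
}
// cls64(0) == 63, cls64(-1) == 63, cls64(1) == 62.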
4147 case Intrinsic::eh_sjlj_lsda: {
4148 MachineFunction &MF = DAG.getMachineFunction();
4149 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
4150 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
4151 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4152 SDValue CPAddr;
4153 bool IsPositionIndependent = isPositionIndependent();
4154 unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
4155 ARMConstantPoolValue *CPV =
4156 ARMConstantPoolConstant::Create(&MF.getFunction(), ARMPCLabelIndex,
4157 ARMCP::CPLSDA, PCAdj);
4158 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
4159 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
4160 SDValue Result = DAG.getLoad(
4161 PtrVT, dl, DAG.getEntryNode(), CPAddr,
4162 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
4163
4164 if (IsPositionIndependent) {
4165 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
4166 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
4167 }
4168 return Result;
4169 }
4170 case Intrinsic::arm_neon_vabs:
4171 return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(),
4172 Op.getOperand(1));
4173 case Intrinsic::arm_neon_vmulls:
4174 case Intrinsic::arm_neon_vmullu: {
4175 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
4176 ? ARMISD::VMULLs : ARMISD::VMULLu;
4177 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4178 Op.getOperand(1), Op.getOperand(2));
4179 }
4180 case Intrinsic::arm_neon_vminnm:
4181 case Intrinsic::arm_neon_vmaxnm: {
4182 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
4183 ? ISD::FMINNUM : ISD::FMAXNUM;
4184 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4185 Op.getOperand(1), Op.getOperand(2));
4186 }
4187 case Intrinsic::arm_neon_vminu:
4188 case Intrinsic::arm_neon_vmaxu: {
4189 if (Op.getValueType().isFloatingPoint())
4190 return SDValue();
4191 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
4192 ? ISD::UMIN : ISD::UMAX;
4193 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4194 Op.getOperand(1), Op.getOperand(2));
4195 }
4196 case Intrinsic::arm_neon_vmins:
4197 case Intrinsic::arm_neon_vmaxs: {
4198 // v{min,max}s is overloaded between signed integers and floats.
4199 if (!Op.getValueType().isFloatingPoint()) {
4200 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
4201 ? ISD::SMIN : ISD::SMAX;
4202 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4203 Op.getOperand(1), Op.getOperand(2));
4204 }
4205 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
4206 ? ISD::FMINIMUM : ISD::FMAXIMUM;
4207 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4208 Op.getOperand(1), Op.getOperand(2));
4209 }
4210 case Intrinsic::arm_neon_vtbl1:
4211 return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(),
4212 Op.getOperand(1), Op.getOperand(2));
4213 case Intrinsic::arm_neon_vtbl2:
4214 return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(),
4215 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4216 case Intrinsic::arm_mve_pred_i2v:
4217 case Intrinsic::arm_mve_pred_v2i:
4218 return DAG.getNode(ARMISD::PREDICATE_CAST, SDLoc(Op), Op.getValueType(),
4219 Op.getOperand(1));
4220 case Intrinsic::arm_mve_vreinterpretq:
4221 return DAG.getNode(ARMISD::VECTOR_REG_CAST, SDLoc(Op), Op.getValueType(),
4222 Op.getOperand(1));
4223 case Intrinsic::arm_mve_lsll:
4224 return DAG.getNode(ARMISD::LSLL, SDLoc(Op), Op->getVTList(),
4225 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4226 case Intrinsic::arm_mve_asrl:
4227 return DAG.getNode(ARMISD::ASRL, SDLoc(Op), Op->getVTList(),
4228 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4229 }
4230}
4231
4232static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
4233 const ARMSubtarget *Subtarget) {
4234 SDLoc dl(Op);
4235 ConstantSDNode *SSIDNode = cast<ConstantSDNode>(Op.getOperand(2));
4236 auto SSID = static_cast<SyncScope::ID>(SSIDNode->getZExtValue());
4237 if (SSID == SyncScope::SingleThread)
4238 return Op;
4239
4240 if (!Subtarget->hasDataBarrier()) {
4241 // Some ARMv6 CPUs can support data barriers with an mcr instruction.
4242 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
4243 // here.
4244 assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
4245        "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
4246 return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
4247 DAG.getConstant(0, dl, MVT::i32));
4248 }
4249
4250 ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
4251 AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
4252 ARM_MB::MemBOpt Domain = ARM_MB::ISH;
4253 if (Subtarget->isMClass()) {
4254 // Only a full system barrier exists in the M-class architectures.
4255 Domain = ARM_MB::SY;
4256 } else if (Subtarget->preferISHSTBarriers() &&
4257 Ord == AtomicOrdering::Release) {
4258 // Swift happens to implement ISHST barriers in a way that's compatible with
4259 // Release semantics but weaker than ISH so we'd be fools not to use
4260 // it. Beware: other processors probably don't!
4261 Domain = ARM_MB::ISHST;
4262 }
4263
4264 return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
4265 DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),
4266 DAG.getConstant(Domain, dl, MVT::i32));
4267}
4268
4269static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
4270 const ARMSubtarget *Subtarget) {
4271 // ARM pre-v5TE and Thumb1 do not have preload instructions.
4272 if (!(Subtarget->isThumb2() ||
4273 (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
4274 // Just preserve the chain.
4275 return Op.getOperand(0);
4276
4277 SDLoc dl(Op);
4278 unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
4279 if (!isRead &&
4280 (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
4281 // ARMv7 with MP extension has PLDW.
4282 return Op.getOperand(0);
4283
4284 unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
4285 if (Subtarget->isThumb()) {
4286 // Invert the bits.
4287 isRead = ~isRead & 1;
4288 isData = ~isData & 1;
4289 }
4290
4291 return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
4292 Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32),
4293 DAG.getConstant(isData, dl, MVT::i32));
4294}
4295
4296static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
4297 MachineFunction &MF = DAG.getMachineFunction();
4298 ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
4299
4300 // vastart just stores the address of the VarArgsFrameIndex slot into the
4301 // memory location argument.
4302 SDLoc dl(Op);
4303 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4304 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4305 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4306 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
4307 MachinePointerInfo(SV));
4308}
4309
4310SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA,
4311 CCValAssign &NextVA,
4312 SDValue &Root,
4313 SelectionDAG &DAG,
4314 const SDLoc &dl) const {
4315 MachineFunction &MF = DAG.getMachineFunction();
4316 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
4317
4318 const TargetRegisterClass *RC;
4319 if (AFI->isThumb1OnlyFunction())
4320 RC = &ARM::tGPRRegClass;
4321 else
4322 RC = &ARM::GPRRegClass;
4323
4324 // Transform the arguments stored in physical registers into virtual ones.
4325 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4326 SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
4327
4328 SDValue ArgValue2;
4329 if (NextVA.isMemLoc()) {
4330 MachineFrameInfo &MFI = MF.getFrameInfo();
4331 int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true);
4332
4333 // Create load node to retrieve arguments from the stack.
4334 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4335 ArgValue2 = DAG.getLoad(
4336 MVT::i32, dl, Root, FIN,
4337 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
4338 } else {
4339 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
4340 ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
4341 }
4342 if (!Subtarget->isLittle())
4343 std::swap (ArgValue, ArgValue2);
4344 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
4345}
4346
4347// The remaining GPRs hold either the beginning of variable-argument
4348// data, or the beginning of an aggregate passed by value (usually
4349// byval). Either way, we allocate stack slots adjacent to the data
4350// provided by our caller, and store the unallocated registers there.
4351// If this is a variadic function, the va_list pointer will begin with
4352// these values; otherwise, this reassembles a (byval) structure that
4353// was split between registers and memory.
4354 // Return: the frame index that the registers were stored into.
4355int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
4356 const SDLoc &dl, SDValue &Chain,
4357 const Value *OrigArg,
4358 unsigned InRegsParamRecordIdx,
4359 int ArgOffset, unsigned ArgSize) const {
4360 // Currently, two use cases are possible:
4361 // Case #1. Non-var-args function, and we meet the first byval parameter.
4362 // Set up the first unallocated register as the first byval register;
4363 // eat all remaining registers
4364 // (these two actions are performed by the HandleByVal method).
4365 // Then, here, we initialize the stack frame with
4366 // "store-reg" instructions.
4367 // Case #2. Var-args function that doesn't contain byval parameters.
4368 // The same: eat all remaining unallocated registers and
4369 // initialize the stack frame.
4370
4371 MachineFunction &MF = DAG.getMachineFunction();
4372 MachineFrameInfo &MFI = MF.getFrameInfo();
4373 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
4374 unsigned RBegin, REnd;
4375 if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
4376 CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
4377 } else {
4378 unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
4379 RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
4380 REnd = ARM::R4;
4381 }
4382
4383 if (REnd != RBegin)
4384 ArgOffset = -4 * (ARM::R4 - RBegin);
4385
4386 auto PtrVT = getPointerTy(DAG.getDataLayout());
4387 int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false);
4388 SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);
4389
4390 SmallVector<SDValue, 4> MemOps;
4391 const TargetRegisterClass *RC =
4392 AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
4393
4394 for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
4395 Register VReg = MF.addLiveIn(Reg, RC);
4396 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
4397 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4398 MachinePointerInfo(OrigArg, 4 * i));
4399 MemOps.push_back(Store);
4400 FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
4401 }
4402
4403 if (!MemOps.empty())
4404 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4405 return FrameIndex;
4406}
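// [Editor's note: illustrative sketch, not part of ARMISelLowering.cpp. The
// offset computation above, with r0..r3 modelled as indices 0..3 and 4
// standing for ARM::R4. If the save area starts at r2 (RBegin == 2), two
// registers are spilled and the fixed object sits 8 bytes below the incoming
// stack pointer; if there is nothing to spill, the caller-provided offset is
// kept.]
inline int byvalSaveAreaOffset(unsigned RBegin, int IncomingArgOffset) {
  const unsigned REnd = 4;                        // one past r3, i.e. ARM::R4
  if (REnd != RBegin)
    return -4 * static_cast<int>(REnd - RBegin);  // below the CFA
  return IncomingArgOffset;                       // nothing to spill
}
// byvalSaveAreaOffset(2, 16) == -8; byvalSaveAreaOffset(4, 16) == 16.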
4407
4408 // Set up the stack frame that the va_list pointer will start from.
4409void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
4410 const SDLoc &dl, SDValue &Chain,
4411 unsigned ArgOffset,
4412 unsigned TotalArgRegsSaveSize,
4413 bool ForceMutable) const {
4414 MachineFunction &MF = DAG.getMachineFunction();
4415 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
4416
4417 // Try to store any remaining integer argument regs
4418 // to their spots on the stack so that they may be loaded by dereferencing
4419 // the result of va_next.
4420 // If there are no regs to be stored, just point the address after the last
4421 // argument passed via the stack.
4422 int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
4423 CCInfo.getInRegsParamsCount(),
4424 CCInfo.getNextStackOffset(),
4425 std::max(4U, TotalArgRegsSaveSize));
4426 AFI->setVarArgsFrameIndex(FrameIndex);
4427}
4428
4429bool ARMTargetLowering::splitValueIntoRegisterParts(
4430 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
4431 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
4432 EVT ValueVT = Val.getValueType();
4433 if ((ValueVT == MVT::f16 || ValueVT == MVT::bf16) && PartVT == MVT::f32) {
4434 unsigned ValueBits = ValueVT.getSizeInBits();
4435 unsigned PartBits = PartVT.getSizeInBits();
4436 Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(ValueBits), Val);
4437 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::getIntegerVT(PartBits), Val);
4438 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
4439 Parts[0] = Val;
4440 return true;
4441 }
4442 return false;
4443}
4444
4445SDValue ARMTargetLowering::joinRegisterPartsIntoValue(
4446 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
4447 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
4448 if ((ValueVT == MVT::f16 || ValueVT == MVT::bf16) && PartVT == MVT::f32) {
4449 unsigned ValueBits = ValueVT.getSizeInBits();
4450 unsigned PartBits = PartVT.getSizeInBits();
4451 SDValue Val = Parts[0];
4452
4453 Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(PartBits), Val);
4454 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::getIntegerVT(ValueBits), Val);
4455 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
4456 return Val;
4457 }
4458 return SDValue();
4459}
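// [Editor's note: illustrative sketch, not part of ARMISelLowering.cpp. What
// the bitcast / any-extend / bitcast sequence above means at the bit level:
// an f16 or bf16 value travels in the low 16 bits of an f32 register; the
// upper bits are don't-care when splitting and are simply dropped when
// joining. Modelled with memcpy-based bitcasts on the raw half bits.]
#include <cstdint>
#include <cstring>

inline float splitHalfIntoFloat(uint16_t halfBits) {
  uint32_t wide = halfBits;                  // any-extend (upper bits left as zero here)
  float part;
  std::memcpy(&part, &wide, sizeof(part));   // bitcast i32 -> f32
  return part;
}

inline uint16_t joinHalfFromFloat(float part) {
  uint32_t wide;
  std::memcpy(&wide, &part, sizeof(wide));   // bitcast f32 -> i32
  return static_cast<uint16_t>(wide);        // truncate to the 16 payload bits
}
// On typical targets this round-trips the 16 payload bits unchanged, provided
// the float value is only copied, never operated on.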
4460
4461SDValue ARMTargetLowering::LowerFormalArguments(
4462 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4463 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4464 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4465 MachineFunction &MF = DAG.getMachineFunction();
4466 MachineFrameInfo &MFI = MF.getFrameInfo();
4467
4468 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
4469
4470 // Assign locations to all of the incoming arguments.
4471 SmallVector<CCValAssign, 16> ArgLocs;
4472 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4473 *DAG.getContext());
4474 CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
4475
4476 SmallVector<SDValue, 16> ArgValues;
4477 SDValue ArgValue;
4478 Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
4479 unsigned CurArgIdx = 0;
4480
4481 // Initially ArgRegsSaveSize is zero.
4482 // Then we increase this value each time we meet a byval parameter.
4483 // We also increase this value in the case of a varargs function.
4484 AFI->setArgRegsSaveSize(0);
4485
4486 // Calculate the amount of stack space that we need to allocate to store
4487 // byval and variadic arguments that are passed in registers.
4488 // We need to know this before we allocate the first byval or variadic
4489 // argument, as they will be allocated a stack slot below the CFA (Canonical
4490 // Frame Address, the stack pointer at entry to the function).
4491 unsigned ArgRegBegin = ARM::R4;
4492 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4493 if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
4494 break;
4495
4496 CCValAssign &VA = ArgLocs[i];
4497 unsigned Index = VA.getValNo();
4498 ISD::ArgFlagsTy Flags = Ins[Index].Flags;
4499 if (!Flags.isByVal())
4500 continue;
4501
4502 assert(VA.isMemLoc() && "unexpected byval pointer in reg");
4503 unsigned RBegin, REnd;
4504 CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
4505 ArgRegBegin = std::min(ArgRegBegin, RBegin);
4506
4507 CCInfo.nextInRegsParam();
4508 }
4509 CCInfo.rewindByValRegsInfo();
4510
4511 int lastInsIndex = -1;
4512 if (isVarArg && MFI.hasVAStart()) {
4513 unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
4514 if (RegIdx != std::size(GPRArgRegs))
4515 ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
4516 }
4517
4518 unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
4519 AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
4520 auto PtrVT = getPointerTy(DAG.getDataLayout());
4521
4522 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4523 CCValAssign &VA = ArgLocs[i];
4524 if (Ins[VA.getValNo()].isOrigArg()) {
4525 std::advance(CurOrigArg,
4526 Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
4527 CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
4528 }
4529 // Arguments stored in registers.
4530 if (VA.isRegLoc()) {
4531 EVT RegVT = VA.getLocVT();
4532
4533 if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
4534 // f64 and vector types are split up into multiple registers or
4535 // combinations of registers and stack slots.
4536 SDValue ArgValue1 =
4537 GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4538 VA = ArgLocs[++i]; // skip ahead to next loc
4539 SDValue ArgValue2;
4540 if (VA.isMemLoc()) {
4541 int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
4542 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4543 ArgValue2 = DAG.getLoad(
4544 MVT::f64, dl, Chain, FIN,
4545 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
4546 } else {
4547 ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4548 }
4549 ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
4550 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue,
4551 ArgValue1, DAG.getIntPtrConstant(0, dl));
4552 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue,
4553 ArgValue2, DAG.getIntPtrConstant(1, dl));
4554 } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {
4555 ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4556 } else {
4557 const TargetRegisterClass *RC;
4558
4559 if (RegVT == MVT::f16 || RegVT == MVT::bf16)
4560 RC = &ARM::HPRRegClass;
4561 else if (RegVT == MVT::f32)
4562 RC = &ARM::SPRRegClass;
4563 else if (RegVT == MVT::f64 || RegVT == MVT::v4f16 ||
4564 RegVT == MVT::v4bf16)
4565 RC = &ARM::DPRRegClass;
4566 else if (RegVT == MVT::v2f64 || RegVT == MVT::v8f16 ||
4567 RegVT == MVT::v8bf16)
4568 RC = &ARM::QPRRegClass;
4569 else if (RegVT == MVT::i32)
4570 RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
4571 : &ARM::GPRRegClass;
4572 else
4573 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
4574
4575 // Transform the arguments in physical registers into virtual ones.
4576 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4577 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
4578
4579 // If this value is passed in r0 and has the returned attribute (e.g.
4580 // C++ 'structors), record this fact for later use.
4581 if (VA.getLocReg() == ARM::R0 && Ins[VA.getValNo()].Flags.isReturned()) {
4582 AFI->setPreservesR0();
4583 }
4584 }
4585
4586 // If this is an 8 or 16-bit value, it is really passed promoted
4587 // to 32 bits. Insert an assert[sz]ext to capture this, then
4588 // truncate to the right size.
4589 switch (VA.getLocInfo()) {
4590 default: llvm_unreachable("Unknown loc info!");
4591 case CCValAssign::Full: break;
4592 case CCValAssign::BCvt:
4593 ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
4594 break;
4595 case CCValAssign::SExt:
4596 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
4597 DAG.getValueType(VA.getValVT()));
4598 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
4599 break;
4600 case CCValAssign::ZExt:
4601 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
4602 DAG.getValueType(VA.getValVT()));
4603 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
4604 break;
4605 }
4606
4607 // f16 arguments have their size extended to 4 bytes and passed as if they
4608 // had been copied to the LSBs of a 32-bit register.
4609 // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
4610 if (VA.needsCustom() &&
4611 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
4612 ArgValue = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), ArgValue);
4613
4614 InVals.push_back(ArgValue);
4615 } else { // !VA.isRegLoc()
4616 // Only arguments passed on the stack should make it here.
4617 assert(VA.isMemLoc());
4618 assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
4619
4620 int index = VA.getValNo();
4621
4622 // Some Ins[] entries become multiple ArgLoc[] entries.
4623 // Process them only once.
4624 if (index != lastInsIndex)
4625 {
4626 ISD::ArgFlagsTy Flags = Ins[index].Flags;
4627 // FIXME: For now, all byval parameter objects are marked mutable.
4628 // This can be changed with more analysis.
4629 // In the case of tail call optimization, mark all arguments mutable,
4630 // since they could be overwritten by the lowering of arguments
4631 // for a tail call.
4632 if (Flags.isByVal()) {
4633 assert(Ins[index].isOrigArg() &&
4634        "Byval arguments cannot be implicit");
4635 unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
4636
4637 int FrameIndex = StoreByValRegs(
4638 CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,
4639 VA.getLocMemOffset(), Flags.getByValSize());
4640 InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
4641 CCInfo.nextInRegsParam();
4642 } else {
4643 unsigned FIOffset = VA.getLocMemOffset();
4644 int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
4645 FIOffset, true);
4646
4647 // Create load nodes to retrieve arguments from the stack.
4648 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4649 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
4650 MachinePointerInfo::getFixedStack(
4651 DAG.getMachineFunction(), FI)));
4652 }
4653 lastInsIndex = index;
4654 }
4655 }
4656 }
4657
4658 // varargs
4659 if (isVarArg && MFI.hasVAStart()) {
4660 VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset(),
4661 TotalArgRegsSaveSize);
4662 if (AFI->isCmseNSEntryFunction()) {
4663 DiagnosticInfoUnsupported Diag(
4664 DAG.getMachineFunction().getFunction(),
4665 "secure entry function must not be variadic", dl.getDebugLoc());
4666 DAG.getContext()->diagnose(Diag);
4667 }
4668 }
4669
4670 unsigned StackArgSize = CCInfo.getNextStackOffset();
4671 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
4672 if (canGuaranteeTCO(CallConv, TailCallOpt)) {
4673 // The only way to guarantee a tail call is if the callee restores its
4674 // argument area, but it must also keep the stack aligned when doing so.
4675 const DataLayout &DL = DAG.getDataLayout();
4676 StackArgSize = alignTo(StackArgSize, DL.getStackAlignment());
4677
4678 AFI->setArgumentStackToRestore(StackArgSize);
4679 }
4680 AFI->setArgumentStackSize(StackArgSize);
4681
4682 if (CCInfo.getNextStackOffset() > 0 && AFI->isCmseNSEntryFunction()) {
4683 DiagnosticInfoUnsupported Diag(
4684 DAG.getMachineFunction().getFunction(),
4685 "secure entry function requires arguments on stack", dl.getDebugLoc());
4686 DAG.getContext()->diagnose(Diag);
4687 }
4688
4689 return Chain;
4690}
4691
4692/// isFloatingPointZero - Return true if this is +0.0.
4693static bool isFloatingPointZero(SDValue Op) {
4694 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
4695 return CFP->getValueAPF().isPosZero();
4696 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
4697 // Maybe this has already been legalized into the constant pool?
4698 if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
4699 SDValue WrapperOp = Op.getOperand(1).getOperand(0);
4700 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
4701 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
4702 return CFP->getValueAPF().isPosZero();
4703 }
4704 } else if (Op->getOpcode() == ISD::BITCAST &&
4705 Op->getValueType(0) == MVT::f64) {
4706 // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
4707 // created by LowerConstantFP().
4708 SDValue BitcastOp = Op->getOperand(0);
4709 if (BitcastOp->getOpcode() == ARMISD::VMOVIMM &&
4710 isNullConstant(BitcastOp->getOperand(0)))
4711 return true;
4712 }
4713 return false;
4714}
4715
4716 /// Returns an appropriate ARM CMP (cmp) and the corresponding condition code
4717 /// for the given operands.
4718SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
4719 SDValue &ARMcc, SelectionDAG &DAG,
4720 const SDLoc &dl) const {
4721 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
4722 unsigned C = RHSC->getZExtValue();
4723 if (!isLegalICmpImmediate((int32_t)C)) {
4724 // Constant does not fit, try adjusting it by one.
4725 switch (CC) {
4726 default: break;
4727 case ISD::SETLT:
4728 case ISD::SETGE:
4729 if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
4730 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
4731 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
4732 }
4733 break;
4734 case ISD::SETULT:
4735 case ISD::SETUGE:
4736 if (C != 0 && isLegalICmpImmediate(C-1)) {
4737 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
4738 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
4739 }
4740 break;
4741 case ISD::SETLE:
4742 case ISD::SETGT:
4743 if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
4744 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
4745 RHS = DAG.getConstant(C + 1, dl, MVT::i32);
4746 }
4747 break;
4748 case ISD::SETULE:
4749 case ISD::SETUGT:
4750 if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
4751 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
4752 RHS = DAG.getConstant(C + 1, dl, MVT::i32);
4753 }
4754 break;
4755 }
4756 }
4757 } else if ((ARM_AM::getShiftOpcForNode(LHS.getOpcode()) != ARM_AM::no_shift) &&
4758 (ARM_AM::getShiftOpcForNode(RHS.getOpcode()) == ARM_AM::no_shift)) {
4759 // In ARM and Thumb-2, the compare instructions can shift their second
4760 // operand.
4761 CC = ISD::getSetCCSwappedOperands(CC);
4762 std::swap(LHS, RHS);
4763 }
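// [Editor's note: illustrative sketch, not part of ARMISelLowering.cpp. The
// immediate adjustment above in ordinary arithmetic: when C is not a legal
// cmp immediate but a neighbour is, the predicate is nudged so the compare
// stays equivalent, e.g. x < C  <=>  x <= C-1 (SETLT -> SETLE, refused when
// C == INT32_MIN) and x u> C  <=>  x u>= C+1 (SETUGT -> SETUGE, refused when
// C == 0xffffffff). A tiny checker for the signed SETLT case:]
#include <cassert>
#include <cstdint>

inline void checkSetLTRewrite(int32_t x, int32_t c) {
  if (c == INT32_MIN)
    return;                        // the code above refuses this case
  assert((x < c) == (x <= c - 1)); // SETLT with C == SETLE with C-1
}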
4764
4765 // Thumb1 has very limited immediate modes, so turning an "and" into a
4766 // shift can save multiple instructions.
4767 //
4768 // If we have (x & C1), and C1 is an appropriate mask, we can transform it
4769 // into "((x << n) >> n)". But that isn't necessarily profitable on its
4770 // own. If it's the operand to an unsigned comparison with an immediate,
4771 // we can eliminate one of the shifts: we transform
4772 // "((x << n) >> n) == C2" to "(x << n) == (C2 << n)".
4773 //
4774 // We avoid transforming cases which aren't profitable due to encoding
4775 // details:
4776 //
4777 // 1. C2 fits into the immediate field of a cmp, and the transformed version
4778 // would not; in that case, we're essentially trading one immediate load for
4779 // another.
4780 // 2. C1 is 255 or 65535, so we can use uxtb or uxth.
4781 // 3. C2 is zero; we have other code for this special case.
4782 //
4783 // FIXME: Figure out profitability for Thumb2; we usually can't save an
4784 // instruction, since the AND is always one instruction anyway, but we could
4785 // use narrow instructions in some cases.
4786 if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::AND &&
4787 LHS->hasOneUse() && isa<ConstantSDNode>(LHS.getOperand(1)) &&
4788 LHS.getValueType() == MVT::i32 && isa<ConstantSDNode>(RHS) &&
4789 !isSignedIntSetCC(CC)) {
4790 unsigned Mask = cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue();
4791 auto *RHSC = cast<ConstantSDNode>(RHS.getNode());
4792 uint64_t RHSV = RHSC->getZExtValue();
4793 if (isMask_32(Mask) && (RHSV & ~Mask) == 0 && Mask != 255 && Mask != 65535) {
4794 unsigned ShiftBits = llvm::countl_zero(Mask);
4795 if (RHSV && (RHSV > 255 || (RHSV << ShiftBits) <= 255)) {
4796 SDValue ShiftAmt = DAG.getConstant(ShiftBits, dl, MVT::i32);
4797 LHS = DAG.getNode(ISD::SHL, dl, MVT::i32, LHS.getOperand(0), ShiftAmt);
4798 RHS = DAG.getConstant(RHSV << ShiftBits, dl, MVT::i32);
4799 }
4800 }
4801 }
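// [Editor's note: illustrative sketch, not part of ARMISelLowering.cpp. The
// Thumb1 rewrite above on concrete numbers: for a contiguous low mask C1 with
// n leading zero bits, "(x & C1) cmpU C2" is the same unsigned comparison as
// "(x << n) cmpU (C2 << n)", because the shift discards exactly the bits the
// AND would have cleared. Checked here for C1 = 0x00ffffff (n = 8) and
// C2 = 0x00abcdef:]
#include <cassert>
#include <cstdint>

inline void checkAndToShiftRewrite(uint32_t x) {
  const uint32_t Mask = 0x00ffffffu; // C1: contiguous low mask, countl_zero == 8
  const unsigned Shift = 8;
  const uint32_t C2 = 0x00abcdefu;   // satisfies (C2 & ~Mask) == 0
  assert(((x & Mask) == C2) == ((x << Shift) == (C2 << Shift)));
}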
4802
4803 // The specific comparison "(x<<c) > 0x80000000U" can be optimized to a
4804 // single "lsls x, c+1". The shift sets the "C" and "Z" flags the same
4805 // way a cmp would.
4806 // FIXME: Add support for ARM/Thumb2; this would need isel patterns, and
4807 // some tweaks to the heuristics for the previous and->shift transform.
4808 // FIXME: Optimize cases where the LHS isn't a shift.
4809 if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::SHL &&
4810 isa<ConstantSDNode>(RHS) &&
4811 cast<ConstantSDNode>(RHS)->getZExtValue() == 0x80000000U &&
4812 CC == ISD::SETUGT && isa<ConstantSDNode>(LHS.getOperand(1)) &&
4813 cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() < 31) {
4814 unsigned ShiftAmt =
4815 cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() + 1;
4816 SDValue Shift = DAG.getNode(ARMISD::LSLS, dl,
4817 DAG.getVTList(MVT::i32, MVT::i32),
4818 LHS.getOperand(0),
4819 DAG.getConstant(ShiftAmt, dl, MVT::i32));
4820 SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
4821 Shift.getValue(1), SDValue());
4822 ARMcc = DAG.getConstant(ARMCC::HI, dl, MVT::i32);
4823 return Chain.getValue(1);
4824 }
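// [Editor's note: illustrative sketch, not part of ARMISelLowering.cpp. Why a
// single "lsls x, c+1" plus an HI condition implements (x << c) u> 0x80000000:
// after shifting left by c+1, the carry flag holds what would have been bit 31
// of (x << c), and the zero flag says whether the remaining low bits of
// (x << c) were all zero; HI is "carry set and zero clear".]
#include <cassert>
#include <cstdint>

inline void checkLslsRewrite(uint32_t x, unsigned c /* 0..30 */) {
  uint32_t shifted = x << c;                              // original LHS
  bool carry = (x >> (31 - c)) & 1u;                      // C after "lsls x, c+1"
  bool zero = static_cast<uint32_t>(x << (c + 1)) == 0u;  // Z after "lsls x, c+1"
  bool hi = carry && !zero;                               // ARMCC::HI
  assert(hi == (shifted > 0x80000000u));
}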
4825
4826 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
4827
4828 // If the RHS is a constant zero then the V (overflow) flag will never be
4829 // set. This can allow us to simplify GE to PL or LT to MI, which can be
4830 // simpler for other passes (like the peephole optimiser) to deal with.
4831 if (isNullConstant(RHS)) {
4832 switch (CondCode) {
4833 default: break;
4834 case ARMCC::GE:
4835 CondCode = ARMCC::PL;
4836 break;
4837 case ARMCC::LT:
4838 CondCode = ARMCC::MI;
4839 break;
4840 }
4841 }
4842
4843 ARMISD::NodeType CompareType;
4844 switch (CondCode) {
4845 default:
4846 CompareType = ARMISD::CMP;
4847 break;
4848 case ARMCC::EQ:
4849 case ARMCC::NE:
4850 // Uses only Z Flag
4851 CompareType = ARMISD::CMPZ;
4852 break;
4853 }
4854 ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4855 return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
4856}
4857
4858 /// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
4859SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
4860 SelectionDAG &DAG, const SDLoc &dl,
4861 bool Signaling) const {
4862 assert(Subtarget->hasFP64() || RHS.getValueType() != MVT::f64);
4863 SDValue Cmp;
4864 if (!isFloatingPointZero(RHS))
4865 Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPE : ARMISD::CMPFP,
4866 dl, MVT::Glue, LHS, RHS);
4867 else
4868 Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPEw0 : ARMISD::CMPFPw0,
4869 dl, MVT::Glue, LHS);
4870 return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
4871}
4872
4873/// duplicateCmp - Glue values can have only one use, so this function
4874/// duplicates a comparison node.
4875SDValue
4876ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
4877 unsigned Opc = Cmp.getOpcode();
4878 SDLoc DL(Cmp);
4879 if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
4880 return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
4881
4882 assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
4883 Cmp = Cmp.getOperand(0);
4884 Opc = Cmp.getOpcode();
4885 if (Opc == ARMISD::CMPFP)
4886 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
4887 else {
4888 assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
4889 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
4890 }
4891 return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
4892}
4893
4894// This function returns three things: the arithmetic computation itself
4895// (Value), a comparison (OverflowCmp), and a condition code (ARMcc). The
4896// comparison and the condition code define the case in which the arithmetic
4897// computation *does not* overflow.
4898std::pair<SDValue, SDValue>
4899ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
4900 SDValue &ARMcc) const {
4901 assert(Op.getValueType() == MVT::i32 && "Unsupported value type");
4902
4903 SDValue Value, OverflowCmp;
4904 SDValue LHS = Op.getOperand(0);
4905 SDValue RHS = Op.getOperand(1);
4906 SDLoc dl(Op);
4907
4908 // FIXME: We are currently always generating CMPs because we don't support
4909 // generating CMN through the backend. This is not as good as the natural
4910 // CMP case because it causes a register dependency and cannot be folded
4911 // later.
4912
4913 switch (Op.getOpcode()) {
4914 default:
4915 llvm_unreachable("Unknown overflow instruction!");
4916 case ISD::SADDO:
4917 ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
4918 Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
4919 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
4920 break;
4921 case ISD::UADDO:
4922 ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
4923 // We use ADDC here to correspond to its use in LowerUnsignedALUO.
4924 // We do not use it in the USUBO case as Value may not be used.
4925 Value = DAG.getNode(ARMISD::ADDC, dl,
4926 DAG.getVTList(Op.getValueType(), MVT::i32), LHS, RHS)
4927 .getValue(0);
4928 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
4929 break;
4930 case ISD::SSUBO:
4931 ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
4932 Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
4933 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
4934 break;
4935 case ISD::USUBO:
4936 ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
4937 Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
4938 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
4939 break;
4940 case ISD::UMULO:
4941 // We generate a UMUL_LOHI and then check if the high word is 0.
4942 ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
4943 Value = DAG.getNode(ISD::UMUL_LOHI, dl,
4944 DAG.getVTList(Op.getValueType(), Op.getValueType()),
4945 LHS, RHS);
4946 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
4947 DAG.getConstant(0, dl, MVT::i32));
4948 Value = Value.getValue(0); // We only want the low 32 bits for the result.
4949 break;
4950 case ISD::SMULO:
4951 // We generate a SMUL_LOHI and then check if all the bits of the high word
4952 // are the same as the sign bit of the low word.
4953 ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
4954 Value = DAG.getNode(ISD::SMUL_LOHI, dl,
4955 DAG.getVTList(Op.getValueType(), Op.getValueType()),
4956 LHS, RHS);
4957 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
4958 DAG.getNode(ISD::SRA, dl, Op.getValueType(),
4959 Value.getValue(0),
4960 DAG.getConstant(31, dl, MVT::i32)));
4961 Value = Value.getValue(0); // We only want the low 32 bits for the result.
4962 break;
4963 } // switch (...)
4964
4965 return std::make_pair(Value, OverflowCmp);
4966}
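// [Editor's note: illustrative sketch, not part of ARMISelLowering.cpp. The
// UMULO/SMULO checks above, written with 64-bit arithmetic: an unsigned 32x32
// multiply overflows iff the high word of the full product is nonzero; a
// signed one overflows iff the high word differs from the sign-extension
// (arithmetic shift right by 31) of the low word.]
#include <cstdint>

inline bool umulOverflows(uint32_t a, uint32_t b) {
  uint64_t full = static_cast<uint64_t>(a) * b;
  return static_cast<uint32_t>(full >> 32) != 0u;   // CMP hi, #0
}

inline bool smulOverflows(int32_t a, int32_t b) {
  int64_t full = static_cast<int64_t>(a) * b;
  int32_t lo = static_cast<int32_t>(full);          // low word (C++20 conversion rules)
  int32_t hi = static_cast<int32_t>(full >> 32);    // high word
  return hi != (lo >> 31);                          // CMP hi, (lo sra 31)
}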
4967
4968SDValue
4969ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const {
4970 // Let legalize expand this if it isn't a legal type yet.
4971 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
4972 return SDValue();
4973
4974 SDValue Value, OverflowCmp;
4975 SDValue ARMcc;
4976 std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
4977 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4978 SDLoc dl(Op);
4979 // We use 0 and 1 as false and true values.
4980 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
4981 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
4982 EVT VT = Op.getValueType();
4983
4984 SDValue Overflow = DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal,
4985 ARMcc, CCR, OverflowCmp);
4986
4987 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
4988 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
4989}
4990
4991static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry,
4992 SelectionDAG &DAG) {
4993 SDLoc DL(BoolCarry);
4994 EVT CarryVT = BoolCarry.getValueType();
4995
4996 // This converts the boolean value carry into the carry flag by doing
4997 // ARMISD::SUBC Carry, 1
4998 SDValue Carry = DAG.getNode(ARMISD::SUBC, DL,
4999 DAG.getVTList(CarryVT, MVT::i32),
5000 BoolCarry, DAG.getConstant(1, DL, CarryVT));
5001 return Carry.getValue(1);
5002}
5003
5004static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT,
5005 SelectionDAG &DAG) {
5006 SDLoc DL(Flags);
5007
5008 // Now convert the carry flag into a boolean carry. We do this
5009 // using ARMISD::ADDE 0, 0, Carry
5010 return DAG.getNode(ARMISD::ADDE, DL, DAG.getVTList(VT, MVT::i32),
5011 DAG.getConstant(0, DL, MVT::i32),
5012 DAG.getConstant(0, DL, MVT::i32), Flags);
5013}
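// On ARM a subtraction sets the carry flag when there is *no* borrow, so
// (ARMISD::SUBC Carry, 1) leaves C == 1 for a boolean carry of 1 and C == 0
// for a boolean carry of 0, while (ARMISD::ADDE 0, 0, Flags) computes
// 0 + 0 + C and recovers the same boolean; the two helpers above are inverses
// of each other.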
5014
5015SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
5016 SelectionDAG &DAG) const {
5017 // Let legalize expand this if it isn't a legal type yet.
5018 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
5019 return SDValue();
5020
5021 SDValue LHS = Op.getOperand(0);
5022 SDValue RHS = Op.getOperand(1);
5023 SDLoc dl(Op);
5024
5025 EVT VT = Op.getValueType();
5026 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
5027 SDValue Value;
5028 SDValue Overflow;
5029 switch (Op.getOpcode()) {
5030 default:
5031 llvm_unreachable("Unknown overflow instruction!");
5032 case ISD::UADDO:
5033 Value = DAG.getNode(ARMISD::ADDC, dl, VTs, LHS, RHS);
5034 // Convert the carry flag into a boolean value.
5035 Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
5036 break;
5037 case ISD::USUBO: {
5038 Value = DAG.getNode(ARMISD::SUBC, dl, VTs, LHS, RHS);
5039 // Convert the carry flag into a boolean value.
5040 Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
5041 // ARMISD::SUBC returns 0 when we have to borrow, so make it an overflow
5042 // value. So compute 1 - C.
5043 Overflow = DAG.getNode(ISD::SUB, dl, MVT::i32,
5044 DAG.getConstant(1, dl, MVT::i32), Overflow);
5045 break;
5046 }
5047 }
5048
5049 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
5050}
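// For example, lowering (usubo 2, 5): ARMISD::SUBC produces Value == 0xFFFFFFFD
// and clears the carry flag because a borrow occurred, the conversion above
// yields a boolean carry of 0, and Overflow == 1 - 0 == 1, signalling the
// unsigned underflow.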
5051
5052static SDValue LowerADDSUBSAT(SDValue Op, SelectionDAG &DAG,
5053 const ARMSubtarget *Subtarget) {
5054 EVT VT = Op.getValueType();
5055 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP() || Subtarget->isThumb1Only())
5056 return SDValue();
5057 if (!VT.isSimple())
5058 return SDValue();
5059
5060 unsigned NewOpcode;
5061 switch (VT.getSimpleVT().SimpleTy) {
5062 default:
5063 return SDValue();
5064 case MVT::i8:
5065 switch (Op->getOpcode()) {
5066 case ISD::UADDSAT:
5067 NewOpcode = ARMISD::UQADD8b;
5068 break;
5069 case ISD::SADDSAT:
5070 NewOpcode = ARMISD::QADD8b;
5071 break;
5072 case ISD::USUBSAT:
5073 NewOpcode = ARMISD::UQSUB8b;
5074 break;
5075 case ISD::SSUBSAT:
5076 NewOpcode = ARMISD::QSUB8b;
5077 break;
5078 }
5079 break;
5080 case MVT::i16:
5081 switch (Op->getOpcode()) {
5082 case ISD::UADDSAT:
5083 NewOpcode = ARMISD::UQADD16b;
5084 break;
5085 case ISD::SADDSAT:
5086 NewOpcode = ARMISD::QADD16b;
5087 break;
5088 case ISD::USUBSAT:
5089 NewOpcode = ARMISD::UQSUB16b;
5090 break;
5091 case ISD::SSUBSAT:
5092 NewOpcode = ARMISD::QSUB16b;
5093 break;
5094 }
5095 break;
5096 }
5097
5098 SDLoc dl(Op);
5099 SDValue Add =
5100 DAG.getNode(NewOpcode, dl, MVT::i32,
5101 DAG.getSExtOrTrunc(Op->getOperand(0), dl, MVT::i32),
5102 DAG.getSExtOrTrunc(Op->getOperand(1), dl, MVT::i32));
5103 return DAG.getNode(ISD::TRUNCATE, dl, VT, Add);
5104}
5105
5106SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
5107 SDValue Cond = Op.getOperand(0);
5108 SDValue SelectTrue = Op.getOperand(1);
5109 SDValue SelectFalse = Op.getOperand(2);
5110 SDLoc dl(Op);
5111 unsigned Opc = Cond.getOpcode();
5112
5113 if (Cond.getResNo() == 1 &&
5114 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
5115 Opc == ISD::USUBO)) {
5116 if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
5117 return SDValue();
5118
5119 SDValue Value, OverflowCmp;
5120 SDValue ARMcc;
5121 std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
5122 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5123 EVT VT = Op.getValueType();
5124
5125 return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, CCR,
5126 OverflowCmp, DAG);
5127 }
5128
5129 // Convert:
5130 //
5131 // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
5132 // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
5133 //
5134 if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
5135 const ConstantSDNode *CMOVTrue =
5136 dyn_cast<ConstantSDNode>(Cond.getOperand(0));
5137 const ConstantSDNode *CMOVFalse =
5138 dyn_cast<ConstantSDNode>(Cond.getOperand(1));
5139
5140 if (CMOVTrue && CMOVFalse) {
5141 unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
5142 unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
5143
5144 SDValue True;
5145 SDValue False;
5146 if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
5147 True = SelectTrue;
5148 False = SelectFalse;
5149 } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
5150 True = SelectFalse;
5151 False = SelectTrue;
5152 }
5153
5154 if (True.getNode() && False.getNode()) {
5155 EVT VT = Op.getValueType();
5156 SDValue ARMcc = Cond.getOperand(2);
5157 SDValue CCR = Cond.getOperand(3);
5158 SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
5159 assert(True.getValueType() == VT);
5160 return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG);
5161 }
5162 }
5163 }
5164
5165 // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
5166 // undefined bits before doing a full-word comparison with zero.
5167 Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
5168 DAG.getConstant(1, dl, Cond.getValueType()));
5169
5170 return DAG.getSelectCC(dl, Cond,
5171 DAG.getConstant(0, dl, Cond.getValueType()),
5172 SelectTrue, SelectFalse, ISD::SETNE);
5173}
5174
5175static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
5176 bool &swpCmpOps, bool &swpVselOps) {
5177 // Start by selecting the GE condition code for opcodes that return true for
5178 // 'equality'
5179 if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
5180 CC == ISD::SETULE || CC == ISD::SETGE || CC == ISD::SETLE)
5181 CondCode = ARMCC::GE;
5182
5183 // and GT for opcodes that return false for 'equality'.
5184 else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
5185 CC == ISD::SETULT || CC == ISD::SETGT || CC == ISD::SETLT)
5186 CondCode = ARMCC::GT;
5187
5188 // Since we are constrained to GE/GT, if the opcode contains 'less', we need
5189 // to swap the compare operands.
5190 if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
5191 CC == ISD::SETULT || CC == ISD::SETLE || CC == ISD::SETLT)
5192 swpCmpOps = true;
5193
5194 // Both GT and GE are ordered comparisons, and return false for 'unordered'.
5195 // If we have an unordered opcode, we need to swap the operands to the VSEL
5196 // instruction (effectively negating the condition).
5197 //
5198 // This also has the effect of swapping which one of 'less' or 'greater'
5199 // returns true, so we also swap the compare operands. It also switches
5200 // whether we return true for 'equality', so we compensate by picking the
5201 // opposite condition code to our original choice.
5202 if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||
5203 CC == ISD::SETUGT) {
5204 swpCmpOps = !swpCmpOps;
5205 swpVselOps = !swpVselOps;
5206 CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT;
5207 }
5208
5209 // 'ordered' is 'anything but unordered', so use the VS condition code and
5210 // swap the VSEL operands.
5211 if (CC == ISD::SETO) {
5212 CondCode = ARMCC::VS;
5213 swpVselOps = true;
5214 }
5215
5216 // 'unordered or not equal' is 'anything but equal', so use the EQ condition
5217 // code and swap the VSEL operands. Also do this if we don't care about the
5218 // unordered case.
5219 if (CC == ISD::SETUNE || CC == ISD::SETNE) {
5220 CondCode = ARMCC::EQ;
5221 swpVselOps = true;
5222 }
5223}
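// Worked example: for ISD::SETULT ("unordered or less than") the code above
// first picks GT, then sets swpCmpOps because the opcode contains 'less', and
// finally, since the opcode is unordered, flips swpCmpOps back off, sets
// swpVselOps and relaxes GT to GE. The caller then emits an unswapped compare
// and a VSELGE with exchanged operands, which ends up selecting the original
// 'true' value exactly when the operands are unordered or LHS < RHS.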
5224
5225SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal,
5226 SDValue TrueVal, SDValue ARMcc, SDValue CCR,
5227 SDValue Cmp, SelectionDAG &DAG) const {
5228 if (!Subtarget->hasFP64() && VT == MVT::f64) {
5229 FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl,
5230 DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
5231 TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl,
5232 DAG.getVTList(MVT::i32, MVT::i32), TrueVal);
5233
5234 SDValue TrueLow = TrueVal.getValue(0);
5235 SDValue TrueHigh = TrueVal.getValue(1);
5236 SDValue FalseLow = FalseVal.getValue(0);
5237 SDValue FalseHigh = FalseVal.getValue(1);
5238
5239 SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow,
5240 ARMcc, CCR, Cmp);
5241 SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh,
5242 ARMcc, CCR, duplicateCmp(Cmp, DAG));
5243
5244 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High);
5245 } else {
5246 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
5247 Cmp);
5248 }
5249}
5250
5251static bool isGTorGE(ISD::CondCode CC) {
5252 return CC == ISD::SETGT || CC == ISD::SETGE;
5253}
5254
5255static bool isLTorLE(ISD::CondCode CC) {
5256 return CC == ISD::SETLT || CC == ISD::SETLE;
5257}
5258
5259// See if a conditional (LHS CC RHS ? TrueVal : FalseVal) is lower-saturating.
5260// All of these conditions (and their <= and >= counterparts) will do:
5261// x < k ? k : x
5262// x > k ? x : k
5263// k < x ? x : k
5264// k > x ? k : x
5265static bool isLowerSaturate(const SDValue LHS, const SDValue RHS,
5266 const SDValue TrueVal, const SDValue FalseVal,
5267 const ISD::CondCode CC, const SDValue K) {
5268 return (isGTorGE(CC) &&
5269 ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))) ||
5270 (isLTorLE(CC) &&
5271 ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal)));
5272}
5273
5274// Check if two chained conditionals could be converted into SSAT or USAT.
5275//
5276// SSAT can replace a set of two conditional selectors that bound a number to an
5277// interval of type [~k, k] when k + 1 is a power of 2. Here are some examples:
5278//
5279// x < -k ? -k : (x > k ? k : x)
5280// x < -k ? -k : (x < k ? x : k)
5281// x > -k ? (x > k ? k : x) : -k
5282// x < k ? (x < -k ? -k : x) : k
5283// etc.
5284//
5285// LLVM canonicalizes these to either a min(max()) or a max(min())
5286// pattern. This function tries to match one of these and will return a SSAT
5287// node if successful.
5288//
5289// USAT works similarly to SSAT but bounds on the interval [0, k] where k + 1
5290// is a power of 2.
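// For example, with k == 127 (k + 1 == 128 == 2^7) the two selects clamp x to
// [-128, 127], which is emitted below as an ARMISD::SSAT node whose saturation
// operand is countr_one(127) == 7; a [0, 255] bound is emitted as ARMISD::USAT
// in the same way.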
5291static SDValue LowerSaturatingConditional(SDValue Op, SelectionDAG &DAG) {
5292 EVT VT = Op.getValueType();
5293 SDValue V1 = Op.getOperand(0);
5294 SDValue K1 = Op.getOperand(1);
5295 SDValue TrueVal1 = Op.getOperand(2);
5296 SDValue FalseVal1 = Op.getOperand(3);
5297 ISD::CondCode CC1 = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5298
5299 const SDValue Op2 = isa<ConstantSDNode>(TrueVal1) ? FalseVal1 : TrueVal1;
5300 if (Op2.getOpcode() != ISD::SELECT_CC)
5301 return SDValue();
5302
5303 SDValue V2 = Op2.getOperand(0);
5304 SDValue K2 = Op2.getOperand(1);
5305 SDValue TrueVal2 = Op2.getOperand(2);
5306 SDValue FalseVal2 = Op2.getOperand(3);
5307 ISD::CondCode CC2 = cast<CondCodeSDNode>(Op2.getOperand(4))->get();
5308
5309 SDValue V1Tmp = V1;
5310 SDValue V2Tmp = V2;
5311
5312 // Check that the registers and the constants match a max(min()) or min(max())
5313 // pattern
5314 if (V1Tmp != TrueVal1 || V2Tmp != TrueVal2 || K1 != FalseVal1 ||
5315 K2 != FalseVal2 ||
5316 !((isGTorGE(CC1) && isLTorLE(CC2)) || (isLTorLE(CC1) && isGTorGE(CC2))))
5317 return SDValue();
5318
5319 // Check that the constant in the lower-bound check is
5320 // the opposite of the constant in the upper-bound check
5321 // in 1's complement.
5322 if (!isa<ConstantSDNode>(K1) || !isa<ConstantSDNode>(K2))
5323 return SDValue();
5324
5325 int64_t Val1 = cast<ConstantSDNode>(K1)->getSExtValue();
5326 int64_t Val2 = cast<ConstantSDNode>(K2)->getSExtValue();
5327 int64_t PosVal = std::max(Val1, Val2);
5328 int64_t NegVal = std::min(Val1, Val2);
5329
5330 if (!((Val1 > Val2 && isLTorLE(CC1)) || (Val1 < Val2 && isLTorLE(CC2))) ||
5331 !isPowerOf2_64(PosVal + 1))
5332 return SDValue();
5333
5334 // Handle the difference between USAT (unsigned) and SSAT (signed)
5335 // saturation
5336 // At this point, PosVal is guaranteed to be positive
5337 uint64_t K = PosVal;
5338 SDLoc dl(Op);
5339 if (Val1 == ~Val2)
5340 return DAG.getNode(ARMISD::SSAT, dl, VT, V2Tmp,
5341 DAG.getConstant(llvm::countr_one(K), dl, VT));
5342 if (NegVal == 0)
5343 return DAG.getNode(ARMISD::USAT, dl, VT, V2Tmp,
5344 DAG.getConstant(llvm::countr_one(K), dl, VT));
5345
5346 return SDValue();
5347}
5348
5349// Check if a condition of the type x < k ? k : x can be converted into a
5350// bit operation instead of conditional moves.
5351// Currently this is allowed given:
5352// - The conditions and values match up
5353// - k is 0 or -1 (all ones)
5354// This function will not check the last condition; that's up to the caller.
5355// It returns true if the transformation can be made, and in that case
5356// returns x in V, and k in SatK.
5357static bool isLowerSaturatingConditional(const SDValue &Op, SDValue &V,
5358 SDValue &SatK)
5359{
5360 SDValue LHS = Op.getOperand(0);
5361 SDValue RHS = Op.getOperand(1);
5362 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5363 SDValue TrueVal = Op.getOperand(2);
5364 SDValue FalseVal = Op.getOperand(3);
5365
5366 SDValue *K = isa<ConstantSDNode>(LHS) ? &LHS : isa<ConstantSDNode>(RHS)
5367 ? &RHS
5368 : nullptr;
5369
5370 // No constant operation in comparison, early out
5371 if (!K)
5372 return false;
5373
5374 SDValue KTmp = isa<ConstantSDNode>(TrueVal) ? TrueVal : FalseVal;
5375 V = (KTmp == TrueVal) ? FalseVal : TrueVal;
5376 SDValue VTmp = (K && *K == LHS) ? RHS : LHS;
5377
5378 // If the constant in the comparison does not match the constant in the select,
5379 // or the compared variable does not match the selected variable, early out.
5380 if (*K != KTmp || V != VTmp)
5381 return false;
5382
5383 if (isLowerSaturate(LHS, RHS, TrueVal, FalseVal, CC, *K)) {
5384 SatK = *K;
5385 return true;
5386 }
5387
5388 return false;
5389}
5390
5391bool ARMTargetLowering::isUnsupportedFloatingType(EVT VT) const {
5392 if (VT == MVT::f32)
5393 return !Subtarget->hasVFP2Base();
5394 if (VT == MVT::f64)
5395 return !Subtarget->hasFP64();
5396 if (VT == MVT::f16)
5397 return !Subtarget->hasFullFP16();
5398 return false;
5399}
5400
5401SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
5402 EVT VT = Op.getValueType();
5403 SDLoc dl(Op);
5404
5405 // Try to convert two saturating conditional selects into a single SSAT
5406 if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2())
5407 if (SDValue SatValue = LowerSaturatingConditional(Op, DAG))
5408 return SatValue;
5409
5410 // Try to convert expressions of the form x < k ? k : x (and similar forms)
5411 // into more efficient bit operations, which is possible when k is 0 or -1.
5412 // On ARM and Thumb-2, which have a flexible second operand, this will result
5413 // in single instructions. On Thumb the shift and the bit operation will be
5414 // two instructions.
5415 // Only allow this transformation on full-width (32-bit) operations.
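// For example, with k == 0 the select x < 0 ? 0 : x becomes
//   x & ~(x >> 31)
// and with k == -1 the select x < -1 ? -1 : x becomes
//   x | (x >> 31)
// where the shift is an arithmetic shift right that smears the sign bit across
// the word.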
5416 SDValue LowerSatConstant;
5417 SDValue SatValue;
5418 if (VT == MVT::i32 &&
5419 isLowerSaturatingConditional(Op, SatValue, LowerSatConstant)) {
5420 SDValue ShiftV = DAG.getNode(ISD::SRA, dl, VT, SatValue,
5421 DAG.getConstant(31, dl, VT));
5422 if (isNullConstant(LowerSatConstant)) {
5423 SDValue NotShiftV = DAG.getNode(ISD::XOR, dl, VT, ShiftV,
5424 DAG.getAllOnesConstant(dl, VT));
5425 return DAG.getNode(ISD::AND, dl, VT, SatValue, NotShiftV);
5426 } else if (isAllOnesConstant(LowerSatConstant))
5427 return DAG.getNode(ISD::OR, dl, VT, SatValue, ShiftV);
5428 }
5429
5430 SDValue LHS = Op.getOperand(0);
5431 SDValue RHS = Op.getOperand(1);
5432 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5433 SDValue TrueVal = Op.getOperand(2);
5434 SDValue FalseVal = Op.getOperand(3);
5435 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FalseVal);
5436 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TrueVal);
5437
5438 if (Subtarget->hasV8_1MMainlineOps() && CFVal && CTVal &&
5439 LHS.getValueType() == MVT::i32 && RHS.getValueType() == MVT::i32) {
5440 unsigned TVal = CTVal->getZExtValue();
5441 unsigned FVal = CFVal->getZExtValue();
5442 unsigned Opcode = 0;
5443
5444 if (TVal == ~FVal) {
5445 Opcode = ARMISD::CSINV;
5446 } else if (TVal == ~FVal + 1) {
5447 Opcode = ARMISD::CSNEG;
5448 } else if (TVal + 1 == FVal) {
5449 Opcode = ARMISD::CSINC;
5450 } else if (TVal == FVal + 1) {
5451 Opcode = ARMISD::CSINC;
5452 std::swap(TrueVal, FalseVal);
5453 std::swap(TVal, FVal);
5454 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5455 }
5456
5457 if (Opcode) {
5458 // If one of the constants is cheaper than another, materialise the
5459 // cheaper one and let the csel generate the other.
5460 if (Opcode != ARMISD::CSINC &&
5461 HasLowerConstantMaterializationCost(FVal, TVal, Subtarget)) {
5462 std::swap(TrueVal, FalseVal);
5463 std::swap(TVal, FVal);
5464 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5465 }
5466
5467 // Attempt to use ZR, checking that TVal is 0, possibly inverting the
5468 // condition to get there. CSINC is not invertible like the other two
5469 // (~(~a) == a and -(-a) == a, but (a+1)+1 != a).
5470 if (FVal == 0 && Opcode != ARMISD::CSINC) {
5471 std::swap(TrueVal, FalseVal);
5472 std::swap(TVal, FVal);
5473 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5474 }
5475
5476 // Drops F's value because we can get it by inverting/negating TVal.
5477 FalseVal = TrueVal;
5478
5479 SDValue ARMcc;
5480 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5481 EVT VT = TrueVal.getValueType();
5482 return DAG.getNode(Opcode, dl, VT, TrueVal, FalseVal, ARMcc, Cmp);
5483 }
5484 }
5485
5486 if (isUnsupportedFloatingType(LHS.getValueType())) {
5487 DAG.getTargetLoweringInfo().softenSetCCOperands(
5488 DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS);
5489
5490 // If softenSetCCOperands only returned one value, we should compare it to
5491 // zero.
5492 if (!RHS.getNode()) {
5493 RHS = DAG.getConstant(0, dl, LHS.getValueType());
5494 CC = ISD::SETNE;
5495 }
5496 }
5497
5498 if (LHS.getValueType() == MVT::i32) {
5499 // Try to generate VSEL on ARMv8.
5500 // The VSEL instruction can't use all the usual ARM condition
5501 // codes: it only has two bits to select the condition code, so it's
5502 // constrained to use only GE, GT, VS and EQ.
5503 //
5504 // To implement all the various ISD::SETXXX opcodes, we sometimes need to
5505 // swap the operands of the previous compare instruction (effectively
5506 // inverting the compare condition, swapping 'less' and 'greater') and
5507 // sometimes need to swap the operands to the VSEL (which inverts the
5508 // condition in the sense of firing whenever the previous condition didn't)
5509 if (Subtarget->hasFPARMv8Base() && (TrueVal.getValueType() == MVT::f16 ||
5510 TrueVal.getValueType() == MVT::f32 ||
5511 TrueVal.getValueType() == MVT::f64)) {
5512 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
5513 if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
5514 CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
5515 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5516 std::swap(TrueVal, FalseVal);
5517 }
5518 }
5519
5520 SDValue ARMcc;
5521 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5522 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5523 // Choose GE over PL, which vsel does not support.
5524 if (cast<ConstantSDNode>(ARMcc)->getZExtValue() == ARMCC::PL)
5525 ARMcc = DAG.getConstant(ARMCC::GE, dl, MVT::i32);
5526 return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
5527 }
5528
5529 ARMCC::CondCodes CondCode, CondCode2;
5530 FPCCToARMCC(CC, CondCode, CondCode2);
5531
5532 // Normalize the fp compare. If RHS is zero we prefer to keep it there so we
5533 // match CMPFPw0 instead of CMPFP, though we don't do this for f16 because we
5534 // must use VSEL (limited condition codes), due to not having conditional f16
5535 // moves.
5536 if (Subtarget->hasFPARMv8Base() &&
5537 !(isFloatingPointZero(RHS) && TrueVal.getValueType() != MVT::f16) &&
5538 (TrueVal.getValueType() == MVT::f16 ||
5539 TrueVal.getValueType() == MVT::f32 ||
5540 TrueVal.getValueType() == MVT::f64)) {
5541 bool swpCmpOps = false;
5542 bool swpVselOps = false;
5543 checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
5544
5545 if (CondCode == ARMCC::GT || CondCode == ARMCC::GE ||
5546 CondCode == ARMCC::VS || CondCode == ARMCC::EQ) {
5547 if (swpCmpOps)
5548 std::swap(LHS, RHS);
5549 if (swpVselOps)
5550 std::swap(TrueVal, FalseVal);
5551 }
5552 }
5553
5554 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5555 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
5556 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5557 SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
5558 if (CondCode2 != ARMCC::AL) {
5559 SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32);
5560 // FIXME: Needs another CMP because flag can have but one use.
5561 SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
5562 Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG);
5563 }
5564 return Result;
5565}
5566
5567/// canChangeToInt - Given the fp compare operand, return true if it is suitable
5568/// to morph to an integer compare sequence.
5569static bool canChangeToInt(SDValue Op, bool &SeenZero,
5570 const ARMSubtarget *Subtarget) {
5571 SDNode *N = Op.getNode();
5572 if (!N->hasOneUse())
5573 // Otherwise it requires moving the value from fp to integer registers.
5574 return false;
5575 if (!N->getNumValues())
5576 return false;
5577 EVT VT = Op.getValueType();
5578 if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
5579 // f32 case is generally profitable. f64 case only makes sense when vcmpe +
5580 // vmrs are very slow, e.g. cortex-a8.
5581 return false;
5582
5583 if (isFloatingPointZero(Op)) {
5584 SeenZero = true;
5585 return true;
5586 }
5587 return ISD::isNormalLoad(N);
5588}
5589
5590static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
5591 if (isFloatingPointZero(Op))
5592 return DAG.getConstant(0, SDLoc(Op), MVT::i32);
5593
5594 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
5595 return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(),
5596 Ld->getPointerInfo(), Ld->getAlign(),
5597 Ld->getMemOperand()->getFlags());
5598
5599 llvm_unreachable("Unknown VFP cmp argument!");
5600}
5601
5602static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
5603 SDValue &RetVal1, SDValue &RetVal2) {
5604 SDLoc dl(Op);
5605
5606 if (isFloatingPointZero(Op)) {
5607 RetVal1 = DAG.getConstant(0, dl, MVT::i32);
5608 RetVal2 = DAG.getConstant(0, dl, MVT::i32);
5609 return;
5610 }
5611
5612 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
5613 SDValue Ptr = Ld->getBasePtr();
5614 RetVal1 =
5615 DAG.getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
5616 Ld->getAlign(), Ld->getMemOperand()->getFlags());
5617
5618 EVT PtrType = Ptr.getValueType();
5619 SDValue NewPtr = DAG.getNode(ISD::ADD, dl,
5620 PtrType, Ptr, DAG.getConstant(4, dl, PtrType));
5621 RetVal2 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), NewPtr,
5622 Ld->getPointerInfo().getWithOffset(4),
5623 commonAlignment(Ld->getAlign(), 4),
5624 Ld->getMemOperand()->getFlags());
5625 return;
5626 }
5627
5628 llvm_unreachable("Unknown VFP cmp argument!");
5629}
5630
5631/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
5632/// f32 and even f64 comparisons to integer ones.
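// For example, under these constraints an equality test x == 0.0f can instead
// be checked as ((bitcast x to i32) & 0x7fffffff) == 0, which treats +0.0 and
// -0.0 identically and avoids the fp compare plus the transfer of the flags
// back to the core registers.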
5633SDValue
5634ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
5635 SDValue Chain = Op.getOperand(0);
5636 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
5637 SDValue LHS = Op.getOperand(2);
5638 SDValue RHS = Op.getOperand(3);
5639 SDValue Dest = Op.getOperand(4);
5640 SDLoc dl(Op);
5641
5642 bool LHSSeenZero = false;
5643 bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
5644 bool RHSSeenZero = false;
5645 bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
5646 if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
5647 // If unsafe fp math optimization is enabled and there are no other uses of
5648 // the CMP operands, and the condition code is EQ or NE, we can optimize it
5649 // to an integer comparison.
5650 if (CC == ISD::SETOEQ)
5651 CC = ISD::SETEQ;
5652 else if (CC == ISD::SETUNE)
5653 CC = ISD::SETNE;
5654
5655 SDValue Mask = DAG.getConstant(0x7fffffff, dl, MVT::i32);
5656 SDValue ARMcc;
5657 if (LHS.getValueType() == MVT::f32) {
5658 LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
5659 bitcastf32Toi32(LHS, DAG), Mask);
5660 RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
5661 bitcastf32Toi32(RHS, DAG), Mask);
5662 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5663 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5664 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
5665 Chain, Dest, ARMcc, CCR, Cmp);
5666 }
5667
5668 SDValue LHS1, LHS2;
5669 SDValue RHS1, RHS2;
5670 expandf64Toi32(LHS, DAG, LHS1, LHS2);
5671 expandf64Toi32(RHS, DAG, RHS1, RHS2);
5672 LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
5673 RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
5674 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
5675 ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5676 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
5677 SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
5678 return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops);
5679 }
5680
5681 return SDValue();
5682}
5683
5684SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
5685 SDValue Chain = Op.getOperand(0);
5686 SDValue Cond = Op.getOperand(1);
5687 SDValue Dest = Op.getOperand(2);
5688 SDLoc dl(Op);
5689
5690 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
5691 // instruction.
5692 unsigned Opc = Cond.getOpcode();
5693 bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) &&
5694 !Subtarget->isThumb1Only();
5695 if (Cond.getResNo() == 1 &&
5696 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
5697 Opc == ISD::USUBO || OptimizeMul)) {
5698 // Only lower legal XALUO ops.
5699 if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
5700 return SDValue();
5701
5702 // The actual operation with overflow check.
5703 SDValue Value, OverflowCmp;
5704 SDValue ARMcc;
5705 std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
5706
5707 // Reverse the condition code.
5708 ARMCC::CondCodes CondCode =
5709 (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
5710 CondCode = ARMCC::getOppositeCondition(CondCode);
5711 ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
5712 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5713
5714 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR,
5715 OverflowCmp);
5716 }
5717
5718 return SDValue();
5719}
5720
5721SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
5722 SDValue Chain = Op.getOperand(0);
5723 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
5724 SDValue LHS = Op.getOperand(2);
5725 SDValue RHS = Op.getOperand(3);
5726 SDValue Dest = Op.getOperand(4);
5727 SDLoc dl(Op);
5728
5729 if (isUnsupportedFloatingType(LHS.getValueType())) {
5730 DAG.getTargetLoweringInfo().softenSetCCOperands(
5731 DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS);
5732
5733 // If softenSetCCOperands only returned one value, we should compare it to
5734 // zero.
5735 if (!RHS.getNode()) {
5736 RHS = DAG.getConstant(0, dl, LHS.getValueType());
5737 CC = ISD::SETNE;
5738 }
5739 }
5740
5741 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
5742 // instruction.
5743 unsigned Opc = LHS.getOpcode();
5744 bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) &&
5745 !Subtarget->isThumb1Only();
5746 if (LHS.getResNo() == 1 && (isOneConstant(RHS) || isNullConstant(RHS)) &&
5747 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
5748 Opc == ISD::USUBO || OptimizeMul) &&
5749 (CC == ISD::SETEQ || CC == ISD::SETNE)) {
5750 // Only lower legal XALUO ops.
5751 if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
5752 return SDValue();
5753
5754 // The actual operation with overflow check.
5755 SDValue Value, OverflowCmp;
5756 SDValue ARMcc;
5757 std::tie(Value, OverflowCmp) = getARMXALUOOp(LHS.getValue(0), DAG, ARMcc);
5758
5759 if ((CC == ISD::SETNE) != isOneConstant(RHS)) {
5760 // Reverse the condition code.
5761 ARMCC::CondCodes CondCode =
5762 (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
5763 CondCode = ARMCC::getOppositeCondition(CondCode);
5764 ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
5765 }
5766 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5767
5768 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR,
5769 OverflowCmp);
5770 }
5771
5772 if (LHS.getValueType() == MVT::i32) {
5773 SDValue ARMcc;
5774 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5775 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5776 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
5777 Chain, Dest, ARMcc, CCR, Cmp);
5778 }
5779
5780 if (getTargetMachine().Options.UnsafeFPMath &&
5781 (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
5782 CC == ISD::SETNE || CC == ISD::SETUNE)) {
5783 if (SDValue Result = OptimizeVFPBrcond(Op, DAG))
5784 return Result;
5785 }
5786
5787 ARMCC::CondCodes CondCode, CondCode2;
5788 FPCCToARMCC(CC, CondCode, CondCode2);
5789
5790 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5791 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
5792 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5793 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
5794 SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
5795 SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
5796 if (CondCode2 != ARMCC::AL) {
5797 ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
5798 SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
5799 Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
5800 }
5801 return Res;
5802}
5803
5804SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
5805 SDValue Chain = Op.getOperand(0);
5806 SDValue Table = Op.getOperand(1);
5807 SDValue Index = Op.getOperand(2);
5808 SDLoc dl(Op);
5809
5810 EVT PTy = getPointerTy(DAG.getDataLayout());
5811 JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
5812 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
5813 Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI);
5814 Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, dl, PTy));
5815 SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Index);
5816 if (Subtarget->isThumb2() || (Subtarget->hasV8MBaselineOps() && Subtarget->isThumb())) {
5817 // Thumb2 and ARMv8-M use a two-level jump. That is, they jump into the jump
5818 // table, which does another jump to the destination. This also makes it
5819 // easier to translate it to TBB / TBH later (Thumb2 only).
5820 // FIXME: This might not work if the function is extremely large.
5821 return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
5822 Addr, Op.getOperand(2), JTI);
5823 }
5824 if (isPositionIndependent() || Subtarget->isROPI()) {
5825 Addr =
5826 DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
5827 MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
5828 Chain = Addr.getValue(1);
5829 Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Addr);
5830 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
5831 } else {
5832 Addr =
5833 DAG.getLoad(PTy, dl, Chain, Addr,
5834 MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
5835 Chain = Addr.getValue(1);
5836 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
5837 }
5838}
5839
5840static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
5841 EVT VT = Op.getValueType();
5842 SDLoc dl(Op);
5843
5844 if (Op.getValueType().getVectorElementType() == MVT::i32) {
5845 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
5846 return Op;
5847 return DAG.UnrollVectorOp(Op.getNode());
5848 }
5849
5850 const bool HasFullFP16 = DAG.getSubtarget<ARMSubtarget>().hasFullFP16();
5851
5852 EVT NewTy;
5853 const EVT OpTy = Op.getOperand(0).getValueType();
5854 if (OpTy == MVT::v4f32)
5855 NewTy = MVT::v4i32;
5856 else if (OpTy == MVT::v4f16 && HasFullFP16)
5857 NewTy = MVT::v4i16;
5858 else if (OpTy == MVT::v8f16 && HasFullFP16)
5859 NewTy = MVT::v8i16;
5860 else
5861 llvm_unreachable("Invalid type for custom lowering!");
5862
5863 if (VT != MVT::v4i16 && VT != MVT::v8i16)
5864 return DAG.UnrollVectorOp(Op.getNode());
5865
5866 Op = DAG.getNode(Op.getOpcode(), dl, NewTy, Op.getOperand(0));
5867 return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
5868}
5869
5870SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
5871 EVT VT = Op.getValueType();
5872 if (VT.isVector())
5873 return LowerVectorFP_TO_INT(Op, DAG);
5874
5875 bool IsStrict = Op->isStrictFPOpcode();
5876 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
5877
5878 if (isUnsupportedFloatingType(SrcVal.getValueType())) {
5879 RTLIB::Libcall LC;
5880 if (Op.getOpcode() == ISD::FP_TO_SINT ||
5881 Op.getOpcode() == ISD::STRICT_FP_TO_SINT)
5882 LC = RTLIB::getFPTOSINT(SrcVal.getValueType(),
5883 Op.getValueType());
5884 else
5885 LC = RTLIB::getFPTOUINT(SrcVal.getValueType(),
5886 Op.getValueType());
5887 SDLoc Loc(Op);
5888 MakeLibCallOptions CallOptions;
5889 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
5890 SDValue Result;
5891 std::tie(Result, Chain) = makeLibCall(DAG, LC, Op.getValueType(), SrcVal,
5892 CallOptions, Loc, Chain);
5893 return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result;
5894 }
5895
5896 // FIXME: Remove this when we have strict fp instruction selection patterns
5897 if (IsStrict) {
5898 SDLoc Loc(Op);
5899 SDValue Result =
5900 DAG.getNode(Op.getOpcode() == ISD::STRICT_FP_TO_SINT ? ISD::FP_TO_SINT
5901 : ISD::FP_TO_UINT,
5902 Loc, Op.getValueType(), SrcVal);
5903 return DAG.getMergeValues({Result, Op.getOperand(0)}, Loc);
5904 }
5905
5906 return Op;
5907}
5908
5909static SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
5910 const ARMSubtarget *Subtarget) {
5911 EVT VT = Op.getValueType();
5912 EVT ToVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
5913 EVT FromVT = Op.getOperand(0).getValueType();
5914
5915 if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f32)
5916 return Op;
5917 if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f64 &&
5918 Subtarget->hasFP64())
5919 return Op;
5920 if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f16 &&
5921 Subtarget->hasFullFP16())
5922 return Op;
5923 if (VT == MVT::v4i32 && ToVT == MVT::i32 && FromVT == MVT::v4f32 &&
5924 Subtarget->hasMVEFloatOps())
5925 return Op;
5926 if (VT == MVT::v8i16 && ToVT == MVT::i16 && FromVT == MVT::v8f16 &&
5927 Subtarget->hasMVEFloatOps())
5928 return Op;
5929
5930 if (FromVT != MVT::v4f32 && FromVT != MVT::v8f16)
5931 return SDValue();
5932
5933 SDLoc DL(Op);
5934 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
5935 unsigned BW = ToVT.getScalarSizeInBits() - IsSigned;
5936 SDValue CVT = DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
5937 DAG.getValueType(VT.getScalarType()));
5938 SDValue Max = DAG.getNode(IsSigned ? ISD::SMIN : ISD::UMIN, DL, VT, CVT,
5939 DAG.getConstant((1 << BW) - 1, DL, VT));
5940 if (IsSigned)
5941 Max = DAG.getNode(ISD::SMAX, DL, VT, Max,
5942 DAG.getConstant(-(1 << BW), DL, VT));
5943 return Max;
5944}
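// For example, a saturating fptosi to i16 lanes performed in i32 lanes uses
// BW == 15, so the clamp above is smin(x, 0x7fff) followed by smax(..., -0x8000);
// the unsigned form uses BW == 16 and a single umin(x, 0xffff).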
5945
5946static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
5947 EVT VT = Op.getValueType();
5948 SDLoc dl(Op);
5949
5950 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
5951 if (VT.getVectorElementType() == MVT::f32)
5952 return Op;
5953 return DAG.UnrollVectorOp(Op.getNode());
5954 }
5955
5956 assert((Op.getOperand(0).getValueType() == MVT::v4i16 ||
5957 Op.getOperand(0).getValueType() == MVT::v8i16) &&
5958 "Invalid type for custom lowering!");
5959
5960 const bool HasFullFP16 = DAG.getSubtarget<ARMSubtarget>().hasFullFP16();
5961
5962 EVT DestVecType;
5963 if (VT == MVT::v4f32)
5964 DestVecType = MVT::v4i32;
5965 else if (VT == MVT::v4f16 && HasFullFP16)
5966 DestVecType = MVT::v4i16;
5967 else if (VT == MVT::v8f16 && HasFullFP16)
5968 DestVecType = MVT::v8i16;
5969 else
5970 return DAG.UnrollVectorOp(Op.getNode());
5971
5972 unsigned CastOpc;
5973 unsigned Opc;
5974 switch (Op.getOpcode()) {
5975 default: llvm_unreachable("Invalid opcode!");
5976 case ISD::SINT_TO_FP:
5977 CastOpc = ISD::SIGN_EXTEND;
5978 Opc = ISD::SINT_TO_FP;
5979 break;
5980 case ISD::UINT_TO_FP:
5981 CastOpc = ISD::ZERO_EXTEND;
5982 Opc = ISD::UINT_TO_FP;
5983 break;
5984 }
5985
5986 Op = DAG.getNode(CastOpc, dl, DestVecType, Op.getOperand(0));
5987 return DAG.getNode(Opc, dl, VT, Op);
5988}
5989
5990SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
5991 EVT VT = Op.getValueType();
5992 if (VT.isVector())
5993 return LowerVectorINT_TO_FP(Op, DAG);
5994 if (isUnsupportedFloatingType(VT)) {
5995 RTLIB::Libcall LC;
5996 if (Op.getOpcode() == ISD::SINT_TO_FP)
5997 LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),
5998 Op.getValueType());
5999 else
6000 LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(),
6001 Op.getValueType());
6002 MakeLibCallOptions CallOptions;
6003 return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
6004 CallOptions, SDLoc(Op)).first;
6005 }
6006
6007 return Op;
6008}
6009
6010SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
6011 // Implement fcopysign with a fabs and a conditional fneg.
6012 SDValue Tmp0 = Op.getOperand(0);
6013 SDValue Tmp1 = Op.getOperand(1);
6014 SDLoc dl(Op);
6015 EVT VT = Op.getValueType();
6016 EVT SrcVT = Tmp1.getValueType();
6017 bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
6018 Tmp0.getOpcode() == ARMISD::VMOVDRR;
6019 bool UseNEON = !InGPR && Subtarget->hasNEON();
6020
6021 if (UseNEON) {
6022 // Use VBSL to copy the sign bit.
6023 unsigned EncodedVal = ARM_AM::createVMOVModImm(0x6, 0x80);
6024 SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
6025 DAG.getTargetConstant(EncodedVal, dl, MVT::i32));
6026 EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
6027 if (VT == MVT::f64)
6028 Mask = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT,
6029 DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
6030 DAG.getConstant(32, dl, MVT::i32));
6031 else /*if (VT == MVT::f32)*/
6032 Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
6033 if (SrcVT == MVT::f32) {
6034 Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
6035 if (VT == MVT::f64)
6036 Tmp1 = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT,
6037 DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
6038 DAG.getConstant(32, dl, MVT::i32));
6039 } else if (VT == MVT::f32)
6040 Tmp1 = DAG.getNode(ARMISD::VSHRuIMM, dl, MVT::v1i64,
6041 DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
6042 DAG.getConstant(32, dl, MVT::i32));
6043 Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
6044 Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
6045
6046 SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0xff),
6047 dl, MVT::i32);
6048 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
6049 SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
6050 DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
6051
6052 SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
6053 DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
6054 DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
6055 if (VT == MVT::f32) {
6056 Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
6057 Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
6058 DAG.getConstant(0, dl, MVT::i32));
6059 } else {
6060 Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
6061 }
6062
6063 return Res;
6064 }
6065
6066 // Bitcast operand 1 to i32.
6067 if (SrcVT == MVT::f64)
6068 Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
6069 Tmp1).getValue(1);
6070 Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
6071
6072 // Or in the signbit with integer operations.
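// For f32 this amounts to computing copysign(x, y) as
//   bitcast((bits(x) & 0x7fffffff) | (bits(y) & 0x80000000))
// i.e. keep the magnitude of x and take only the sign bit from y.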
6073 SDValue Mask1 = DAG.getConstant(0x80000000, dl, MVT::i32);
6074 SDValue Mask2 = DAG.getConstant(0x7fffffff, dl, MVT::i32);
6075 Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
6076 if (VT == MVT::f32) {
6077 Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
6078 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
6079 return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
6080 DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
6081 }
6082
6083 // f64: Or the high part with signbit and then combine two parts.
6084 Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
6085 Tmp0);
6086 SDValue Lo = Tmp0.getValue(0);
6087 SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
6088 Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
6089 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
6090}
6091
6092SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
6093 MachineFunction &MF = DAG.getMachineFunction();
6094 MachineFrameInfo &MFI = MF.getFrameInfo();
6095 MFI.setReturnAddressIsTaken(true);
6096
6097 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
6098 return SDValue();
6099
6100 EVT VT = Op.getValueType();
6101 SDLoc dl(Op);
6102 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
6103 if (Depth) {
6104 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
6105 SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
6106 return DAG.getLoad(VT, dl, DAG.getEntryNode(),
6107 DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
6108 MachinePointerInfo());
6109 }
6110
6111 // Return LR, which contains the return address. Mark it an implicit live-in.
6112 Register Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
6113 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
6114}
6115
6116SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
6117 const ARMBaseRegisterInfo &ARI =
6118 *static_cast<const ARMBaseRegisterInfo*>(RegInfo);
6119 MachineFunction &MF = DAG.getMachineFunction();
6120 MachineFrameInfo &MFI = MF.getFrameInfo();
6121 MFI.setFrameAddressIsTaken(true);
6122
6123 EVT VT = Op.getValueType();
6124 SDLoc dl(Op); // FIXME probably not meaningful
6125 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
6126 Register FrameReg = ARI.getFrameRegister(MF);
6127 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
6128 while (Depth--)
6129 FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
6130 MachinePointerInfo());
6131 return FrameAddr;
6132}
6133
6134// FIXME? Maybe this could be a TableGen attribute on some registers and
6135// this table could be generated automatically from RegInfo.
6136Register ARMTargetLowering::getRegisterByName(const char* RegName, LLT VT,
6137 const MachineFunction &MF) const {
6138 Register Reg = StringSwitch<unsigned>(RegName)
6139 .Case("sp", ARM::SP)
6140 .Default(0);
6141 if (Reg)
6142 return Reg;
6143 report_fatal_error(Twine("Invalid register name \""
6144 + StringRef(RegName) + "\"."));
6145}
6146
6147// Result is 64 bit value so split into two 32 bit values and return as a
6148// pair of values.
6149static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl<SDValue> &Results,
6150 SelectionDAG &DAG) {
6151 SDLoc DL(N);
6152
6153 // This function is only supposed to be called for i64 type destination.
6154 assert(N->getValueType(0) == MVT::i64
6155 && "ExpandREAD_REGISTER called for non-i64 type result.");
6156
6157 SDValue Read = DAG.getNode(ISD::READ_REGISTER, DL,
6158 DAG.getVTList(MVT::i32, MVT::i32, MVT::Other),
6159 N->getOperand(0),
6160 N->getOperand(1));
6161
6162 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Read.getValue(0),
6163 Read.getValue(1)));
6164 Results.push_back(Read.getOperand(0));
6165}
6166
6167/// \p BC is a bitcast that is about to be turned into a VMOVDRR.
6168/// When \p DstVT, the destination type of \p BC, is on the vector
6169/// register bank and the source of bitcast, \p Op, operates on the same bank,
6170/// it might be possible to combine them, such that everything stays on the
6171/// vector register bank.
6172/// \returns The node that would replace \p BC, if the combine
6173/// is possible.
6174static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC,
6175 SelectionDAG &DAG) {
6176 SDValue Op = BC->getOperand(0);
6177 EVT DstVT = BC->getValueType(0);
6178
6179 // The only vector instruction that can produce a scalar (remember,
6180 // since the bitcast was about to be turned into VMOVDRR, the source
6181 // type is i64) from a vector is EXTRACT_VECTOR_ELT.
6182 // Moreover, we can do this combine only if there is one use.
6183 // Finally, if the destination type is not a vector, there is not
6184 // much point in forcing everything onto the vector bank.
6185 if (!DstVT.isVector() || Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
6186 !Op.hasOneUse())
6187 return SDValue();
6188
6189 // If the index is not constant, we will introduce an additional
6190 // multiply that will stick.
6191 // Give up in that case.
6192 ConstantSDNode *Index = dyn_cast<ConstantSDNode>(Op.getOperand(1));
6193 if (!Index)
6194 return SDValue();
6195 unsigned DstNumElt = DstVT.getVectorNumElements();
6196
6197 // Compute the new index.
6198 const APInt &APIntIndex = Index->getAPIntValue();
6199 APInt NewIndex(APIntIndex.getBitWidth(), DstNumElt);
6200 NewIndex *= APIntIndex;
6201 // Check if the new constant index fits into i32.
6202 if (NewIndex.getBitWidth() > 32)
6203 return SDValue();
6204
6205 // vMTy bitcast(i64 extractelt vNi64 src, i32 index) ->
6206 // vMTy extractsubvector vNxMTy (bitcast vNi64 src), i32 index*M)
6207 SDLoc dl(Op);
6208 SDValue ExtractSrc = Op.getOperand(0);
6209 EVT VecVT = EVT::getVectorVT(
6210 *DAG.getContext(), DstVT.getScalarType(),
6211 ExtractSrc.getValueType().getVectorNumElements() * DstNumElt);
6212 SDValue BitCast = DAG.getNode(ISD::BITCAST, dl, VecVT, ExtractSrc);
6213 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, BitCast,
6214 DAG.getConstant(NewIndex.getZExtValue(), dl, MVT::i32));
6215}
6216
6217/// ExpandBITCAST - If the target supports VFP, this function is called to
6218/// expand a bit convert where either the source or destination type is i64 to
6219/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
6220/// operand type is illegal (e.g., v2f32 for a target that doesn't support
6221/// vectors), since the legalizer won't know what to do with that.
6222SDValue ARMTargetLowering::ExpandBITCAST(SDNode *N, SelectionDAG &DAG,
6223 const ARMSubtarget *Subtarget) const {
6224 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6225 SDLoc dl(N);
6226 SDValue Op = N->getOperand(0);
6227
6228 // This function is only supposed to be called for i16 and i64 types, either
6229 // as the source or destination of the bit convert.
6230 EVT SrcVT = Op.getValueType();
6231 EVT DstVT = N->getValueType(0);
6232
6233 if ((SrcVT == MVT::i16 || SrcVT == MVT::i32) &&
6234 (DstVT == MVT::f16 || DstVT == MVT::bf16))
6235 return MoveToHPR(SDLoc(N), DAG, MVT::i32, DstVT.getSimpleVT(),
6236 DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), MVT::i32, Op));
6237
6238 if ((DstVT == MVT::i16 || DstVT == MVT::i32) &&
6239 (SrcVT == MVT::f16 || SrcVT == MVT::bf16))
6240 return DAG.getNode(
6241 ISD::TRUNCATE, SDLoc(N), DstVT,
6242 MoveFromHPR(SDLoc(N), DAG, MVT::i32, SrcVT.getSimpleVT(), Op));
6243
6244 if (!(SrcVT == MVT::i64 || DstVT == MVT::i64))
6245 return SDValue();
6246
6247 // Turn i64->f64 into VMOVDRR.
6248 if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
6249 // Do not force values to GPRs (this is what VMOVDRR does for the inputs)
6250 // if we can combine the bitcast with its source.
6251 if (SDValue Val = CombineVMOVDRRCandidateWithVecOp(N, DAG))
6252 return Val;
6253 SDValue Lo, Hi;
6254 std::tie(Lo, Hi) = DAG.SplitScalar(Op, dl, MVT::i32, MVT::i32);
6255 return DAG.getNode(ISD::BITCAST, dl, DstVT,
6256 DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
6257 }
6258
6259 // Turn f64->i64 into VMOVRRD.
6260 if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
6261 SDValue Cvt;
6262 if (DAG.getDataLayout().isBigEndian() && SrcVT.isVector() &&
6263 SrcVT.getVectorNumElements() > 1)
6264 Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
6265 DAG.getVTList(MVT::i32, MVT::i32),
6266 DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op));
6267 else
6268 Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
6269 DAG.getVTList(MVT::i32, MVT::i32), Op);
6270 // Merge the pieces into a single i64 value.
6271 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
6272 }
6273
6274 return SDValue();
6275}
6276
6277/// getZeroVector - Returns a vector of specified type with all zero elements.
6278/// Zero vectors are used to represent vector negation and in those cases
6279/// will be implemented with the NEON VNEG instruction. However, VNEG does
6280/// not support i64 elements, so sometimes the zero vectors will need to be
6281/// explicitly constructed. Regardless, use a canonical VMOV to create the
6282/// zero vector.
6283static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
6284 assert(VT.isVector() && "Expected a vector type");
6285 // The canonical modified immediate encoding of a zero vector is....0!
6286 SDValue EncodedVal = DAG.getTargetConstant(0, dl, MVT::i32);
6287 EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
6288 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
6289 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
6290}
6291
6292/// LowerShiftRightParts - Lower SRA_PARTS, which returns two
6293/// i32 values and takes a 2 x i32 value to shift plus a shift amount.
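// For example, a 64-bit logical shift right by ShAmt is assembled as
//   ShAmt < 32:  Lo = (Lo >> ShAmt) | (Hi << (32 - ShAmt)),  Hi = Hi >> ShAmt
//   ShAmt >= 32: Lo = Hi >> (ShAmt - 32),                    Hi = 0
// Both variants are materialised and a CMOV on (ShAmt - 32) >= 0 selects
// between them; the arithmetic variant differs only in filling the high word
// with the sign, Hi >> 31, instead of zero.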
6294SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
6295 SelectionDAG &DAG) const {
6296 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
6297 EVT VT = Op.getValueType();
6298 unsigned VTBits = VT.getSizeInBits();
6299 SDLoc dl(Op);
6300 SDValue ShOpLo = Op.getOperand(0);
6301 SDValue ShOpHi = Op.getOperand(1);
6302 SDValue ShAmt = Op.getOperand(2);
6303 SDValue ARMcc;
6304 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
6305 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
6306
6307 assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
6308
6309 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
6310 DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
6311 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
6312 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
6313 DAG.getConstant(VTBits, dl, MVT::i32));
6314 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
6315 SDValue LoSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
6316 SDValue LoBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
6317 SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6318 ISD::SETGE, ARMcc, DAG, dl);
6319 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift,
6320 ARMcc, CCR, CmpLo);
6321
6322 SDValue HiSmallShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
6323 SDValue HiBigShift = Opc == ISD::SRA
6324 ? DAG.getNode(Opc, dl, VT, ShOpHi,
6325 DAG.getConstant(VTBits - 1, dl, VT))
6326 : DAG.getConstant(0, dl, VT);
6327 SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6328 ISD::SETGE, ARMcc, DAG, dl);
6329 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
6330 ARMcc, CCR, CmpHi);
6331
6332 SDValue Ops[2] = { Lo, Hi };
6333 return DAG.getMergeValues(Ops, dl);
6334}
6335
6336/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
6337/// i32 values and takes a 2 x i32 value to shift plus a shift amount.
6338SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
6339 SelectionDAG &DAG) const {
6340 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
6341 EVT VT = Op.getValueType();
6342 unsigned VTBits = VT.getSizeInBits();
6343 SDLoc dl(Op);
6344 SDValue ShOpLo = Op.getOperand(0);
6345 SDValue ShOpHi = Op.getOperand(1);
6346 SDValue ShAmt = Op.getOperand(2);
6347 SDValue ARMcc;
6348 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
6349
6350  assert(Op.getOpcode() == ISD::SHL_PARTS);
6351 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
6352 DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
6353 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
6354 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
6355 SDValue HiSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
6356
6357 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
6358 DAG.getConstant(VTBits, dl, MVT::i32));
6359 SDValue HiBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
6360 SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6361 ISD::SETGE, ARMcc, DAG, dl);
6362 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
6363 ARMcc, CCR, CmpHi);
6364
6365 SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6366 ISD::SETGE, ARMcc, DAG, dl);
6367 SDValue LoSmallShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
6368 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift,
6369 DAG.getConstant(0, dl, VT), ARMcc, CCR, CmpLo);
6370
6371 SDValue Ops[2] = { Lo, Hi };
6372 return DAG.getMergeValues(Ops, dl);
6373}
6374
6375SDValue ARMTargetLowering::LowerGET_ROUNDING(SDValue Op,
6376 SelectionDAG &DAG) const {
6377 // The rounding mode is in bits 23:22 of the FPSCR.
6378 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
6379 // The formula we use to implement this is ((FPSCR + (1 << 22)) >> 22) & 3,
6380 // so that the shift and the AND get folded into a bitfield extract.
6381 SDLoc dl(Op);
6382 SDValue Chain = Op.getOperand(0);
6383 SDValue Ops[] = {Chain,
6384 DAG.getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32)};
6385
6386 SDValue FPSCR =
6387 DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, {MVT::i32, MVT::Other}, Ops);
6388 Chain = FPSCR.getValue(1);
6389 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
6390 DAG.getConstant(1U << 22, dl, MVT::i32));
6391 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
6392 DAG.getConstant(22, dl, MVT::i32));
6393 SDValue And = DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
6394 DAG.getConstant(3, dl, MVT::i32));
6395 return DAG.getMergeValues({And, Chain}, dl);
6396}
6397
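// [Editor's illustration, not part of ARMISelLowering.cpp] The FLT_ROUNDS mapping
// the lowering above builds, written as plain C++ over an already-read FPSCR value.
// Adding 1 << 22 increments FPSCR[23:22] modulo 4, which is exactly the
// 0->1, 1->2, 2->3, 3->0 rotation described in the comment:
static unsigned fltRoundsFromFPSCR(unsigned FPSCR) {
  return ((FPSCR + (1u << 22)) >> 22) & 3;   // e.g. RMode 3 (toward zero) -> 0
}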
6398SDValue ARMTargetLowering::LowerSET_ROUNDING(SDValue Op,
6399 SelectionDAG &DAG) const {
6400 SDLoc DL(Op);
6401 SDValue Chain = Op->getOperand(0);
6402 SDValue RMValue = Op->getOperand(1);
6403
6404 // The rounding mode is in bits 23:22 of the FPSCR.
6405 // The llvm.set.rounding argument value to ARM rounding mode value mapping
6406 // is 0->3, 1->0, 2->1, 3->2. The formula we use to implement this is
6407 // (((arg - 1) & 3) << 22).
6408 //
6409 // It is expected that the argument of llvm.set.rounding is within the
6410 // range [0, 3], so NearestTiesToAway (4) is not handled here. It is the
6411 // responsibility of the code that generates llvm.set.rounding to ensure this
6412 // condition.
6413
6414 // Calculate new value of FPSCR[23:22].
6415 RMValue = DAG.getNode(ISD::SUB, DL, MVT::i32, RMValue,
6416 DAG.getConstant(1, DL, MVT::i32));
6417 RMValue = DAG.getNode(ISD::AND, DL, MVT::i32, RMValue,
6418 DAG.getConstant(0x3, DL, MVT::i32));
6419 RMValue = DAG.getNode(ISD::SHL, DL, MVT::i32, RMValue,
6420 DAG.getConstant(ARM::RoundingBitsPos, DL, MVT::i32));
6421
6422 // Get current value of FPSCR.
6423 SDValue Ops[] = {Chain,
6424 DAG.getConstant(Intrinsic::arm_get_fpscr, DL, MVT::i32)};
6425 SDValue FPSCR =
6426 DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i32, MVT::Other}, Ops);
6427 Chain = FPSCR.getValue(1);
6428 FPSCR = FPSCR.getValue(0);
6429
6430 // Put new rounding mode into FPSCR[23:22].
6431 const unsigned RMMask = ~(ARM::Rounding::rmMask << ARM::RoundingBitsPos);
6432 FPSCR = DAG.getNode(ISD::AND, DL, MVT::i32, FPSCR,
6433 DAG.getConstant(RMMask, DL, MVT::i32));
6434 FPSCR = DAG.getNode(ISD::OR, DL, MVT::i32, FPSCR, RMValue);
6435 SDValue Ops2[] = {
6436 Chain, DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32), FPSCR};
6437 return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
6438}
6439
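// [Editor's illustration, not part of ARMISelLowering.cpp] The FPSCR update performed
// above, as plain C++. This assumes ARM::RoundingBitsPos == 22, which is what the
// "bits 23:22 of the FPSCR" comment implies:
static unsigned fpscrWithRoundingMode(unsigned FPSCR, unsigned RM) {
  unsigned RMode = (RM - 1) & 3;             // llvm.set.rounding arg -> ARM: 0->3, 1->0, 2->1, 3->2
  FPSCR &= ~(3u << 22);                      // clear FPSCR[23:22]
  return FPSCR | (RMode << 22);              // insert the new rounding mode
}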
6440static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
6441 const ARMSubtarget *ST) {
6442 SDLoc dl(N);
6443 EVT VT = N->getValueType(0);
6444 if (VT.isVector() && ST->hasNEON()) {
6445
6446 // Compute the least significant set bit: LSB = X & -X
6447 SDValue X = N->getOperand(0);
6448 SDValue NX = DAG.getNode(ISD::SUB, dl, VT, getZeroVector(VT, DAG, dl), X);
6449 SDValue LSB = DAG.getNode(ISD::AND, dl, VT, X, NX);
6450
6451 EVT ElemTy = VT.getVectorElementType();
6452
6453 if (ElemTy == MVT::i8) {
6454 // Compute with: cttz(x) = ctpop(lsb - 1)
6455 SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6456 DAG.getTargetConstant(1, dl, ElemTy));
6457 SDValue Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
6458 return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
6459 }
6460
6461 if ((ElemTy == MVT::i16 || ElemTy == MVT::i32) &&
6462 (N->getOpcode() == ISD::CTTZ_ZERO_UNDEF)) {
6463 // Compute with: cttz(x) = (width - 1) - ctlz(lsb), if x != 0
6464 unsigned NumBits = ElemTy.getSizeInBits();
6465 SDValue WidthMinus1 =
6466 DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6467 DAG.getTargetConstant(NumBits - 1, dl, ElemTy));
6468 SDValue CTLZ = DAG.getNode(ISD::CTLZ, dl, VT, LSB);
6469 return DAG.getNode(ISD::SUB, dl, VT, WidthMinus1, CTLZ);
6470 }
6471
6472 // Compute with: cttz(x) = ctpop(lsb - 1)
6473
6474 // Compute LSB - 1.
6475 SDValue Bits;
6476 if (ElemTy == MVT::i64) {
6477      // Load the constant 0xffff'ffff'ffff'ffff (pre-encoded VMOV modified immediate 0x1eff) into a register.
6478 SDValue FF = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6479 DAG.getTargetConstant(0x1eff, dl, MVT::i32));
6480 Bits = DAG.getNode(ISD::ADD, dl, VT, LSB, FF);
6481 } else {
6482 SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6483 DAG.getTargetConstant(1, dl, ElemTy));
6484 Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
6485 }
6486 return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
6487 }
6488
6489 if (!ST->hasV6T2Ops())
6490 return SDValue();
6491
6492 SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, VT, N->getOperand(0));
6493 return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
6494}
6495
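// [Editor's illustration, not part of ARMISelLowering.cpp] The scalar identities the
// vector CTTZ lowering above is built on, shown for one 32-bit element; the compiler
// builtins stand in for the vector CTPOP/CTLZ nodes:
static unsigned cttzViaPopcount(unsigned X) {
  unsigned LSB = X & (0u - X);               // isolate the least significant set bit
  return __builtin_popcount(LSB - 1u);       // cttz(x) == ctpop(lsb - 1); X == 0 yields 32
}
static unsigned cttzViaClz(unsigned X) {     // the CTTZ_ZERO_UNDEF path: X must be nonzero
  unsigned LSB = X & (0u - X);
  return 31u - (unsigned)__builtin_clz(LSB); // cttz(x) == (width - 1) - ctlz(lsb)
}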
6496static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
6497 const ARMSubtarget *ST) {
6498 EVT VT = N->getValueType(0);
6499 SDLoc DL(N);
6500
6501  assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
6502  assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
6503          VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
6504         "Unexpected type for custom ctpop lowering");
6505
6506 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6507 EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
6508 SDValue Res = DAG.getBitcast(VT8Bit, N->getOperand(0));
6509 Res = DAG.getNode(ISD::CTPOP, DL, VT8Bit, Res);
6510
6511 // Widen v8i8/v16i8 CTPOP result to VT by repeatedly widening pairwise adds.
6512 unsigned EltSize = 8;
6513 unsigned NumElts = VT.is64BitVector() ? 8 : 16;
6514 while (EltSize != VT.getScalarSizeInBits()) {
6515 SmallVector<SDValue, 8> Ops;
6516 Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddlu, DL,
6517 TLI.getPointerTy(DAG.getDataLayout())));
6518 Ops.push_back(Res);
6519
6520 EltSize *= 2;
6521 NumElts /= 2;
6522 MVT WidenVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize), NumElts);
6523 Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, WidenVT, Ops);
6524 }
6525
6526 return Res;
6527}
6528
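// [Editor's illustration, not part of ARMISelLowering.cpp] What the vpaddlu loop above
// does, modelled on a single 64-bit lane that already holds the eight per-byte CTPOP
// results: each pass adds adjacent lanes into lanes of twice the width.
static unsigned long long widenByteCountsTo64(unsigned long long ByteCounts) {
  unsigned long long C16 = (ByteCounts & 0x00ff00ff00ff00ffULL) +
                           ((ByteCounts >> 8) & 0x00ff00ff00ff00ffULL);   // 8 x i8 -> 4 x i16
  unsigned long long C32 = (C16 & 0x0000ffff0000ffffULL) +
                           ((C16 >> 16) & 0x0000ffff0000ffffULL);         // 4 x i16 -> 2 x i32
  return (C32 & 0xffffffffULL) + (C32 >> 32);                             // 2 x i32 -> 1 x i64
}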
6529/// getVShiftImm - Check if this is a valid build_vector for the immediate
6530/// operand of a vector shift operation, where all the elements of the
6531/// build_vector must have the same constant integer value.
6532static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
6533 // Ignore bit_converts.
6534 while (Op.getOpcode() == ISD::BITCAST)
6535 Op = Op.getOperand(0);
6536 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
6537 APInt SplatBits, SplatUndef;
6538 unsigned SplatBitSize;
6539 bool HasAnyUndefs;
6540 if (!BVN ||
6541 !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
6542 ElementBits) ||
6543 SplatBitSize > ElementBits)
6544 return false;
6545 Cnt = SplatBits.getSExtValue();
6546 return true;
6547}
6548
6549/// isVShiftLImm - Check if this is a valid build_vector for the immediate
6550/// operand of a vector shift left operation. That value must be in the range:
6551/// 0 <= Value < ElementBits for a left shift; or
6552/// 0 <= Value <= ElementBits for a long left shift.
6553static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
6554  assert(VT.isVector() && "vector shift count is not a vector type");
6555 int64_t ElementBits = VT.getScalarSizeInBits();
6556 if (!getVShiftImm(Op, ElementBits, Cnt))
6557 return false;
6558 return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
6559}
6560
6561/// isVShiftRImm - Check if this is a valid build_vector for the immediate
6562/// operand of a vector shift right operation. For a shift opcode, the value
6563/// is positive, but for an intrinsic the value must be negative. The
6564/// absolute value must be in the range:
6565/// 1 <= |Value| <= ElementBits for a right shift; or
6566/// 1 <= |Value| <= ElementBits/2 for a narrow right shift.
6567static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
6568 int64_t &Cnt) {
6569  assert(VT.isVector() && "vector shift count is not a vector type");
6570 int64_t ElementBits = VT.getScalarSizeInBits();
6571 if (!getVShiftImm(Op, ElementBits, Cnt))
6572 return false;
6573 if (!isIntrinsic)
6574 return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
6575 if (Cnt >= -(isNarrow ? ElementBits / 2 : ElementBits) && Cnt <= -1) {
6576 Cnt = -Cnt;
6577 return true;
6578 }
6579 return false;
6580}
6581
6582static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
6583 const ARMSubtarget *ST) {
6584 EVT VT = N->getValueType(0);
6585 SDLoc dl(N);
6586 int64_t Cnt;
6587
6588 if (!VT.isVector())
6589 return SDValue();
6590
6591 // We essentially have two forms here. Shift by an immediate and shift by a
6592 // vector register (there is also a shift by a GPR, but that is handled with a
6593 // tablegen pattern). We cannot easily match shift by an immediate in
6594 // tablegen so we do that here and generate a VSHLIMM/VSHRsIMM/VSHRuIMM.
6595 // For shifting by a vector, we don't have VSHR, only VSHL (which can be
6596 // signed or unsigned, and a negative shift indicates a shift right).
6597 if (N->getOpcode() == ISD::SHL) {
6598 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
6599 return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
6600 DAG.getConstant(Cnt, dl, MVT::i32));
6601 return DAG.getNode(ARMISD::VSHLu, dl, VT, N->getOperand(0),
6602 N->getOperand(1));
6603 }
6604
6605  assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
6606         "unexpected vector shift opcode");
6607
6608 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
6609 unsigned VShiftOpc =
6610 (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
6611 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
6612 DAG.getConstant(Cnt, dl, MVT::i32));
6613 }
6614
6615 // Other right shifts we don't have operations for (we use a shift left by a
6616 // negative number).
6617 EVT ShiftVT = N->getOperand(1).getValueType();
6618 SDValue NegatedCount = DAG.getNode(
6619 ISD::SUB, dl, ShiftVT, getZeroVector(ShiftVT, DAG, dl), N->getOperand(1));
6620 unsigned VShiftOpc =
6621 (N->getOpcode() == ISD::SRA ? ARMISD::VSHLs : ARMISD::VSHLu);
6622 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0), NegatedCount);
6623}
6624
6625static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
6626 const ARMSubtarget *ST) {
6627 EVT VT = N->getValueType(0);
6628 SDLoc dl(N);
6629
6630 // We can get here for a node like i32 = ISD::SHL i32, i64
6631 if (VT != MVT::i64)
6632 return SDValue();
6633
6634  assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA ||
6635          N->getOpcode() == ISD::SHL) &&
6636         "Unknown shift to lower!");
6637
6638 unsigned ShOpc = N->getOpcode();
6639 if (ST->hasMVEIntegerOps()) {
6640 SDValue ShAmt = N->getOperand(1);
6641 unsigned ShPartsOpc = ARMISD::LSLL;
6642 ConstantSDNode *Con = dyn_cast<ConstantSDNode>(ShAmt);
6643
6644    // If the constant shift amount is zero or at least 32, or the shift amount's
6645    // type is wider than 64 bits, fall back to the default expansion.
6646 if (ShAmt->getValueType(0).getSizeInBits() > 64 ||
6647 (Con && (Con->getZExtValue() == 0 || Con->getZExtValue() >= 32)))
6648 return SDValue();
6649
6650 // Extract the lower 32 bits of the shift amount if it's not an i32
6651 if (ShAmt->getValueType(0) != MVT::i32)
6652 ShAmt = DAG.getZExtOrTrunc(ShAmt, dl, MVT::i32);
6653
6654 if (ShOpc == ISD::SRL) {
6655 if (!Con)
6656 // There is no t2LSRLr instruction so negate and perform an lsll if the
6657 // shift amount is in a register, emulating a right shift.
6658 ShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
6659 DAG.getConstant(0, dl, MVT::i32), ShAmt);
6660 else
6661 // Else generate an lsrl on the immediate shift amount
6662 ShPartsOpc = ARMISD::LSRL;
6663 } else if (ShOpc == ISD::SRA)
6664 ShPartsOpc = ARMISD::ASRL;
6665
6666 // Split Lower/Upper 32 bits of the destination/source
6667 SDValue Lo, Hi;
6668 std::tie(Lo, Hi) =
6669 DAG.SplitScalar(N->getOperand(0), dl, MVT::i32, MVT::i32);
6670 // Generate the shift operation as computed above
6671 Lo = DAG.getNode(ShPartsOpc, dl, DAG.getVTList(MVT::i32, MVT::i32), Lo, Hi,
6672 ShAmt);
6673 // The upper 32 bits come from the second return value of lsll
6674 Hi = SDValue(Lo.getNode(), 1);
6675 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6676 }
6677
6678 // We only lower SRA, SRL of 1 here, all others use generic lowering.
6679 if (!isOneConstant(N->getOperand(1)) || N->getOpcode() == ISD::SHL)
6680 return SDValue();
6681
6682 // If we are in thumb mode, we don't have RRX.
6683 if (ST->isThumb1Only())
6684 return SDValue();
6685
6686 // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
6687 SDValue Lo, Hi;
6688 std::tie(Lo, Hi) = DAG.SplitScalar(N->getOperand(0), dl, MVT::i32, MVT::i32);
6689
6690 // First, build a SRA_GLUE/SRL_GLUE op, which shifts the top part by one and
6691  // captures the bit shifted out in the carry flag.
6692 unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_GLUE:ARMISD::SRA_GLUE;
6693 Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi);
6694
6695 // The low part is an ARMISD::RRX operand, which shifts the carry in.
6696 Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
6697
6698 // Merge the pieces into a single i64 value.
6699 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6700}
6701
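// [Editor's illustration, not part of ARMISelLowering.cpp] The non-MVE path above,
// a 64-bit logical shift right by one, expressed on the two 32-bit halves:
static void srl64ByOne(unsigned Lo, unsigned Hi, unsigned &OutLo, unsigned &OutHi) {
  unsigned Carry = Hi & 1u;                  // SRL_GLUE: the bit shifted out of Hi
  OutHi = Hi >> 1;                           // SRA_GLUE would shift arithmetically instead
  OutLo = (Lo >> 1) | (Carry << 31);         // RRX: rotate the carry into the top of Lo
}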
6702static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG,
6703 const ARMSubtarget *ST) {
6704 bool Invert = false;
6705 bool Swap = false;
6706 unsigned Opc = ARMCC::AL;
6707
6708 SDValue Op0 = Op.getOperand(0);
6709 SDValue Op1 = Op.getOperand(1);
6710 SDValue CC = Op.getOperand(2);
6711 EVT VT = Op.getValueType();
6712 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
6713 SDLoc dl(Op);
6714
6715 EVT CmpVT;
6716 if (ST->hasNEON())
6717 CmpVT = Op0.getValueType().changeVectorElementTypeToInteger();
6718 else {
6719    assert(ST->hasMVEIntegerOps() &&
6720           "No hardware support for integer vector comparison!");
6721
6722 if (Op.getValueType().getVectorElementType() != MVT::i1)
6723 return SDValue();
6724
6725 // Make sure we expand floating point setcc to scalar if we do not have
6726 // mve.fp, so that we can handle them from there.
6727 if (Op0.getValueType().isFloatingPoint() && !ST->hasMVEFloatOps())
6728 return SDValue();
6729
6730 CmpVT = VT;
6731 }
6732
6733 if (Op0.getValueType().getVectorElementType() == MVT::i64 &&
6734 (SetCCOpcode == ISD::SETEQ || SetCCOpcode == ISD::SETNE)) {
6735 // Special-case integer 64-bit equality comparisons. They aren't legal,
6736 // but they can be lowered with a few vector instructions.
6737 unsigned CmpElements = CmpVT.getVectorNumElements() * 2;
6738 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, CmpElements);
6739 SDValue CastOp0 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op0);
6740 SDValue CastOp1 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op1);
6741 SDValue Cmp = DAG.getNode(ISD::SETCC, dl, SplitVT, CastOp0, CastOp1,
6742 DAG.getCondCode(ISD::SETEQ));
6743 SDValue Reversed = DAG.getNode(ARMISD::VREV64, dl, SplitVT, Cmp);
6744 SDValue Merged = DAG.getNode(ISD::AND, dl, SplitVT, Cmp, Reversed);
6745 Merged = DAG.getNode(ISD::BITCAST, dl, CmpVT, Merged);
6746 if (SetCCOpcode == ISD::SETNE)
6747 Merged = DAG.getNOT(dl, Merged, CmpVT);
6748 Merged = DAG.getSExtOrTrunc(Merged, dl, VT);
6749 return Merged;
6750 }
6751
6752 if (CmpVT.getVectorElementType() == MVT::i64)
6753 // 64-bit comparisons are not legal in general.
6754 return SDValue();
6755
6756 if (Op1.getValueType().isFloatingPoint()) {
6757 switch (SetCCOpcode) {
6758    default: llvm_unreachable("Illegal FP comparison");
6759 case ISD::SETUNE:
6760 case ISD::SETNE:
6761 if (ST->hasMVEFloatOps()) {
6762 Opc = ARMCC::NE; break;
6763 } else {
6764 Invert = true; [[fallthrough]];
6765 }
6766 case ISD::SETOEQ:
6767 case ISD::SETEQ: Opc = ARMCC::EQ; break;
6768 case ISD::SETOLT:
6769 case ISD::SETLT: Swap = true; [[fallthrough]];
6770 case ISD::SETOGT:
6771 case ISD::SETGT: Opc = ARMCC::GT; break;
6772 case ISD::SETOLE:
6773 case ISD::SETLE: Swap = true; [[fallthrough]];
6774 case ISD::SETOGE:
6775 case ISD::SETGE: Opc = ARMCC::GE; break;
6776 case ISD::SETUGE: Swap = true; [[fallthrough]];
6777 case ISD::SETULE: Invert = true; Opc = ARMCC::GT; break;
6778 case ISD::SETUGT: Swap = true; [[fallthrough]];
6779 case ISD::SETULT: Invert = true; Opc = ARMCC::GE; break;
6780 case ISD::SETUEQ: Invert = true; [[fallthrough]];
6781 case ISD::SETONE: {
6782 // Expand this to (OLT | OGT).
6783 SDValue TmpOp0 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op1, Op0,
6784 DAG.getConstant(ARMCC::GT, dl, MVT::i32));
6785 SDValue TmpOp1 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
6786 DAG.getConstant(ARMCC::GT, dl, MVT::i32));
6787 SDValue Result = DAG.getNode(ISD::OR, dl, CmpVT, TmpOp0, TmpOp1);
6788 if (Invert)
6789 Result = DAG.getNOT(dl, Result, VT);
6790 return Result;
6791 }
6792 case ISD::SETUO: Invert = true; [[fallthrough]];
6793 case ISD::SETO: {
6794 // Expand this to (OLT | OGE).
6795 SDValue TmpOp0 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op1, Op0,
6796 DAG.getConstant(ARMCC::GT, dl, MVT::i32));
6797 SDValue TmpOp1 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
6798 DAG.getConstant(ARMCC::GE, dl, MVT::i32));
6799 SDValue Result = DAG.getNode(ISD::OR, dl, CmpVT, TmpOp0, TmpOp1);
6800 if (Invert)
6801 Result = DAG.getNOT(dl, Result, VT);
6802 return Result;
6803 }
6804 }
6805 } else {
6806 // Integer comparisons.
6807 switch (SetCCOpcode) {
6808    default: llvm_unreachable("Illegal integer comparison");
6809 case ISD::SETNE:
6810 if (ST->hasMVEIntegerOps()) {
6811 Opc = ARMCC::NE; break;
6812 } else {
6813 Invert = true; [[fallthrough]];
6814 }
6815 case ISD::SETEQ: Opc = ARMCC::EQ; break;
6816 case ISD::SETLT: Swap = true; [[fallthrough]];
6817 case ISD::SETGT: Opc = ARMCC::GT; break;
6818 case ISD::SETLE: Swap = true; [[fallthrough]];
6819 case ISD::SETGE: Opc = ARMCC::GE; break;
6820 case ISD::SETULT: Swap = true; [[fallthrough]];
6821 case ISD::SETUGT: Opc = ARMCC::HI; break;
6822 case ISD::SETULE: Swap = true; [[fallthrough]];
6823 case ISD::SETUGE: Opc = ARMCC::HS; break;
6824 }
6825
6826 // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
6827 if (ST->hasNEON() && Opc == ARMCC::EQ) {
6828 SDValue AndOp;
6829 if (ISD::isBuildVectorAllZeros(Op1.getNode()))
6830 AndOp = Op0;
6831 else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
6832 AndOp = Op1;
6833
6834 // Ignore bitconvert.
6835 if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
6836 AndOp = AndOp.getOperand(0);
6837
6838 if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
6839 Op0 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(0));
6840 Op1 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(1));
6841 SDValue Result = DAG.getNode(ARMISD::VTST, dl, CmpVT, Op0, Op1);
6842 if (!Invert)
6843 Result = DAG.getNOT(dl, Result, VT);
6844 return Result;
6845 }
6846 }
6847 }
6848
6849 if (Swap)
6850 std::swap(Op0, Op1);
6851
6852 // If one of the operands is a constant vector zero, attempt to fold the
6853 // comparison to a specialized compare-against-zero form.
6854 if (ISD::isBuildVectorAllZeros(Op0.getNode()) &&
6855 (Opc == ARMCC::GE || Opc == ARMCC::GT || Opc == ARMCC::EQ ||
6856 Opc == ARMCC::NE)) {
6857 if (Opc == ARMCC::GE)
6858 Opc = ARMCC::LE;
6859 else if (Opc == ARMCC::GT)
6860 Opc = ARMCC::LT;
6861 std::swap(Op0, Op1);
6862 }
6863
6864 SDValue Result;
6865 if (ISD::isBuildVectorAllZeros(Op1.getNode()) &&
6866 (Opc == ARMCC::GE || Opc == ARMCC::GT || Opc == ARMCC::LE ||
6867 Opc == ARMCC::LT || Opc == ARMCC::NE || Opc == ARMCC::EQ))
6868 Result = DAG.getNode(ARMISD::VCMPZ, dl, CmpVT, Op0,
6869 DAG.getConstant(Opc, dl, MVT::i32));
6870 else
6871 Result = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
6872 DAG.getConstant(Opc, dl, MVT::i32));
6873
6874 Result = DAG.getSExtOrTrunc(Result, dl, VT);
6875
6876 if (Invert)
6877 Result = DAG.getNOT(dl, Result, VT);
6878
6879 return Result;
6880}
6881
6882static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) {
6883 SDValue LHS = Op.getOperand(0);
6884 SDValue RHS = Op.getOperand(1);
6885 SDValue Carry = Op.getOperand(2);
6886 SDValue Cond = Op.getOperand(3);
6887 SDLoc DL(Op);
6888
6889  assert(LHS.getSimpleValueType().isInteger() && "SETCCCARRY is integer only.");
6890
6891 // ARMISD::SUBE expects a carry not a borrow like ISD::USUBO_CARRY so we
6892 // have to invert the carry first.
6893 Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
6894 DAG.getConstant(1, DL, MVT::i32), Carry);
6895 // This converts the boolean value carry into the carry flag.
6896 Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
6897
6898 SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
6899 SDValue Cmp = DAG.getNode(ARMISD::SUBE, DL, VTs, LHS, RHS, Carry);
6900
6901 SDValue FVal = DAG.getConstant(0, DL, MVT::i32);
6902 SDValue TVal = DAG.getConstant(1, DL, MVT::i32);
6903 SDValue ARMcc = DAG.getConstant(
6904 IntCCToARMCC(cast<CondCodeSDNode>(Cond)->get()), DL, MVT::i32);
6905 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
6906 SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, ARM::CPSR,
6907 Cmp.getValue(1), SDValue());
6908 return DAG.getNode(ARMISD::CMOV, DL, Op.getValueType(), FVal, TVal, ARMcc,
6909 CCR, Chain.getValue(1));
6910}
6911
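// [Editor's illustration, not part of ARMISelLowering.cpp] Why the carry is inverted
// above: ARM's subtract-with-carry computes LHS - RHS - (1 - C), so an incoming
// ISD::USUBO_CARRY-style borrow B has to be presented as the ARM carry C = 1 - B.
static unsigned subWithBorrow(unsigned LHS, unsigned RHS, unsigned Borrow) {
  unsigned Carry = 1u - Borrow;              // the inversion done with ISD::SUB above
  return LHS - RHS - (1u - Carry);           // SBC semantics; equals LHS - RHS - Borrow
}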
6912/// isVMOVModifiedImm - Check if the specified splat value corresponds to a
6913/// valid vector constant for a NEON or MVE instruction with a "modified
6914/// immediate" operand (e.g., VMOV). If so, return the encoded value.
6915static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
6916 unsigned SplatBitSize, SelectionDAG &DAG,
6917 const SDLoc &dl, EVT &VT, EVT VectorVT,
6918 VMOVModImmType type) {
6919 unsigned OpCmode, Imm;
6920 bool is128Bits = VectorVT.is128BitVector();
6921
6922 // SplatBitSize is set to the smallest size that splats the vector, so a
6923 // zero vector will always have SplatBitSize == 8. However, NEON modified
6924  // immediate instructions other than VMOV do not support the 8-bit encoding
6925 // of a zero vector, and the default encoding of zero is supposed to be the
6926 // 32-bit version.
6927 if (SplatBits == 0)
6928 SplatBitSize = 32;
6929
6930 switch (SplatBitSize) {
6931 case 8:
6932 if (type != VMOVModImm)
6933 return SDValue();
6934 // Any 1-byte value is OK. Op=0, Cmode=1110.
6935    assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
6936 OpCmode = 0xe;
6937 Imm = SplatBits;
6938 VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
6939 break;
6940
6941 case 16:
6942 // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
6943 VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
6944 if ((SplatBits & ~0xff) == 0) {
6945 // Value = 0x00nn: Op=x, Cmode=100x.
6946 OpCmode = 0x8;
6947 Imm = SplatBits;
6948 break;
6949 }
6950 if ((SplatBits & ~0xff00) == 0) {
6951 // Value = 0xnn00: Op=x, Cmode=101x.
6952 OpCmode = 0xa;
6953 Imm = SplatBits >> 8;
6954 break;
6955 }
6956 return SDValue();
6957
6958 case 32:
6959 // NEON's 32-bit VMOV supports splat values where:
6960 // * only one byte is nonzero, or
6961 // * the least significant byte is 0xff and the second byte is nonzero, or
6962 // * the least significant 2 bytes are 0xff and the third is nonzero.
6963 VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
6964 if ((SplatBits & ~0xff) == 0) {
6965 // Value = 0x000000nn: Op=x, Cmode=000x.
6966 OpCmode = 0;
6967 Imm = SplatBits;
6968 break;
6969 }
6970 if ((SplatBits & ~0xff00) == 0) {
6971 // Value = 0x0000nn00: Op=x, Cmode=001x.
6972 OpCmode = 0x2;
6973 Imm = SplatBits >> 8;
6974 break;
6975 }
6976 if ((SplatBits & ~0xff0000) == 0) {
6977 // Value = 0x00nn0000: Op=x, Cmode=010x.
6978 OpCmode = 0x4;
6979 Imm = SplatBits >> 16;
6980 break;
6981 }
6982 if ((SplatBits & ~0xff000000) == 0) {
6983 // Value = 0xnn000000: Op=x, Cmode=011x.
6984 OpCmode = 0x6;
6985 Imm = SplatBits >> 24;
6986 break;
6987 }
6988
6989 // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
6990 if (type == OtherModImm) return SDValue();
6991
6992 if ((SplatBits & ~0xffff) == 0 &&
6993 ((SplatBits | SplatUndef) & 0xff) == 0xff) {
6994 // Value = 0x0000nnff: Op=x, Cmode=1100.
6995 OpCmode = 0xc;
6996 Imm = SplatBits >> 8;
6997 break;
6998 }
6999
7000 // cmode == 0b1101 is not supported for MVE VMVN
7001 if (type == MVEVMVNModImm)
7002 return SDValue();
7003
7004 if ((SplatBits & ~0xffffff) == 0 &&
7005 ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
7006 // Value = 0x00nnffff: Op=x, Cmode=1101.
7007 OpCmode = 0xd;
7008 Imm = SplatBits >> 16;
7009 break;
7010 }
7011
7012 // Note: there are a few 32-bit splat values (specifically: 00ffff00,
7013 // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
7014 // VMOV.I32. A (very) minor optimization would be to replicate the value
7015 // and fall through here to test for a valid 64-bit splat. But, then the
7016 // caller would also need to check and handle the change in size.
7017 return SDValue();
7018
7019 case 64: {
7020 if (type != VMOVModImm)
7021 return SDValue();
7022 // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
7023 uint64_t BitMask = 0xff;
7024 unsigned ImmMask = 1;
7025 Imm = 0;
7026 for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
7027 if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
7028 Imm |= ImmMask;
7029 } else if ((SplatBits & BitMask) != 0) {
7030 return SDValue();
7031 }
7032 BitMask <<= 8;
7033 ImmMask <<= 1;
7034 }
7035
7036 if (DAG.getDataLayout().isBigEndian()) {
7037 // Reverse the order of elements within the vector.
7038 unsigned BytesPerElem = VectorVT.getScalarSizeInBits() / 8;
7039 unsigned Mask = (1 << BytesPerElem) - 1;
7040 unsigned NumElems = 8 / BytesPerElem;
7041 unsigned NewImm = 0;
7042 for (unsigned ElemNum = 0; ElemNum < NumElems; ++ElemNum) {
7043 unsigned Elem = ((Imm >> ElemNum * BytesPerElem) & Mask);
7044 NewImm |= Elem << (NumElems - ElemNum - 1) * BytesPerElem;
7045 }
7046 Imm = NewImm;
7047 }
7048
7049 // Op=1, Cmode=1110.
7050 OpCmode = 0x1e;
7051 VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
7052 break;
7053 }
7054
7055 default:
7056    llvm_unreachable("unexpected size for isVMOVModifiedImm");
7057 }
7058
7059 unsigned EncodedVal = ARM_AM::createVMOVModImm(OpCmode, Imm);
7060 return DAG.getTargetConstant(EncodedVal, dl, MVT::i32);
7061}
7062
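// [Editor's illustration, not part of ARMISelLowering.cpp] The simplest family of
// 32-bit cases accepted above (cmode 000x..011x): the splat qualifies when only one
// of its four bytes is nonzero, and the byte index selects the cmode.
static bool isSingleNonzeroByte32(unsigned SplatBits, unsigned &Imm, unsigned &ByteIdx) {
  for (unsigned B = 0; B < 4; ++B) {
    unsigned Mask = 0xffu << (8 * B);
    if ((SplatBits & ~Mask) == 0) {          // every byte outside B is zero
      Imm = (SplatBits >> (8 * B)) & 0xff;
      ByteIdx = B;                           // 0 -> cmode 000x, 1 -> 001x, 2 -> 010x, 3 -> 011x
      return true;
    }
  }
  return false;
}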
7063SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
7064 const ARMSubtarget *ST) const {
7065 EVT VT = Op.getValueType();
7066 bool IsDouble = (VT == MVT::f64);
7067 ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
7068 const APFloat &FPVal = CFP->getValueAPF();
7069
7070 // Prevent floating-point constants from using literal loads
7071 // when execute-only is enabled.
7072 if (ST->genExecuteOnly()) {
7073 // If we can represent the constant as an immediate, don't lower it
7074 if (isFPImmLegal(FPVal, VT))
7075 return Op;
7076 // Otherwise, construct as integer, and move to float register
7077 APInt INTVal = FPVal.bitcastToAPInt();
7078 SDLoc DL(CFP);
7079 switch (VT.getSimpleVT().SimpleTy) {
7080 default:
7081      llvm_unreachable("Unknown floating point type!");
7082 break;
7083 case MVT::f64: {
7084 SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
7085 SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
7086 return DAG.getNode(ARMISD::VMOVDRR, DL, MVT::f64, Lo, Hi);
7087 }
7088 case MVT::f32:
7089 return DAG.getNode(ARMISD::VMOVSR, DL, VT,
7090 DAG.getConstant(INTVal, DL, MVT::i32));
7091 }
7092 }
7093
7094 if (!ST->hasVFP3Base())
7095 return SDValue();
7096
7097 // Use the default (constant pool) lowering for double constants when we have
7098 // an SP-only FPU
7099 if (IsDouble && !Subtarget->hasFP64())
7100 return SDValue();
7101
7102 // Try splatting with a VMOV.f32...
7103 int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
7104
7105 if (ImmVal != -1) {
7106 if (IsDouble || !ST->useNEONForSinglePrecisionFP()) {
7107 // We have code in place to select a valid ConstantFP already, no need to
7108 // do any mangling.
7109 return Op;
7110 }
7111
7112 // It's a float and we are trying to use NEON operations where
7113 // possible. Lower it to a splat followed by an extract.
7114 SDLoc DL(Op);
7115 SDValue NewVal = DAG.getTargetConstant(ImmVal, DL, MVT::i32);
7116 SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
7117 NewVal);
7118 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,
7119 DAG.getConstant(0, DL, MVT::i32));
7120 }
7121
7122 // The rest of our options are NEON only, make sure that's allowed before
7123 // proceeding..
7124 if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP()))
7125 return SDValue();
7126
7127 EVT VMovVT;
7128 uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue();
7129
7130 // It wouldn't really be worth bothering for doubles except for one very
7131 // important value, which does happen to match: 0.0. So make sure we don't do
7132 // anything stupid.
7133 if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32))
7134 return SDValue();
7135
7136 // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
7137 SDValue NewVal = isVMOVModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op),
7138 VMovVT, VT, VMOVModImm);
7139 if (NewVal != SDValue()) {
7140 SDLoc DL(Op);
7141 SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
7142 NewVal);
7143 if (IsDouble)
7144 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
7145
7146 // It's a float: cast and extract a vector element.
7147 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
7148 VecConstant);
7149 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
7150 DAG.getConstant(0, DL, MVT::i32));
7151 }
7152
7153 // Finally, try a VMVN.i32
7154 NewVal = isVMOVModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT,
7155 VT, VMVNModImm);
7156 if (NewVal != SDValue()) {
7157 SDLoc DL(Op);
7158 SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
7159
7160 if (IsDouble)
7161 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
7162
7163 // It's a float: cast and extract a vector element.
7164 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
7165 VecConstant);
7166 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
7167 DAG.getConstant(0, DL, MVT::i32));
7168 }
7169
7170 return SDValue();
7171}
7172
7173// Check whether a VEXT instruction can handle the shuffle mask when the
7174// vector sources of the shuffle are the same.
7175static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
7176 unsigned NumElts = VT.getVectorNumElements();
7177
7178 // Assume that the first shuffle index is not UNDEF. Fail if it is.
7179 if (M[0] < 0)
7180 return false;
7181
7182 Imm = M[0];
7183
7184 // If this is a VEXT shuffle, the immediate value is the index of the first
7185 // element. The other shuffle indices must be the successive elements after
7186 // the first one.
7187 unsigned ExpectedElt = Imm;
7188 for (unsigned i = 1; i < NumElts; ++i) {
7189 // Increment the expected index. If it wraps around, just follow it
7190 // back to index zero and keep going.
7191 ++ExpectedElt;
7192 if (ExpectedElt == NumElts)
7193 ExpectedElt = 0;
7194
7195 if (M[i] < 0) continue; // ignore UNDEF indices
7196 if (ExpectedElt != static_cast<unsigned>(M[i]))
7197 return false;
7198 }
7199
7200 return true;
7201}
7202
7203static bool isVEXTMask(ArrayRef<int> M, EVT VT,
7204 bool &ReverseVEXT, unsigned &Imm) {
7205 unsigned NumElts = VT.getVectorNumElements();
7206 ReverseVEXT = false;
7207
7208 // Assume that the first shuffle index is not UNDEF. Fail if it is.
7209 if (M[0] < 0)
7210 return false;
7211
7212 Imm = M[0];
7213
7214 // If this is a VEXT shuffle, the immediate value is the index of the first
7215 // element. The other shuffle indices must be the successive elements after
7216 // the first one.
7217 unsigned ExpectedElt = Imm;
7218 for (unsigned i = 1; i < NumElts; ++i) {
7219 // Increment the expected index. If it wraps around, it may still be
7220 // a VEXT but the source vectors must be swapped.
7221 ExpectedElt += 1;
7222 if (ExpectedElt == NumElts * 2) {
7223 ExpectedElt = 0;
7224 ReverseVEXT = true;
7225 }
7226
7227 if (M[i] < 0) continue; // ignore UNDEF indices
7228 if (ExpectedElt != static_cast<unsigned>(M[i]))
7229 return false;
7230 }
7231
7232 // Adjust the index value if the source operands will be swapped.
7233 if (ReverseVEXT)
7234 Imm -= NumElts;
7235
7236 return true;
7237}
7238
7239static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
7240 // We can handle <8 x i8> vector shuffles. If the index in the mask is out of
7241 // range, then 0 is placed into the resulting vector. So pretty much any mask
7242 // of 8 elements can work here.
7243 return VT == MVT::v8i8 && M.size() == 8;
7244}
7245
7246static unsigned SelectPairHalf(unsigned Elements, ArrayRef<int> Mask,
7247 unsigned Index) {
7248 if (Mask.size() == Elements * 2)
31: Assuming the condition is true
32: Taking true branch
7249 return Index / Elements;
33: Division by zero
7250 return Mask[Index] == 0 ? 0 : 1;
7251}
7252
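// [Editor's note] The analyzer's path reaches SelectPairHalf with Elements == 0 (it
// assumes getVectorNumElements() returned 0 in isVTRNMask below), so the
// "Index / Elements" at line 7249 is flagged as a division by zero. In practice the
// callers pass the element count of a legal NEON vector type, so this guard is a
// purely illustrative sketch of how the warning could be silenced, not the upstream fix:
static unsigned SelectPairHalfGuarded(unsigned Elements, ArrayRef<int> Mask,
                                      unsigned Index) {
  if (Elements == 0)                         // hypothetical guard for the analyzer's path
    return 0;
  if (Mask.size() == Elements * 2)
    return Index / Elements;
  return Mask[Index] == 0 ? 0 : 1;
}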
7253// Checks whether the shuffle mask represents a vector transpose (VTRN) by
7254// checking that pairs of elements in the shuffle mask represent the same index
7255// in each vector, incrementing the expected index by 2 at each step.
7256// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 2, 6]
7257// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,c,g}
7258// v2={e,f,g,h}
7259// WhichResult gives the offset for each element in the mask based on which
7260// of the two results it belongs to.
7261//
7262// The transpose can be represented either as:
7263// result1 = shufflevector v1, v2, result1_shuffle_mask
7264// result2 = shufflevector v1, v2, result2_shuffle_mask
7265// where v1/v2 and the shuffle masks have the same number of elements
7266// (here WhichResult (see below) indicates which result is being checked)
7267//
7268// or as:
7269// results = shufflevector v1, v2, shuffle_mask
7270// where both results are returned in one vector and the shuffle mask has twice
7271// as many elements as v1/v2 (here WhichResult will always be 0 if true); here we
7272// want to check the low half and high half of the shuffle mask as if it were
7273// the other case
7274static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
7275 unsigned EltSz = VT.getScalarSizeInBits();
7276 if (EltSz == 64)
11: Assuming 'EltSz' is not equal to 64
12: Taking false branch
7277 return false;
7278
7279 unsigned NumElts = VT.getVectorNumElements();
13: Calling 'EVT::getVectorNumElements'
20: Returning from 'EVT::getVectorNumElements'
21: 'NumElts' initialized here
7280 if (M.size() != NumElts && M.size() != NumElts*2)
22: Assuming the condition is false
7281 return false;
7282
7283 // If the mask is twice as long as the input vector then we need to check the
7284 // upper and lower parts of the mask with a matching value for WhichResult
7285 // FIXME: A mask with only even values will be rejected in case the first
7286 // element is undefined, e.g. [-1, 4, 2, 6] will be rejected, because only
7287 // M[0] is used to determine WhichResult
7288 for (unsigned i = 0; i < M.size(); i += NumElts) {
23: Assuming the condition is true
24: Loop condition is true. Entering loop body
27: Assuming the condition is true
28: Loop condition is true. Entering loop body
7289 WhichResult = SelectPairHalf(NumElts, M, i);
29: Passing the value 0 via 1st parameter 'Elements'
30: Calling 'SelectPairHalf'
7290 for (unsigned j = 0; j < NumElts; j += 2) {
25: Assuming 'j' is >= 'NumElts'
26: Loop condition is false. Execution continues on line 7288
7291 if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
7292 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + NumElts + WhichResult))
7293 return false;
7294 }
7295 }
7296
7297 if (M.size() == NumElts*2)
7298 WhichResult = 0;
7299
7300 return true;
7301}
7302
7303/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
7304/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
7305/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
7306static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
7307 unsigned EltSz = VT.getScalarSizeInBits();
7308 if (EltSz == 64)
7309 return false;
7310
7311 unsigned NumElts = VT.getVectorNumElements();
7312 if (M.size() != NumElts && M.size() != NumElts*2)
7313 return false;
7314
7315 for (unsigned i = 0; i < M.size(); i += NumElts) {
7316 WhichResult = SelectPairHalf(NumElts, M, i);
7317 for (unsigned j = 0; j < NumElts; j += 2) {
7318 if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
7319 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + WhichResult))
7320 return false;
7321 }
7322 }
7323
7324 if (M.size() == NumElts*2)
7325 WhichResult = 0;
7326
7327 return true;
7328}
7329
7330// Checks whether the shuffle mask represents a vector unzip (VUZP) by checking
7331// that the mask elements are either all even and in steps of size 2 or all odd
7332// and in steps of size 2.
7333// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 2, 4, 6]
7334// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,c,e,g}
7335// v2={e,f,g,h}
7336// Requires similar checks to those of isVTRNMask with
7337// respect to how the results are returned.
7338static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
7339 unsigned EltSz = VT.getScalarSizeInBits();
7340 if (EltSz == 64)
7341 return false;
7342
7343 unsigned NumElts = VT.getVectorNumElements();
7344 if (M.size() != NumElts && M.size() != NumElts*2)
7345 return false;
7346
7347 for (unsigned i = 0; i < M.size(); i += NumElts) {
7348 WhichResult = SelectPairHalf(NumElts, M, i);
7349 for (unsigned j = 0; j < NumElts; ++j) {
7350 if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult)
7351 return false;
7352 }
7353 }
7354
7355 if (M.size() == NumElts*2)
7356 WhichResult = 0;
7357
7358 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7359 if (VT.is64BitVector() && EltSz == 32)
7360 return false;
7361
7362 return true;
7363}
7364
7365/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
7366/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
7367/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
7368static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
7369 unsigned EltSz = VT.getScalarSizeInBits();
7370 if (EltSz == 64)
7371 return false;
7372
7373 unsigned NumElts = VT.getVectorNumElements();
7374 if (M.size() != NumElts && M.size() != NumElts*2)
7375 return false;
7376
7377 unsigned Half = NumElts / 2;
7378 for (unsigned i = 0; i < M.size(); i += NumElts) {
7379 WhichResult = SelectPairHalf(NumElts, M, i);
7380 for (unsigned j = 0; j < NumElts; j += Half) {
7381 unsigned Idx = WhichResult;
7382 for (unsigned k = 0; k < Half; ++k) {
7383 int MIdx = M[i + j + k];
7384 if (MIdx >= 0 && (unsigned) MIdx != Idx)
7385 return false;
7386 Idx += 2;
7387 }
7388 }
7389 }
7390
7391 if (M.size() == NumElts*2)
7392 WhichResult = 0;
7393
7394 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7395 if (VT.is64BitVector() && EltSz == 32)
7396 return false;
7397
7398 return true;
7399}
7400
7401// Checks whether the shuffle mask represents a vector zip (VZIP) by checking
7402// that pairs of elements of the shufflemask represent the same index in each
7403// vector incrementing sequentially through the vectors.
7404// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 1, 5]
7405// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,b,f}
7406// v2={e,f,g,h}
7407// Requires similar checks to those of isVTRNMask with respect to how the
7408// results are returned.
7409static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
7410 unsigned EltSz = VT.getScalarSizeInBits();
7411 if (EltSz == 64)
7412 return false;
7413
7414 unsigned NumElts = VT.getVectorNumElements();
7415 if (M.size() != NumElts && M.size() != NumElts*2)
7416 return false;
7417
7418 for (unsigned i = 0; i < M.size(); i += NumElts) {
7419 WhichResult = SelectPairHalf(NumElts, M, i);
7420 unsigned Idx = WhichResult * NumElts / 2;
7421 for (unsigned j = 0; j < NumElts; j += 2) {
7422 if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
7423 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx + NumElts))
7424 return false;
7425 Idx += 1;
7426 }
7427 }
7428
7429 if (M.size() == NumElts*2)
7430 WhichResult = 0;
7431
7432 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7433 if (VT.is64BitVector() && EltSz == 32)
7434 return false;
7435
7436 return true;
7437}
7438
7439/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
7440/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
7441/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
7442static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
7443 unsigned EltSz = VT.getScalarSizeInBits();
7444 if (EltSz == 64)
7445 return false;
7446
7447 unsigned NumElts = VT.getVectorNumElements();
7448 if (M.size() != NumElts && M.size() != NumElts*2)
7449 return false;
7450
7451 for (unsigned i = 0; i < M.size(); i += NumElts) {
7452 WhichResult = SelectPairHalf(NumElts, M, i);
7453 unsigned Idx = WhichResult * NumElts / 2;
7454 for (unsigned j = 0; j < NumElts; j += 2) {
7455 if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
7456 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx))
7457 return false;
7458 Idx += 1;
7459 }
7460 }
7461
7462 if (M.size() == NumElts*2)
7463 WhichResult = 0;
7464
7465 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7466 if (VT.is64BitVector() && EltSz == 32)
7467 return false;
7468
7469 return true;
7470}
7471
7472/// Check if \p ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN),
7473/// and return the corresponding ARMISD opcode if it is, or 0 if it isn't.
7474static unsigned isNEONTwoResultShuffleMask(ArrayRef<int> ShuffleMask, EVT VT,
7475 unsigned &WhichResult,
7476 bool &isV_UNDEF) {
7477 isV_UNDEF = false;
7478 if (isVTRNMask(ShuffleMask, VT, WhichResult))
10: Calling 'isVTRNMask'
7479 return ARMISD::VTRN;
7480 if (isVUZPMask(ShuffleMask, VT, WhichResult))
7481 return ARMISD::VUZP;
7482 if (isVZIPMask(ShuffleMask, VT, WhichResult))
7483 return ARMISD::VZIP;
7484
7485 isV_UNDEF = true;
7486 if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
7487 return ARMISD::VTRN;
7488 if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
7489 return ARMISD::VUZP;
7490 if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
7491 return ARMISD::VZIP;
7492
7493 return 0;
7494}
7495
7496/// \return true if this is a reverse operation on a vector.
7497static bool isReverseMask(ArrayRef<int> M, EVT VT) {
7498 unsigned NumElts = VT.getVectorNumElements();
7499 // Make sure the mask has the right size.
7500 if (NumElts != M.size())
7501 return false;
7502
7503 // Look for <15, ..., 3, -1, 1, 0>.
7504 for (unsigned i = 0; i != NumElts; ++i)
7505 if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
7506 return false;
7507
7508 return true;
7509}
7510
7511static bool isTruncMask(ArrayRef<int> M, EVT VT, bool Top, bool SingleSource) {
7512 unsigned NumElts = VT.getVectorNumElements();
7513 // Make sure the mask has the right size.
7514 if (NumElts != M.size() || (VT != MVT::v8i16 && VT != MVT::v16i8))
7515 return false;
7516
7517 // Half-width truncation patterns (e.g. v4i32 -> v8i16):
7518 // !Top && SingleSource: <0, 2, 4, 6, 0, 2, 4, 6>
7519 // !Top && !SingleSource: <0, 2, 4, 6, 8, 10, 12, 14>
7520 // Top && SingleSource: <1, 3, 5, 7, 1, 3, 5, 7>
7521 // Top && !SingleSource: <1, 3, 5, 7, 9, 11, 13, 15>
7522 int Ofs = Top ? 1 : 0;
7523 int Upper = SingleSource ? 0 : NumElts;
7524 for (int i = 0, e = NumElts / 2; i != e; ++i) {
7525 if (M[i] >= 0 && M[i] != (i * 2) + Ofs)
7526 return false;
7527 if (M[i + e] >= 0 && M[i + e] != (i * 2) + Ofs + Upper)
7528 return false;
7529 }
7530 return true;
7531}
7532
7533static bool isVMOVNMask(ArrayRef<int> M, EVT VT, bool Top, bool SingleSource) {
7534 unsigned NumElts = VT.getVectorNumElements();
7535 // Make sure the mask has the right size.
7536 if (NumElts != M.size() || (VT != MVT::v8i16 && VT != MVT::v16i8))
7537 return false;
7538
7539 // If Top
7540 // Look for <0, N, 2, N+2, 4, N+4, ..>.
7541 // This inserts Input2 into Input1
7542 // else if not Top
7543 // Look for <0, N+1, 2, N+3, 4, N+5, ..>
7544 // This inserts Input1 into Input2
7545 unsigned Offset = Top ? 0 : 1;
7546 unsigned N = SingleSource ? 0 : NumElts;
7547 for (unsigned i = 0; i < NumElts; i += 2) {
7548 if (M[i] >= 0 && M[i] != (int)i)
7549 return false;
7550 if (M[i + 1] >= 0 && M[i + 1] != (int)(N + i + Offset))
7551 return false;
7552 }
7553
7554 return true;
7555}
7556
7557static bool isVMOVNTruncMask(ArrayRef<int> M, EVT ToVT, bool rev) {
7558 unsigned NumElts = ToVT.getVectorNumElements();
7559 if (NumElts != M.size())
7560 return false;
7561
7562  // Test if the Trunc can be converted to a VMOVN with this shuffle. We are
7563 // looking for patterns of:
7564 // !rev: 0 N/2 1 N/2+1 2 N/2+2 ...
7565 // rev: N/2 0 N/2+1 1 N/2+2 2 ...
7566
7567 unsigned Off0 = rev ? NumElts / 2 : 0;
7568 unsigned Off1 = rev ? 0 : NumElts / 2;
7569 for (unsigned i = 0; i < NumElts; i += 2) {
7570 if (M[i] >= 0 && M[i] != (int)(Off0 + i / 2))
7571 return false;
7572 if (M[i + 1] >= 0 && M[i + 1] != (int)(Off1 + i / 2))
7573 return false;
7574 }
7575
7576 return true;
7577}
7578
7579// Reconstruct an MVE VCVT from a BuildVector of scalar fptrunc, all extracted
7580// from a pair of inputs. For example:
7581// BUILDVECTOR(FP_ROUND(EXTRACT_ELT(X, 0),
7582// FP_ROUND(EXTRACT_ELT(Y, 0),
7583// FP_ROUND(EXTRACT_ELT(X, 1),
7584// FP_ROUND(EXTRACT_ELT(Y, 1), ...)
7585static SDValue LowerBuildVectorOfFPTrunc(SDValue BV, SelectionDAG &DAG,
7586 const ARMSubtarget *ST) {
7587  assert(BV.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
7588 if (!ST->hasMVEFloatOps())
7589 return SDValue();
7590
7591 SDLoc dl(BV);
7592 EVT VT = BV.getValueType();
7593 if (VT != MVT::v8f16)
7594 return SDValue();
7595
7596 // We are looking for a buildvector of fptrunc elements, where all the
7597  // elements are extracted alternately from the two sources. Check the first two
7598 // items are valid enough and extract some info from them (they are checked
7599 // properly in the loop below).
7600 if (BV.getOperand(0).getOpcode() != ISD::FP_ROUND ||
7601 BV.getOperand(0).getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
7602 BV.getOperand(0).getOperand(0).getConstantOperandVal(1) != 0)
7603 return SDValue();
7604 if (BV.getOperand(1).getOpcode() != ISD::FP_ROUND ||
7605 BV.getOperand(1).getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
7606 BV.getOperand(1).getOperand(0).getConstantOperandVal(1) != 0)
7607 return SDValue();
7608 SDValue Op0 = BV.getOperand(0).getOperand(0).getOperand(0);
7609 SDValue Op1 = BV.getOperand(1).getOperand(0).getOperand(0);
7610 if (Op0.getValueType() != MVT::v4f32 || Op1.getValueType() != MVT::v4f32)
7611 return SDValue();
7612
7613 // Check all the values in the BuildVector line up with our expectations.
7614 for (unsigned i = 1; i < 4; i++) {
7615 auto Check = [](SDValue Trunc, SDValue Op, unsigned Idx) {
7616 return Trunc.getOpcode() == ISD::FP_ROUND &&
7617 Trunc.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7618 Trunc.getOperand(0).getOperand(0) == Op &&
7619 Trunc.getOperand(0).getConstantOperandVal(1) == Idx;
7620 };
7621 if (!Check(BV.getOperand(i * 2 + 0), Op0, i))
7622 return SDValue();
7623 if (!Check(BV.getOperand(i * 2 + 1), Op1, i))
7624 return SDValue();
7625 }
7626
7627 SDValue N1 = DAG.getNode(ARMISD::VCVTN, dl, VT, DAG.getUNDEF(VT), Op0,
7628 DAG.getConstant(0, dl, MVT::i32));
7629 return DAG.getNode(ARMISD::VCVTN, dl, VT, N1, Op1,
7630 DAG.getConstant(1, dl, MVT::i32));
7631}
7632
7633// Reconstruct an MVE VCVT from a BuildVector of scalar fpext, all extracted
7634// from a single input on alternating lanes. For example:
7635// BUILDVECTOR(FP_ROUND(EXTRACT_ELT(X, 0),
7636// FP_ROUND(EXTRACT_ELT(X, 2),
7637// FP_ROUND(EXTRACT_ELT(X, 4), ...)
7638static SDValue LowerBuildVectorOfFPExt(SDValue BV, SelectionDAG &DAG,
7639 const ARMSubtarget *ST) {
7640  assert(BV.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
7641 if (!ST->hasMVEFloatOps())
7642 return SDValue();
7643
7644 SDLoc dl(BV);
7645 EVT VT = BV.getValueType();
7646 if (VT != MVT::v4f32)
7647 return SDValue();
7648
7649  // We are looking for a buildvector of fpext elements, where all the
7650 // elements are alternating lanes from a single source. For example <0,2,4,6>
7651 // or <1,3,5,7>. Check the first two items are valid enough and extract some
7652 // info from them (they are checked properly in the loop below).
7653 if (BV.getOperand(0).getOpcode() != ISD::FP_EXTEND ||
7654 BV.getOperand(0).getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
7655 return SDValue();
7656 SDValue Op0 = BV.getOperand(0).getOperand(0).getOperand(0);
7657 int Offset = BV.getOperand(0).getOperand(0).getConstantOperandVal(1);
7658 if (Op0.getValueType() != MVT::v8f16 || (Offset != 0 && Offset != 1))
7659 return SDValue();
7660
7661 // Check all the values in the BuildVector line up with our expectations.
7662 for (unsigned i = 1; i < 4; i++) {
7663 auto Check = [](SDValue Trunc, SDValue Op, unsigned Idx) {
7664 return Trunc.getOpcode() == ISD::FP_EXTEND &&
7665 Trunc.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7666 Trunc.getOperand(0).getOperand(0) == Op &&
7667 Trunc.getOperand(0).getConstantOperandVal(1) == Idx;
7668 };
7669 if (!Check(BV.getOperand(i), Op0, 2 * i + Offset))
7670 return SDValue();
7671 }
7672
7673 return DAG.getNode(ARMISD::VCVTL, dl, VT, Op0,
7674 DAG.getConstant(Offset, dl, MVT::i32));
7675}
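
The index test above reduces to simple arithmetic: lane i of the buildvector must read source lane 2*i + Offset, with Offset fixed at 0 or 1 for the whole vector. A minimal standalone sketch of that test in plain C++ (hypothetical names, no LLVM types):

#include <cstddef>
#include <optional>
#include <vector>

// Returns the offset (0 or 1) if lane i always reads source lane 2*i + Offset,
// or an empty optional if the pattern does not hold.
std::optional<int> matchAlternatingLanes(const std::vector<int> &ExtractIdx) {
  if (ExtractIdx.empty())
    return std::nullopt;
  int Offset = ExtractIdx[0];          // must be 0 or 1, as in the code above
  if (Offset != 0 && Offset != 1)
    return std::nullopt;
  for (std::size_t i = 0; i < ExtractIdx.size(); ++i)
    if (ExtractIdx[i] != 2 * static_cast<int>(i) + Offset)
      return std::nullopt;
  return Offset; // e.g. {1,3,5,7} -> 1, {0,2,4,6} -> 0, {0,2,5,6} -> empty
}

The returned offset corresponds to the immediate passed to ARMISD::VCVTL above.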
7676
7677// If N is an integer constant that can be moved into a register in one
7678// instruction, return an SDValue of such a constant (will become a MOV
7679// instruction). Otherwise return null.
7680static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
7681 const ARMSubtarget *ST, const SDLoc &dl) {
7682 uint64_t Val;
7683 if (!isa<ConstantSDNode>(N))
7684 return SDValue();
7685 Val = cast<ConstantSDNode>(N)->getZExtValue();
7686
7687 if (ST->isThumb1Only()) {
7688 if (Val <= 255 || ~Val <= 255)
7689 return DAG.getConstant(Val, dl, MVT::i32);
7690 } else {
7691 if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
7692 return DAG.getConstant(Val, dl, MVT::i32);
7693 }
7694 return SDValue();
7695}
7696
7697static SDValue LowerBUILD_VECTOR_i1(SDValue Op, SelectionDAG &DAG,
7698 const ARMSubtarget *ST) {
7699 SDLoc dl(Op);
7700 EVT VT = Op.getValueType();
7701
7702  assert(ST->hasMVEIntegerOps() && "LowerBUILD_VECTOR_i1 called without MVE!");
7703
7704 unsigned NumElts = VT.getVectorNumElements();
7705 unsigned BoolMask;
7706 unsigned BitsPerBool;
7707 if (NumElts == 2) {
7708 BitsPerBool = 8;
7709 BoolMask = 0xff;
7710 } else if (NumElts == 4) {
7711 BitsPerBool = 4;
7712 BoolMask = 0xf;
7713 } else if (NumElts == 8) {
7714 BitsPerBool = 2;
7715 BoolMask = 0x3;
7716 } else if (NumElts == 16) {
7717 BitsPerBool = 1;
7718 BoolMask = 0x1;
7719 } else
7720 return SDValue();
7721
7722 // If this is a single value copied into all lanes (a splat), we can just sign
7723 // extend that single value
7724 SDValue FirstOp = Op.getOperand(0);
7725 if (!isa<ConstantSDNode>(FirstOp) &&
7726 llvm::all_of(llvm::drop_begin(Op->ops()), [&FirstOp](const SDUse &U) {
7727 return U.get().isUndef() || U.get() == FirstOp;
7728 })) {
7729 SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i32, FirstOp,
7730 DAG.getValueType(MVT::i1));
7731 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, Op.getValueType(), Ext);
7732 }
7733
7734 // First create base with bits set where known
7735 unsigned Bits32 = 0;
7736 for (unsigned i = 0; i < NumElts; ++i) {
7737 SDValue V = Op.getOperand(i);
7738 if (!isa<ConstantSDNode>(V) && !V.isUndef())
7739 continue;
7740 bool BitSet = V.isUndef() ? false : cast<ConstantSDNode>(V)->getZExtValue();
7741 if (BitSet)
7742 Bits32 |= BoolMask << (i * BitsPerBool);
7743 }
7744
7745 // Add in unknown nodes
7746 SDValue Base = DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT,
7747 DAG.getConstant(Bits32, dl, MVT::i32));
7748 for (unsigned i = 0; i < NumElts; ++i) {
7749 SDValue V = Op.getOperand(i);
7750 if (isa<ConstantSDNode>(V) || V.isUndef())
7751 continue;
7752 Base = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Base, V,
7753 DAG.getConstant(i, dl, MVT::i32));
7754 }
7755
7756 return Base;
7757}
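
The constant part of the lowering above packs the known lanes into a 16-bit pattern: with NumElts lanes, each lane owns 16/NumElts predicate bits and a true lane sets all of them. A small sketch of just that packing step (plain C++, illustrative names only):

#include <cstdint>
#include <vector>

// Packs constant i1 lanes into the 16-bit predicate pattern used above:
// lane i owns bits [i*BitsPerBool, (i+1)*BitsPerBool). Lanes.size() is
// expected to be 2, 4, 8 or 16, matching the cases handled in the code.
uint32_t packPredicateConstant(const std::vector<bool> &Lanes) {
  unsigned NumElts = Lanes.size();
  unsigned BitsPerBool = 16 / NumElts;          // 8, 4, 2 or 1
  uint32_t BoolMask = (1u << BitsPerBool) - 1;  // 0xff, 0xf, 0x3 or 0x1
  uint32_t Bits = 0;
  for (unsigned i = 0; i < NumElts; ++i)
    if (Lanes[i])
      Bits |= BoolMask << (i * BitsPerBool);
  return Bits;
}
// packPredicateConstant({true, false, true, false}) == 0xf0f for a v4i1.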
7758
7759static SDValue LowerBUILD_VECTORToVIDUP(SDValue Op, SelectionDAG &DAG,
7760 const ARMSubtarget *ST) {
7761 if (!ST->hasMVEIntegerOps())
7762 return SDValue();
7763
7764 // We are looking for a buildvector where each element is Op[0] + i*N
7765 EVT VT = Op.getValueType();
7766 SDValue Op0 = Op.getOperand(0);
7767 unsigned NumElts = VT.getVectorNumElements();
7768
7769 // Get the increment value from operand 1
7770 SDValue Op1 = Op.getOperand(1);
7771 if (Op1.getOpcode() != ISD::ADD || Op1.getOperand(0) != Op0 ||
7772 !isa<ConstantSDNode>(Op1.getOperand(1)))
7773 return SDValue();
7774 unsigned N = Op1.getConstantOperandVal(1);
7775 if (N != 1 && N != 2 && N != 4 && N != 8)
7776 return SDValue();
7777
7778 // Check that each other operand matches
7779 for (unsigned I = 2; I < NumElts; I++) {
7780 SDValue OpI = Op.getOperand(I);
7781 if (OpI.getOpcode() != ISD::ADD || OpI.getOperand(0) != Op0 ||
7782 !isa<ConstantSDNode>(OpI.getOperand(1)) ||
7783 OpI.getConstantOperandVal(1) != I * N)
7784 return SDValue();
7785 }
7786
7787 SDLoc DL(Op);
7788 return DAG.getNode(ARMISD::VIDUP, DL, DAG.getVTList(VT, MVT::i32), Op0,
7789 DAG.getConstant(N, DL, MVT::i32));
7790}
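
The pattern being matched here is an arithmetic progression starting at operand 0 with a step of 1, 2, 4 or 8, which is what ARMISD::VIDUP can generate. A plain-integer sketch of the same test (illustrative only, no DAG nodes):

#include <vector>

// True if Lanes[i] == Lanes[0] + i*Step for all i, with Step in {1, 2, 4, 8}.
bool looksLikeVIDUP(const std::vector<long long> &Lanes, long long &Step) {
  if (Lanes.size() < 2)
    return false;
  Step = Lanes[1] - Lanes[0];
  if (Step != 1 && Step != 2 && Step != 4 && Step != 8)
    return false;
  for (std::size_t i = 2; i < Lanes.size(); ++i)
    if (Lanes[i] - Lanes[0] != static_cast<long long>(i) * Step)
      return false;
  return true;
}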
7791
7792// Returns true if the operation N can be treated as a qr instruction variant at
7793// operand Op.
7794static bool IsQRMVEInstruction(const SDNode *N, const SDNode *Op) {
7795 switch (N->getOpcode()) {
7796 case ISD::ADD:
7797 case ISD::MUL:
7798 case ISD::SADDSAT:
7799 case ISD::UADDSAT:
7800 return true;
7801 case ISD::SUB:
7802 case ISD::SSUBSAT:
7803 case ISD::USUBSAT:
7804 return N->getOperand(1).getNode() == Op;
7805 case ISD::INTRINSIC_WO_CHAIN:
7806 switch (N->getConstantOperandVal(0)) {
7807 case Intrinsic::arm_mve_add_predicated:
7808 case Intrinsic::arm_mve_mul_predicated:
7809 case Intrinsic::arm_mve_qadd_predicated:
7810 case Intrinsic::arm_mve_vhadd:
7811 case Intrinsic::arm_mve_hadd_predicated:
7812 case Intrinsic::arm_mve_vqdmulh:
7813 case Intrinsic::arm_mve_qdmulh_predicated:
7814 case Intrinsic::arm_mve_vqrdmulh:
7815 case Intrinsic::arm_mve_qrdmulh_predicated:
7816 case Intrinsic::arm_mve_vqdmull:
7817 case Intrinsic::arm_mve_vqdmull_predicated:
7818 return true;
7819 case Intrinsic::arm_mve_sub_predicated:
7820 case Intrinsic::arm_mve_qsub_predicated:
7821 case Intrinsic::arm_mve_vhsub:
7822 case Intrinsic::arm_mve_hsub_predicated:
7823 return N->getOperand(2).getNode() == Op;
7824 default:
7825 return false;
7826 }
7827 default:
7828 return false;
7829 }
7830}
7831
7832// If this is a case we can't handle, return null and let the default
7833// expansion code take care of it.
7834SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
7835 const ARMSubtarget *ST) const {
7836 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
7837 SDLoc dl(Op);
7838 EVT VT = Op.getValueType();
7839
7840 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
7841 return LowerBUILD_VECTOR_i1(Op, DAG, ST);
7842
7843 if (SDValue R = LowerBUILD_VECTORToVIDUP(Op, DAG, ST))
7844 return R;
7845
7846 APInt SplatBits, SplatUndef;
7847 unsigned SplatBitSize;
7848 bool HasAnyUndefs;
7849 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
7850 if (SplatUndef.isAllOnes())
7851 return DAG.getUNDEF(VT);
7852
7853 // If all the users of this constant splat are qr instruction variants,
7854 // generate a vdup of the constant.
7855 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == SplatBitSize &&
7856 (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32) &&
7857 all_of(BVN->uses(),
7858 [BVN](const SDNode *U) { return IsQRMVEInstruction(U, BVN); })) {
7859 EVT DupVT = SplatBitSize == 32 ? MVT::v4i32
7860 : SplatBitSize == 16 ? MVT::v8i16
7861 : MVT::v16i8;
7862 SDValue Const = DAG.getConstant(SplatBits.getZExtValue(), dl, MVT::i32);
7863 SDValue VDup = DAG.getNode(ARMISD::VDUP, dl, DupVT, Const);
7864 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, VDup);
7865 }
7866
7867 if ((ST->hasNEON() && SplatBitSize <= 64) ||
7868 (ST->hasMVEIntegerOps() && SplatBitSize <= 64)) {
7869 // Check if an immediate VMOV works.
7870 EVT VmovVT;
7871 SDValue Val =
7872 isVMOVModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
7873 SplatBitSize, DAG, dl, VmovVT, VT, VMOVModImm);
7874
7875 if (Val.getNode()) {
7876 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
7877 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
7878 }
7879
7880 // Try an immediate VMVN.
7881 uint64_t NegatedImm = (~SplatBits).getZExtValue();
7882 Val = isVMOVModifiedImm(
7883 NegatedImm, SplatUndef.getZExtValue(), SplatBitSize, DAG, dl, VmovVT,
7884 VT, ST->hasMVEIntegerOps() ? MVEVMVNModImm : VMVNModImm);
7885 if (Val.getNode()) {
7886 SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
7887 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
7888 }
7889
7890 // Use vmov.f32 to materialize other v2f32 and v4f32 splats.
7891 if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
7892 int ImmVal = ARM_AM::getFP32Imm(SplatBits);
7893 if (ImmVal != -1) {
7894 SDValue Val = DAG.getTargetConstant(ImmVal, dl, MVT::i32);
7895 return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
7896 }
7897 }
7898
7899 // If we are under MVE, generate a VDUP(constant), bitcast to the original
7900 // type.
7901 if (ST->hasMVEIntegerOps() &&
7902 (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32)) {
7903 EVT DupVT = SplatBitSize == 32 ? MVT::v4i32
7904 : SplatBitSize == 16 ? MVT::v8i16
7905 : MVT::v16i8;
7906 SDValue Const = DAG.getConstant(SplatBits.getZExtValue(), dl, MVT::i32);
7907 SDValue VDup = DAG.getNode(ARMISD::VDUP, dl, DupVT, Const);
7908 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, VDup);
7909 }
7910 }
7911 }
7912
7913 // Scan through the operands to see if only one value is used.
7914 //
7915  // As an optimisation, even if more than one value is used it may be more
7916  // profitable to splat with one value and then change some lanes.
7917 //
7918 // Heuristically we decide to do this if the vector has a "dominant" value,
7919 // defined as splatted to more than half of the lanes.
7920 unsigned NumElts = VT.getVectorNumElements();
7921 bool isOnlyLowElement = true;
7922 bool usesOnlyOneValue = true;
7923 bool hasDominantValue = false;
7924 bool isConstant = true;
7925
7926 // Map of the number of times a particular SDValue appears in the
7927 // element list.
7928 DenseMap<SDValue, unsigned> ValueCounts;
7929 SDValue Value;
7930 for (unsigned i = 0; i < NumElts; ++i) {
7931 SDValue V = Op.getOperand(i);
7932 if (V.isUndef())
7933 continue;
7934 if (i > 0)
7935 isOnlyLowElement = false;
7936 if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
7937 isConstant = false;
7938
7939 ValueCounts.insert(std::make_pair(V, 0));
7940 unsigned &Count = ValueCounts[V];
7941
7942 // Is this value dominant? (takes up more than half of the lanes)
7943 if (++Count > (NumElts / 2)) {
7944 hasDominantValue = true;
7945 Value = V;
7946 }
7947 }
7948 if (ValueCounts.size() != 1)
7949 usesOnlyOneValue = false;
7950 if (!Value.getNode() && !ValueCounts.empty())
7951 Value = ValueCounts.begin()->first;
7952
7953 if (ValueCounts.empty())
7954 return DAG.getUNDEF(VT);
7955
7956 // Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR.
7957 // Keep going if we are hitting this case.
7958 if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode()))
7959 return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
7960
7961 unsigned EltSize = VT.getScalarSizeInBits();
7962
7963 // Use VDUP for non-constant splats. For f32 constant splats, reduce to
7964 // i32 and try again.
7965 if (hasDominantValue && EltSize <= 32) {
7966 if (!isConstant) {
7967 SDValue N;
7968
7969 // If we are VDUPing a value that comes directly from a vector, that will
7970 // cause an unnecessary move to and from a GPR, where instead we could
7971 // just use VDUPLANE. We can only do this if the lane being extracted
7972 // is at a constant index, as the VDUP from lane instructions only have
7973 // constant-index forms.
7974 ConstantSDNode *constIndex;
7975 if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7976 (constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1)))) {
7977 // We need to create a new undef vector to use for the VDUPLANE if the
7978 // size of the vector from which we get the value is different than the
7979 // size of the vector that we need to create. We will insert the element
7980 // such that the register coalescer will remove unnecessary copies.
7981 if (VT != Value->getOperand(0).getValueType()) {
7982 unsigned index = constIndex->getAPIntValue().getLimitedValue() %
7983 VT.getVectorNumElements();
7984 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
7985 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
7986 Value, DAG.getConstant(index, dl, MVT::i32)),
7987 DAG.getConstant(index, dl, MVT::i32));
7988 } else
7989 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
7990 Value->getOperand(0), Value->getOperand(1));
7991 } else
7992 N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
7993
7994 if (!usesOnlyOneValue) {
7995 // The dominant value was splatted as 'N', but we now have to insert
7996 // all differing elements.
7997 for (unsigned I = 0; I < NumElts; ++I) {
7998 if (Op.getOperand(I) == Value)
7999 continue;
8000 SmallVector<SDValue, 3> Ops;
8001 Ops.push_back(N);
8002 Ops.push_back(Op.getOperand(I));
8003 Ops.push_back(DAG.getConstant(I, dl, MVT::i32));
8004 N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops);
8005 }
8006 }
8007 return N;
8008 }
8009 if (VT.getVectorElementType().isFloatingPoint()) {
8010 SmallVector<SDValue, 8> Ops;
8011 MVT FVT = VT.getVectorElementType().getSimpleVT();
8012      assert(FVT == MVT::f32 || FVT == MVT::f16);
8013 MVT IVT = (FVT == MVT::f32) ? MVT::i32 : MVT::i16;
8014 for (unsigned i = 0; i < NumElts; ++i)
8015 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, IVT,
8016 Op.getOperand(i)));
8017 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), IVT, NumElts);
8018 SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
8019 Val = LowerBUILD_VECTOR(Val, DAG, ST);
8020 if (Val.getNode())
8021 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
8022 }
8023 if (usesOnlyOneValue) {
8024 SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
8025 if (isConstant && Val.getNode())
8026 return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
8027 }
8028 }
8029
8030 // If all elements are constants and the case above didn't get hit, fall back
8031 // to the default expansion, which will generate a load from the constant
8032 // pool.
8033 if (isConstant)
8034 return SDValue();
8035
8036 // Reconstruct the BUILDVECTOR to one of the legal shuffles (such as vext and
8037 // vmovn). Empirical tests suggest this is rarely worth it for vectors of
8038 // length <= 2.
8039 if (NumElts >= 4)
8040 if (SDValue shuffle = ReconstructShuffle(Op, DAG))
8041 return shuffle;
8042
8043 // Attempt to turn a buildvector of scalar fptrunc's or fpext's back into
8044 // VCVT's
8045 if (SDValue VCVT = LowerBuildVectorOfFPTrunc(Op, DAG, Subtarget))
8046 return VCVT;
8047 if (SDValue VCVT = LowerBuildVectorOfFPExt(Op, DAG, Subtarget))
8048 return VCVT;
8049
8050 if (ST->hasNEON() && VT.is128BitVector() && VT != MVT::v2f64 && VT != MVT::v4f32) {
8051 // If we haven't found an efficient lowering, try splitting a 128-bit vector
8052 // into two 64-bit vectors; we might discover a better way to lower it.
8053 SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElts);
8054 EVT ExtVT = VT.getVectorElementType();
8055 EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElts / 2);
8056 SDValue Lower = DAG.getBuildVector(HVT, dl, ArrayRef(&Ops[0], NumElts / 2));
8057 if (Lower.getOpcode() == ISD::BUILD_VECTOR)
8058 Lower = LowerBUILD_VECTOR(Lower, DAG, ST);
8059 SDValue Upper =
8060 DAG.getBuildVector(HVT, dl, ArrayRef(&Ops[NumElts / 2], NumElts / 2));
8061 if (Upper.getOpcode() == ISD::BUILD_VECTOR)
8062 Upper = LowerBUILD_VECTOR(Upper, DAG, ST);
8063 if (Lower && Upper)
8064 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lower, Upper);
8065 }
8066
8067 // Vectors with 32- or 64-bit elements can be built by directly assigning
8068 // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
8069 // will be legalized.
8070 if (EltSize >= 32) {
8071 // Do the expansion with floating-point types, since that is what the VFP
8072 // registers are defined to use, and since i64 is not legal.
8073 EVT EltVT = EVT::getFloatingPointVT(EltSize);
8074 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
8075 SmallVector<SDValue, 8> Ops;
8076 for (unsigned i = 0; i < NumElts; ++i)
8077 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
8078 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
8079 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
8080 }
8081
8082 // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
8083 // know the default expansion would otherwise fall back on something even
8084  // worse. For a vector with one or two non-undef values, the default is
8085  // scalar_to_vector for the elements followed by a shuffle (provided the
8086  // shuffle is valid for the target); for everything else it is
8087  // materialization element by element on the stack followed by a load.
8088 if (!isConstant && !usesOnlyOneValue) {
8089 SDValue Vec = DAG.getUNDEF(VT);
8090 for (unsigned i = 0 ; i < NumElts; ++i) {
8091 SDValue V = Op.getOperand(i);
8092 if (V.isUndef())
8093 continue;
8094 SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i32);
8095 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
8096 }
8097 return Vec;
8098 }
8099
8100 return SDValue();
8101}
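
The splat heuristic in the middle of the function counts how often each value appears and treats a value as dominant once it covers more than half of the lanes; only then is the VDUP-plus-inserts sequence attempted. A sketch of just that counting step, with plain ints standing in for SDValues (names are illustrative):

#include <map>
#include <optional>
#include <vector>

// Returns the dominant lane value, if any; an empty optional in a lane means
// undef and is skipped, but still counts towards the NumElts/2 threshold.
std::optional<int>
findDominantValue(const std::vector<std::optional<int>> &Lanes) {
  std::map<int, unsigned> Counts;
  std::optional<int> Dominant;
  for (const auto &L : Lanes) {
    if (!L)
      continue;                         // undef lane
    if (++Counts[*L] > Lanes.size() / 2)
      Dominant = *L;
  }
  return Dominant;
}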
8102
8103// Gather data to see if the operation can be modelled as a
8104// shuffle in combination with VEXTs.
8105SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
8106 SelectionDAG &DAG) const {
8107  assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
8108 SDLoc dl(Op);
8109 EVT VT = Op.getValueType();
8110 unsigned NumElts = VT.getVectorNumElements();
8111
8112 struct ShuffleSourceInfo {
8113 SDValue Vec;
8114 unsigned MinElt = std::numeric_limits<unsigned>::max();
8115 unsigned MaxElt = 0;
8116
8117 // We may insert some combination of BITCASTs and VEXT nodes to force Vec to
8118 // be compatible with the shuffle we intend to construct. As a result
8119 // ShuffleVec will be some sliding window into the original Vec.
8120 SDValue ShuffleVec;
8121
8122    // Code should guarantee that element i in Vec starts at element
8123    // "WindowBase + i * WindowScale" in ShuffleVec.
8124 int WindowBase = 0;
8125 int WindowScale = 1;
8126
8127 ShuffleSourceInfo(SDValue Vec) : Vec(Vec), ShuffleVec(Vec) {}
8128
8129 bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
8130 };
8131
8132 // First gather all vectors used as an immediate source for this BUILD_VECTOR
8133 // node.
8134 SmallVector<ShuffleSourceInfo, 2> Sources;
8135 for (unsigned i = 0; i < NumElts; ++i) {
8136 SDValue V = Op.getOperand(i);
8137 if (V.isUndef())
8138 continue;
8139 else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
8140 // A shuffle can only come from building a vector from various
8141 // elements of other vectors.
8142 return SDValue();
8143 } else if (!isa<ConstantSDNode>(V.getOperand(1))) {
8144 // Furthermore, shuffles require a constant mask, whereas extractelts
8145 // accept variable indices.
8146 return SDValue();
8147 }
8148
8149 // Add this element source to the list if it's not already there.
8150 SDValue SourceVec = V.getOperand(0);
8151 auto Source = llvm::find(Sources, SourceVec);
8152 if (Source == Sources.end())
8153 Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
8154
8155 // Update the minimum and maximum lane number seen.
8156 unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
8157 Source->MinElt = std::min(Source->MinElt, EltNo);
8158 Source->MaxElt = std::max(Source->MaxElt, EltNo);
8159 }
8160
8161 // Currently only do something sane when at most two source vectors
8162 // are involved.
8163 if (Sources.size() > 2)
8164 return SDValue();
8165
8166 // Find out the smallest element size among result and two sources, and use
8167 // it as element size to build the shuffle_vector.
8168 EVT SmallestEltTy = VT.getVectorElementType();
8169 for (auto &Source : Sources) {
8170 EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
8171 if (SrcEltTy.bitsLT(SmallestEltTy))
8172 SmallestEltTy = SrcEltTy;
8173 }
8174 unsigned ResMultiplier =
8175 VT.getScalarSizeInBits() / SmallestEltTy.getSizeInBits();
8176 NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
8177 EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
8178
8179 // If the source vector is too wide or too narrow, we may nevertheless be able
8180 // to construct a compatible shuffle either by concatenating it with UNDEF or
8181 // extracting a suitable range of elements.
8182 for (auto &Src : Sources) {
8183 EVT SrcVT = Src.ShuffleVec.getValueType();
8184
8185 uint64_t SrcVTSize = SrcVT.getFixedSizeInBits();
8186 uint64_t VTSize = VT.getFixedSizeInBits();
8187 if (SrcVTSize == VTSize)
8188 continue;
8189
8190 // This stage of the search produces a source with the same element type as
8191 // the original, but with a total width matching the BUILD_VECTOR output.
8192 EVT EltVT = SrcVT.getVectorElementType();
8193 unsigned NumSrcElts = VTSize / EltVT.getFixedSizeInBits();
8194 EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
8195
8196 if (SrcVTSize < VTSize) {
8197 if (2 * SrcVTSize != VTSize)
8198 return SDValue();
8199 // We can pad out the smaller vector for free, so if it's part of a
8200 // shuffle...
8201 Src.ShuffleVec =
8202 DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
8203 DAG.getUNDEF(Src.ShuffleVec.getValueType()));
8204 continue;
8205 }
8206
8207 if (SrcVTSize != 2 * VTSize)
8208 return SDValue();
8209
8210 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
8211 // Span too large for a VEXT to cope
8212 return SDValue();
8213 }
8214
8215 if (Src.MinElt >= NumSrcElts) {
8216 // The extraction can just take the second half
8217 Src.ShuffleVec =
8218 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8219 DAG.getConstant(NumSrcElts, dl, MVT::i32));
8220 Src.WindowBase = -NumSrcElts;
8221 } else if (Src.MaxElt < NumSrcElts) {
8222 // The extraction can just take the first half
8223 Src.ShuffleVec =
8224 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8225 DAG.getConstant(0, dl, MVT::i32));
8226 } else {
8227 // An actual VEXT is needed
8228 SDValue VEXTSrc1 =
8229 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8230 DAG.getConstant(0, dl, MVT::i32));
8231 SDValue VEXTSrc2 =
8232 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8233 DAG.getConstant(NumSrcElts, dl, MVT::i32));
8234
8235 Src.ShuffleVec = DAG.getNode(ARMISD::VEXT, dl, DestVT, VEXTSrc1,
8236 VEXTSrc2,
8237 DAG.getConstant(Src.MinElt, dl, MVT::i32));
8238 Src.WindowBase = -Src.MinElt;
8239 }
8240 }
8241
8242 // Another possible incompatibility occurs from the vector element types. We
8243 // can fix this by bitcasting the source vectors to the same type we intend
8244 // for the shuffle.
8245 for (auto &Src : Sources) {
8246 EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
8247 if (SrcEltTy == SmallestEltTy)
8248 continue;
8249    assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
8250 Src.ShuffleVec = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, ShuffleVT, Src.ShuffleVec);
8251 Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();
8252 Src.WindowBase *= Src.WindowScale;
8253 }
8254
8255 // Final check before we try to actually produce a shuffle.
8256  LLVM_DEBUG(for (auto Src
8257                  : Sources)
8258                 assert(Src.ShuffleVec.getValueType() == ShuffleVT););
8259
8260 // The stars all align, our next step is to produce the mask for the shuffle.
8261 SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
8262 int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
8263 for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
8264 SDValue Entry = Op.getOperand(i);
8265 if (Entry.isUndef())
8266 continue;
8267
8268 auto Src = llvm::find(Sources, Entry.getOperand(0));
8269 int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
8270
8271 // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
8272 // trunc. So only std::min(SrcBits, DestBits) actually get defined in this
8273 // segment.
8274 EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
8275 int BitsDefined = std::min(OrigEltTy.getScalarSizeInBits(),
8276 VT.getScalarSizeInBits());
8277 int LanesDefined = BitsDefined / BitsPerShuffleLane;
8278
8279 // This source is expected to fill ResMultiplier lanes of the final shuffle,
8280 // starting at the appropriate offset.
8281 int *LaneMask = &Mask[i * ResMultiplier];
8282
8283 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
8284 ExtractBase += NumElts * (Src - Sources.begin());
8285 for (int j = 0; j < LanesDefined; ++j)
8286 LaneMask[j] = ExtractBase + j;
8287 }
8288
8289
8290 // We can't handle more than two sources. This should have already
8291 // been checked before this point.
8292  assert(Sources.size() <= 2 && "Too many sources!");
8293
8294 SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
8295 for (unsigned i = 0; i < Sources.size(); ++i)
8296 ShuffleOps[i] = Sources[i].ShuffleVec;
8297
8298 SDValue Shuffle = buildLegalVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
8299 ShuffleOps[1], Mask, DAG);
8300 if (!Shuffle)
8301 return SDValue();
8302 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Shuffle);
8303}
8304
8305enum ShuffleOpCodes {
8306 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
8307 OP_VREV,
8308 OP_VDUP0,
8309 OP_VDUP1,
8310 OP_VDUP2,
8311 OP_VDUP3,
8312 OP_VEXT1,
8313 OP_VEXT2,
8314 OP_VEXT3,
8315 OP_VUZPL, // VUZP, left result
8316 OP_VUZPR, // VUZP, right result
8317 OP_VZIPL, // VZIP, left result
8318 OP_VZIPR, // VZIP, right result
8319 OP_VTRNL, // VTRN, left result
8320 OP_VTRNR // VTRN, right result
8321};
8322
8323static bool isLegalMVEShuffleOp(unsigned PFEntry) {
8324 unsigned OpNum = (PFEntry >> 26) & 0x0F;
8325 switch (OpNum) {
8326 case OP_COPY:
8327 case OP_VREV:
8328 case OP_VDUP0:
8329 case OP_VDUP1:
8330 case OP_VDUP2:
8331 case OP_VDUP3:
8332 return true;
8333 }
8334 return false;
8335}
8336
8337/// isShuffleMaskLegal - Targets can use this to indicate that they only
8338/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
8339/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
8340/// are assumed to be legal.
8341bool ARMTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
8342 if (VT.getVectorNumElements() == 4 &&
1
Assuming the condition is false
8343 (VT.is128BitVector() || VT.is64BitVector())) {
8344 unsigned PFIndexes[4];
8345 for (unsigned i = 0; i != 4; ++i) {
8346 if (M[i] < 0)
8347 PFIndexes[i] = 8;
8348 else
8349 PFIndexes[i] = M[i];
8350 }
8351
8352 // Compute the index in the perfect shuffle table.
8353 unsigned PFTableIndex =
8354 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
8355 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
8356 unsigned Cost = (PFEntry >> 30);
8357
8358 if (Cost <= 4 && (Subtarget->hasNEON() || isLegalMVEShuffleOp(PFEntry)))
8359 return true;
8360 }
8361
8362 bool ReverseVEXT, isV_UNDEF;
8363 unsigned Imm, WhichResult;
8364
8365 unsigned EltSize = VT.getScalarSizeInBits();
8366 if (EltSize >= 32 ||
2
Assuming 'EltSize' is < 32
8367 ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
3
Assuming the condition is false
8368 ShuffleVectorInst::isIdentityMask(M) ||
4
Assuming the condition is false
8369 isVREVMask(M, VT, 64) ||
5
Assuming the condition is false
8370 isVREVMask(M, VT, 32) ||
6
Assuming the condition is false
8371 isVREVMask(M, VT, 16))
7
Assuming the condition is false
8372 return true;
8373 else if (Subtarget->hasNEON() &&
8
Assuming the condition is true
8374 (isVEXTMask(M, VT, ReverseVEXT, Imm) ||
8375 isVTBLMask(M, VT) ||
8376 isNEONTwoResultShuffleMask(M, VT, WhichResult, isV_UNDEF)))
9
Calling 'isNEONTwoResultShuffleMask'
8377 return true;
8378 else if ((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8379 isReverseMask(M, VT))
8380 return true;
8381 else if (Subtarget->hasMVEIntegerOps() &&
8382 (isVMOVNMask(M, VT, true, false) ||
8383 isVMOVNMask(M, VT, false, false) || isVMOVNMask(M, VT, true, true)))
8384 return true;
8385 else if (Subtarget->hasMVEIntegerOps() &&
8386 (isTruncMask(M, VT, false, false) ||
8387 isTruncMask(M, VT, false, true) ||
8388 isTruncMask(M, VT, true, false) || isTruncMask(M, VT, true, true)))
8389 return true;
8390 else
8391 return false;
8392}
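
The perfect-shuffle lookup treats each of the four mask entries as a base-9 digit (0-7 for a lane, 8 for undef), so the table is indexed by a four-digit base-9 number and the top two bits of each entry carry the cost. A standalone sketch of the addressing (hypothetical helper names):

#include <array>
#include <cstdint>

// Converts a 4-element shuffle mask into the base-9 index used above;
// negative mask entries (undef) map to digit 8.
unsigned perfectShuffleIndex(const std::array<int, 4> &Mask) {
  unsigned Digits[4];
  for (unsigned i = 0; i != 4; ++i)
    Digits[i] = Mask[i] < 0 ? 8u : static_cast<unsigned>(Mask[i]);
  return Digits[0] * 9 * 9 * 9 + Digits[1] * 9 * 9 + Digits[2] * 9 + Digits[3];
}

// The top two bits of a table entry encode the cost checked against 4 above.
unsigned perfectShuffleCost(uint32_t PFEntry) { return PFEntry >> 30; }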
8393
8394/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
8395/// the specified operations to build the shuffle.
8396static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
8397 SDValue RHS, SelectionDAG &DAG,
8398 const SDLoc &dl) {
8399 unsigned OpNum = (PFEntry >> 26) & 0x0F;
8400 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
8401 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
8402
8403 if (OpNum == OP_COPY) {
8404 if (LHSID == (1*9+2)*9+3) return LHS;
8405    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
8406 return RHS;
8407 }
8408
8409 SDValue OpLHS, OpRHS;
8410 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
8411 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
8412 EVT VT = OpLHS.getValueType();
8413
8414 switch (OpNum) {
8415  default: llvm_unreachable("Unknown shuffle opcode!");
8416 case OP_VREV:
8417 // VREV divides the vector in half and swaps within the half.
8418 if (VT.getScalarSizeInBits() == 32)
8419 return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
8420 // vrev <4 x i16> -> VREV32
8421 if (VT.getScalarSizeInBits() == 16)
8422 return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);
8423 // vrev <4 x i8> -> VREV16
8424    assert(VT.getScalarSizeInBits() == 8);
8425 return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);
8426 case OP_VDUP0:
8427 case OP_VDUP1:
8428 case OP_VDUP2:
8429 case OP_VDUP3:
8430 return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
8431 OpLHS, DAG.getConstant(OpNum-OP_VDUP0, dl, MVT::i32));
8432 case OP_VEXT1:
8433 case OP_VEXT2:
8434 case OP_VEXT3:
8435 return DAG.getNode(ARMISD::VEXT, dl, VT,
8436 OpLHS, OpRHS,
8437 DAG.getConstant(OpNum - OP_VEXT1 + 1, dl, MVT::i32));
8438 case OP_VUZPL:
8439 case OP_VUZPR:
8440 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
8441 OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
8442 case OP_VZIPL:
8443 case OP_VZIPR:
8444 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
8445 OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
8446 case OP_VTRNL:
8447 case OP_VTRNR:
8448 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
8449 OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
8450 }
8451}
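
From the shifts used above, a perfect-shuffle entry appears to be laid out as [31:30] cost, [29:26] opcode, [25:13] LHS table id, [12:0] RHS table id, and the recursion rebuilds the two sub-shuffles from the two ids. A small decoder sketch of that layout (names are illustrative, not part of the original file):

#include <cstdint>

struct PerfectShuffleEntry {
  unsigned Cost;   // bits [31:30]
  unsigned OpNum;  // bits [29:26], one of the ShuffleOpCodes above
  unsigned LHSID;  // bits [25:13], index of the LHS sub-shuffle
  unsigned RHSID;  // bits [12:0], index of the RHS sub-shuffle
};

PerfectShuffleEntry decodePFEntry(uint32_t PFEntry) {
  PerfectShuffleEntry E;
  E.Cost = PFEntry >> 30;
  E.OpNum = (PFEntry >> 26) & 0x0F;
  E.LHSID = (PFEntry >> 13) & ((1u << 13) - 1);
  E.RHSID = PFEntry & ((1u << 13) - 1);
  return E;
}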
8452
8453static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
8454 ArrayRef<int> ShuffleMask,
8455 SelectionDAG &DAG) {
8456 // Check to see if we can use the VTBL instruction.
8457 SDValue V1 = Op.getOperand(0);
8458 SDValue V2 = Op.getOperand(1);
8459 SDLoc DL(Op);
8460
8461 SmallVector<SDValue, 8> VTBLMask;
8462 for (int I : ShuffleMask)
8463 VTBLMask.push_back(DAG.getConstant(I, DL, MVT::i32));
8464
8465 if (V2.getNode()->isUndef())
8466 return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
8467 DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
8468
8469 return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
8470 DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
8471}
8472
8473static SDValue LowerReverse_VECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
8474 SDLoc DL(Op);
8475 EVT VT = Op.getValueType();
8476
8477  assert((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8478         "Expect an v8i16/v16i8 type");
8479 SDValue OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, Op.getOperand(0));
8480 // For a v16i8 type: After the VREV, we have got <7, ..., 0, 15, ..., 8>. Now,
8481 // extract the first 8 bytes into the top double word and the last 8 bytes
8482 // into the bottom double word, through a new vector shuffle that will be
8483 // turned into a VEXT on Neon, or a couple of VMOVDs on MVE.
8484 std::vector<int> NewMask;
8485 for (unsigned i = 0; i < VT.getVectorNumElements() / 2; i++)
8486 NewMask.push_back(VT.getVectorNumElements() / 2 + i);
8487 for (unsigned i = 0; i < VT.getVectorNumElements() / 2; i++)
8488 NewMask.push_back(i);
8489 return DAG.getVectorShuffle(VT, DL, OpLHS, OpLHS, NewMask);
8490}
8491
8492static EVT getVectorTyFromPredicateVector(EVT VT) {
8493 switch (VT.getSimpleVT().SimpleTy) {
8494 case MVT::v2i1:
8495 return MVT::v2f64;
8496 case MVT::v4i1:
8497 return MVT::v4i32;
8498 case MVT::v8i1:
8499 return MVT::v8i16;
8500 case MVT::v16i1:
8501 return MVT::v16i8;
8502 default:
8503    llvm_unreachable("Unexpected vector predicate type");
8504 }
8505}
8506
8507static SDValue PromoteMVEPredVector(SDLoc dl, SDValue Pred, EVT VT,
8508 SelectionDAG &DAG) {
8509 // Converting from boolean predicates to integers involves creating a vector
8510 // of all ones or all zeroes and selecting the lanes based upon the real
8511 // predicate.
8512 SDValue AllOnes =
8513 DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0xff), dl, MVT::i32);
8514 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v16i8, AllOnes);
8515
8516 SDValue AllZeroes =
8517 DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0x0), dl, MVT::i32);
8518 AllZeroes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v16i8, AllZeroes);
8519
8520 // Get full vector type from predicate type
8521 EVT NewVT = getVectorTyFromPredicateVector(VT);
8522
8523 SDValue RecastV1;
8524 // If the real predicate is an v8i1 or v4i1 (not v16i1) then we need to recast
8525 // this to a v16i1. This cannot be done with an ordinary bitcast because the
8526 // sizes are not the same. We have to use a MVE specific PREDICATE_CAST node,
8527 // since we know in hardware the sizes are really the same.
8528 if (VT != MVT::v16i1)
8529 RecastV1 = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v16i1, Pred);
8530 else
8531 RecastV1 = Pred;
8532
8533 // Select either all ones or zeroes depending upon the real predicate bits.
8534 SDValue PredAsVector =
8535 DAG.getNode(ISD::VSELECT, dl, MVT::v16i8, RecastV1, AllOnes, AllZeroes);
8536
8537 // Recast our new predicate-as-integer v16i8 vector into something
8538 // appropriate for the shuffle, i.e. v4i32 for a real v4i1 predicate.
8539 return DAG.getNode(ISD::BITCAST, dl, NewVT, PredAsVector);
8540}
8541
8542static SDValue LowerVECTOR_SHUFFLE_i1(SDValue Op, SelectionDAG &DAG,
8543 const ARMSubtarget *ST) {
8544 EVT VT = Op.getValueType();
8545 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
8546 ArrayRef<int> ShuffleMask = SVN->getMask();
8547
8548  assert(ST->hasMVEIntegerOps() &&
8549         "No support for vector shuffle of boolean predicates");
8550
8551 SDValue V1 = Op.getOperand(0);
8552 SDValue V2 = Op.getOperand(1);
8553 SDLoc dl(Op);
8554 if (isReverseMask(ShuffleMask, VT)) {
8555 SDValue cast = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, V1);
8556 SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, cast);
8557 SDValue srl = DAG.getNode(ISD::SRL, dl, MVT::i32, rbit,
8558 DAG.getConstant(16, dl, MVT::i32));
8559 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, srl);
8560 }
8561
8562 // Until we can come up with optimised cases for every single vector
8563 // shuffle in existence we have chosen the least painful strategy. This is
8565  // to essentially promote the boolean predicate to an 8-bit integer, where
8565 // each predicate represents a byte. Then we fall back on a normal integer
8566 // vector shuffle and convert the result back into a predicate vector. In
8567 // many cases the generated code might be even better than scalar code
8568 // operating on bits. Just imagine trying to shuffle 8 arbitrary 2-bit
8569 // fields in a register into 8 other arbitrary 2-bit fields!
8570 SDValue PredAsVector1 = PromoteMVEPredVector(dl, V1, VT, DAG);
8571 EVT NewVT = PredAsVector1.getValueType();
8572 SDValue PredAsVector2 = V2.isUndef() ? DAG.getUNDEF(NewVT)
8573 : PromoteMVEPredVector(dl, V2, VT, DAG);
8574  assert(PredAsVector2.getValueType() == NewVT &&
8575         "Expected identical vector type in expanded i1 shuffle!");
8576
8577 // Do the shuffle!
8578 SDValue Shuffled = DAG.getVectorShuffle(NewVT, dl, PredAsVector1,
8579 PredAsVector2, ShuffleMask);
8580
8581 // Now return the result of comparing the shuffled vector with zero,
8582 // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1. For a v2i1
8583 // we convert to a v4i1 compare to fill in the two halves of the i64 as i32s.
8584 if (VT == MVT::v2i1) {
8585 SDValue BC = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Shuffled);
8586 SDValue Cmp = DAG.getNode(ARMISD::VCMPZ, dl, MVT::v4i1, BC,
8587 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
8588 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v2i1, Cmp);
8589 }
8590 return DAG.getNode(ARMISD::VCMPZ, dl, VT, Shuffled,
8591 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
8592}
8593
8594static SDValue LowerVECTOR_SHUFFLEUsingMovs(SDValue Op,
8595 ArrayRef<int> ShuffleMask,
8596 SelectionDAG &DAG) {
8597 // Attempt to lower the vector shuffle using as many whole register movs as
8598  // possible. This is useful for types smaller than 32 bits, which would
8599  // often otherwise become a series of GPR movs.
8600 SDLoc dl(Op);
8601 EVT VT = Op.getValueType();
8602 if (VT.getScalarSizeInBits() >= 32)
8603 return SDValue();
8604
8605  assert((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8606         "Unexpected vector type");
8607 int NumElts = VT.getVectorNumElements();
8608 int QuarterSize = NumElts / 4;
8609 // The four final parts of the vector, as i32's
8610 SDValue Parts[4];
8611
8612  // Look for full lane vmovs like <0,1,2,3> or <u,5,6,7> etc. (but not
8613 // <u,u,u,u>), returning the vmov lane index
8614 auto getMovIdx = [](ArrayRef<int> ShuffleMask, int Start, int Length) {
8615 // Detect which mov lane this would be from the first non-undef element.
8616 int MovIdx = -1;
8617 for (int i = 0; i < Length; i++) {
8618 if (ShuffleMask[Start + i] >= 0) {
8619 if (ShuffleMask[Start + i] % Length != i)
8620 return -1;
8621 MovIdx = ShuffleMask[Start + i] / Length;
8622 break;
8623 }
8624 }
8625 // If all items are undef, leave this for other combines
8626 if (MovIdx == -1)
8627 return -1;
8628 // Check the remaining values are the correct part of the same mov
8629 for (int i = 1; i < Length; i++) {
8630 if (ShuffleMask[Start + i] >= 0 &&
8631 (ShuffleMask[Start + i] / Length != MovIdx ||
8632 ShuffleMask[Start + i] % Length != i))
8633 return -1;
8634 }
8635 return MovIdx;
8636 };
8637
8638 for (int Part = 0; Part < 4; ++Part) {
8639 // Does this part look like a mov
8640 int Elt = getMovIdx(ShuffleMask, Part * QuarterSize, QuarterSize);
8641 if (Elt != -1) {
8642 SDValue Input = Op->getOperand(0);
8643 if (Elt >= 4) {
8644 Input = Op->getOperand(1);
8645 Elt -= 4;
8646 }
8647 SDValue BitCast = DAG.getBitcast(MVT::v4f32, Input);
8648 Parts[Part] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, BitCast,
8649 DAG.getConstant(Elt, dl, MVT::i32));
8650 }
8651 }
8652
8653 // Nothing interesting found, just return
8654 if (!Parts[0] && !Parts[1] && !Parts[2] && !Parts[3])
8655 return SDValue();
8656
8657 // The other parts need to be built with the old shuffle vector, cast to a
8658 // v4i32 and extract_vector_elts
8659 if (!Parts[0] || !Parts[1] || !Parts[2] || !Parts[3]) {
8660 SmallVector<int, 16> NewShuffleMask;
8661 for (int Part = 0; Part < 4; ++Part)
8662 for (int i = 0; i < QuarterSize; i++)
8663 NewShuffleMask.push_back(
8664 Parts[Part] ? -1 : ShuffleMask[Part * QuarterSize + i]);
8665 SDValue NewShuffle = DAG.getVectorShuffle(
8666 VT, dl, Op->getOperand(0), Op->getOperand(1), NewShuffleMask);
8667 SDValue BitCast = DAG.getBitcast(MVT::v4f32, NewShuffle);
8668
8669 for (int Part = 0; Part < 4; ++Part)
8670 if (!Parts[Part])
8671 Parts[Part] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32,
8672 BitCast, DAG.getConstant(Part, dl, MVT::i32));
8673 }
8674 // Build a vector out of the various parts and bitcast it back to the original
8675 // type.
8676 SDValue NewVec = DAG.getNode(ARMISD::BUILD_VECTOR, dl, MVT::v4f32, Parts);
8677 return DAG.getBitcast(VT, NewVec);
8678}
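
The getMovIdx lambda above asks whether one quarter of the mask is a single whole 32-bit lane move: every defined entry i of the quarter must map to lane MovIdx*Length + i of the concatenated inputs. A slightly restructured but equivalent standalone sketch (plain C++, hypothetical names):

#include <vector>

// Returns the 32-bit source lane for this quarter of the mask, or -1 if the
// quarter is not a whole-lane move. A fully undef quarter also returns -1.
int wholeLaneMovIndex(const std::vector<int> &Mask, int Start, int Length) {
  int MovIdx = -1;
  for (int i = 0; i < Length; ++i) {
    int M = Mask[Start + i];
    if (M < 0)
      continue;                 // undef lane, ignore
    if (M % Length != i)
      return -1;                // not in order within the 32-bit lane
    if (MovIdx == -1)
      MovIdx = M / Length;
    else if (M / Length != MovIdx)
      return -1;                // mixes two different 32-bit lanes
  }
  return MovIdx;
}
// For a v8i16 mask <2,3, 8,9, u,u, 6,7> with Length == 2 the four quarters
// give 1, 4, -1 and 3 respectively.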
8679
8680static SDValue LowerVECTOR_SHUFFLEUsingOneOff(SDValue Op,
8681 ArrayRef<int> ShuffleMask,
8682 SelectionDAG &DAG) {
8683 SDValue V1 = Op.getOperand(0);
8684 SDValue V2 = Op.getOperand(1);
8685 EVT VT = Op.getValueType();
8686 unsigned NumElts = VT.getVectorNumElements();
8687
8688  // A One-Off Identity mask is one that is mostly an identity mask from a
8689  // single source but contains a single element out-of-place, either from a
8690  // different vector or from another position in the same vector. As opposed to
8691 // lowering this via a ARMISD::BUILD_VECTOR we can generate an extract/insert
8692 // pair directly.
8693 auto isOneOffIdentityMask = [](ArrayRef<int> Mask, EVT VT, int BaseOffset,
8694 int &OffElement) {
8695 OffElement = -1;
8696 int NonUndef = 0;
8697 for (int i = 0, NumMaskElts = Mask.size(); i < NumMaskElts; ++i) {
8698 if (Mask[i] == -1)
8699 continue;
8700 NonUndef++;
8701 if (Mask[i] != i + BaseOffset) {
8702 if (OffElement == -1)
8703 OffElement = i;
8704 else
8705 return false;
8706 }
8707 }
8708 return NonUndef > 2 && OffElement != -1;
8709 };
8710 int OffElement;
8711 SDValue VInput;
8712 if (isOneOffIdentityMask(ShuffleMask, VT, 0, OffElement))
8713 VInput = V1;
8714 else if (isOneOffIdentityMask(ShuffleMask, VT, NumElts, OffElement))
8715 VInput = V2;
8716 else
8717 return SDValue();
8718
8719 SDLoc dl(Op);
8720 EVT SVT = VT.getScalarType() == MVT::i8 || VT.getScalarType() == MVT::i16
8721 ? MVT::i32
8722 : VT.getScalarType();
8723 SDValue Elt = DAG.getNode(
8724 ISD::EXTRACT_VECTOR_ELT, dl, SVT,
8725 ShuffleMask[OffElement] < (int)NumElts ? V1 : V2,
8726 DAG.getVectorIdxConstant(ShuffleMask[OffElement] % NumElts, dl));
8727 return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, VInput, Elt,
8728 DAG.getVectorIdxConstant(OffElement % NumElts, dl));
8729}
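
The isOneOffIdentityMask lambda above boils down to: after applying BaseOffset, at most one defined entry may differ from the identity, and at least three entries have to be defined for the extract/insert pair to pay off. A standalone sketch of the same test on a plain mask (illustrative names):

#include <vector>

// OffElement receives the index of the single out-of-place entry, if any.
bool isOneOffIdentity(const std::vector<int> &Mask, int BaseOffset,
                      int &OffElement) {
  OffElement = -1;
  int NonUndef = 0;
  for (int i = 0, e = static_cast<int>(Mask.size()); i < e; ++i) {
    if (Mask[i] == -1)
      continue;                 // undef entry
    ++NonUndef;
    if (Mask[i] != i + BaseOffset) {
      if (OffElement != -1)
        return false;           // a second out-of-place entry disqualifies it
      OffElement = i;
    }
  }
  return NonUndef > 2 && OffElement != -1;
}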
8730
8731static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
8732 const ARMSubtarget *ST) {
8733 SDValue V1 = Op.getOperand(0);
8734 SDValue V2 = Op.getOperand(1);
8735 SDLoc dl(Op);
8736 EVT VT = Op.getValueType();
8737 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
8738 unsigned EltSize = VT.getScalarSizeInBits();
8739
8740 if (ST->hasMVEIntegerOps() && EltSize == 1)
8741 return LowerVECTOR_SHUFFLE_i1(Op, DAG, ST);
8742
8743 // Convert shuffles that are directly supported on NEON to target-specific
8744 // DAG nodes, instead of keeping them as shuffles and matching them again
8745 // during code selection. This is more efficient and avoids the possibility
8746 // of inconsistencies between legalization and selection.
8747 // FIXME: floating-point vectors should be canonicalized to integer vectors
8748  // of the same size so that they get CSEd properly.
8749 ArrayRef<int> ShuffleMask = SVN->getMask();
8750
8751 if (EltSize <= 32) {
8752 if (SVN->isSplat()) {
8753 int Lane = SVN->getSplatIndex();
8754 // If this is undef splat, generate it via "just" vdup, if possible.
8755 if (Lane == -1) Lane = 0;
8756
8757 // Test if V1 is a SCALAR_TO_VECTOR.
8758 if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
8759 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
8760 }
8761 // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
8762 // (and probably will turn into a SCALAR_TO_VECTOR once legalization
8763 // reaches it).
8764 if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
8765 !isa<ConstantSDNode>(V1.getOperand(0))) {
8766 bool IsScalarToVector = true;
8767 for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
8768 if (!V1.getOperand(i).isUndef()) {
8769 IsScalarToVector = false;
8770 break;
8771 }
8772 if (IsScalarToVector)
8773 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
8774 }
8775 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
8776 DAG.getConstant(Lane, dl, MVT::i32));
8777 }
8778
8779 bool ReverseVEXT = false;
8780 unsigned Imm = 0;
8781 if (ST->hasNEON() && isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
8782 if (ReverseVEXT)
8783 std::swap(V1, V2);
8784 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
8785 DAG.getConstant(Imm, dl, MVT::i32));
8786 }
8787
8788 if (isVREVMask(ShuffleMask, VT, 64))
8789 return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
8790 if (isVREVMask(ShuffleMask, VT, 32))
8791 return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
8792 if (isVREVMask(ShuffleMask, VT, 16))
8793 return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
8794
8795 if (ST->hasNEON() && V2->isUndef() && isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
8796 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,
8797 DAG.getConstant(Imm, dl, MVT::i32));
8798 }
8799
8800 // Check for Neon shuffles that modify both input vectors in place.
8801 // If both results are used, i.e., if there are two shuffles with the same
8802 // source operands and with masks corresponding to both results of one of
8803 // these operations, DAG memoization will ensure that a single node is
8804 // used for both shuffles.
8805 unsigned WhichResult = 0;
8806 bool isV_UNDEF = false;
8807 if (ST->hasNEON()) {
8808 if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
8809 ShuffleMask, VT, WhichResult, isV_UNDEF)) {
8810 if (isV_UNDEF)
8811 V2 = V1;
8812 return DAG.getNode(ShuffleOpc, dl, DAG.getVTList(VT, VT), V1, V2)
8813 .getValue(WhichResult);
8814 }
8815 }
8816 if (ST->hasMVEIntegerOps()) {
8817 if (isVMOVNMask(ShuffleMask, VT, false, false))
8818 return DAG.getNode(ARMISD::VMOVN, dl, VT, V2, V1,
8819 DAG.getConstant(0, dl, MVT::i32));
8820 if (isVMOVNMask(ShuffleMask, VT, true, false))
8821 return DAG.getNode(ARMISD::VMOVN, dl, VT, V1, V2,
8822 DAG.getConstant(1, dl, MVT::i32));
8823 if (isVMOVNMask(ShuffleMask, VT, true, true))
8824 return DAG.getNode(ARMISD::VMOVN, dl, VT, V1, V1,
8825 DAG.getConstant(1, dl, MVT::i32));
8826 }
8827
8828 // Also check for these shuffles through CONCAT_VECTORS: we canonicalize
8829 // shuffles that produce a result larger than their operands with:
8830 // shuffle(concat(v1, undef), concat(v2, undef))
8831 // ->
8832 // shuffle(concat(v1, v2), undef)
8833 // because we can access quad vectors (see PerformVECTOR_SHUFFLECombine).
8834 //
8835 // This is useful in the general case, but there are special cases where
8836 // native shuffles produce larger results: the two-result ops.
8837 //
8838 // Look through the concat when lowering them:
8839 // shuffle(concat(v1, v2), undef)
8840 // ->
8841 // concat(VZIP(v1, v2):0, :1)
8842 //
8843 if (ST->hasNEON() && V1->getOpcode() == ISD::CONCAT_VECTORS && V2->isUndef()) {
8844 SDValue SubV1 = V1->getOperand(0);
8845 SDValue SubV2 = V1->getOperand(1);
8846 EVT SubVT = SubV1.getValueType();
8847
8848 // We expect these to have been canonicalized to -1.
8849      assert(llvm::all_of(ShuffleMask, [&](int i) {
8850               return i < (int)VT.getVectorNumElements();
8851             }) && "Unexpected shuffle index into UNDEF operand!");
8852
8853 if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
8854 ShuffleMask, SubVT, WhichResult, isV_UNDEF)) {
8855 if (isV_UNDEF)
8856 SubV2 = SubV1;
8857        assert((WhichResult == 0) &&
8858               "In-place shuffle of concat can only have one result!");
8859 SDValue Res = DAG.getNode(ShuffleOpc, dl, DAG.getVTList(SubVT, SubVT),
8860 SubV1, SubV2);
8861 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Res.getValue(0),
8862 Res.getValue(1));
8863 }
8864 }
8865 }
8866
8867 if (ST->hasMVEIntegerOps() && EltSize <= 32) {
8868 if (SDValue V = LowerVECTOR_SHUFFLEUsingOneOff(Op, ShuffleMask, DAG))
8869 return V;
8870
8871 for (bool Top : {false, true}) {
8872 for (bool SingleSource : {false, true}) {
8873 if (isTruncMask(ShuffleMask, VT, Top, SingleSource)) {
8874 MVT FromSVT = MVT::getIntegerVT(EltSize * 2);
8875 MVT FromVT = MVT::getVectorVT(FromSVT, ShuffleMask.size() / 2);
8876 SDValue Lo = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, FromVT, V1);
8877 SDValue Hi = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, FromVT,
8878 SingleSource ? V1 : V2);
8879 if (Top) {
8880 SDValue Amt = DAG.getConstant(EltSize, dl, FromVT);
8881 Lo = DAG.getNode(ISD::SRL, dl, FromVT, Lo, Amt);
8882 Hi = DAG.getNode(ISD::SRL, dl, FromVT, Hi, Amt);
8883 }
8884 return DAG.getNode(ARMISD::MVETRUNC, dl, VT, Lo, Hi);
8885 }
8886 }
8887 }
8888 }
8889
8890 // If the shuffle is not directly supported and it has 4 elements, use
8891 // the PerfectShuffle-generated table to synthesize it from other shuffles.
8892 unsigned NumElts = VT.getVectorNumElements();
8893 if (NumElts == 4) {
8894 unsigned PFIndexes[4];
8895 for (unsigned i = 0; i != 4; ++i) {
8896 if (ShuffleMask[i] < 0)
8897 PFIndexes[i] = 8;
8898 else
8899 PFIndexes[i] = ShuffleMask[i];
8900 }
8901
8902 // Compute the index in the perfect shuffle table.
8903 unsigned PFTableIndex =
8904 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
8905 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
8906 unsigned Cost = (PFEntry >> 30);
8907
8908 if (Cost <= 4) {
8909 if (ST->hasNEON())
8910 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
8911 else if (isLegalMVEShuffleOp(PFEntry)) {
8912 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
8913 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
8914 unsigned PFEntryLHS = PerfectShuffleTable[LHSID];
8915 unsigned PFEntryRHS = PerfectShuffleTable[RHSID];
8916 if (isLegalMVEShuffleOp(PFEntryLHS) && isLegalMVEShuffleOp(PFEntryRHS))
8917 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
8918 }
8919 }
8920 }
8921
8922 // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
8923 if (EltSize >= 32) {
8924 // Do the expansion with floating-point types, since that is what the VFP
8925 // registers are defined to use, and since i64 is not legal.
8926 EVT EltVT = EVT::getFloatingPointVT(EltSize);
8927 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
8928 V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
8929 V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);
8930 SmallVector<SDValue, 8> Ops;
8931 for (unsigned i = 0; i < NumElts; ++i) {
8932 if (ShuffleMask[i] < 0)
8933 Ops.push_back(DAG.getUNDEF(EltVT));
8934 else
8935 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
8936 ShuffleMask[i] < (int)NumElts ? V1 : V2,
8937 DAG.getConstant(ShuffleMask[i] & (NumElts-1),
8938 dl, MVT::i32)));
8939 }
8940 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
8941 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
8942 }
8943
8944 if ((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8945 isReverseMask(ShuffleMask, VT))
8946 return LowerReverse_VECTOR_SHUFFLE(Op, DAG);
8947
8948 if (ST->hasNEON() && VT == MVT::v8i8)
8949 if (SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG))
8950 return NewOp;
8951
8952 if (ST->hasMVEIntegerOps())
8953 if (SDValue NewOp = LowerVECTOR_SHUFFLEUsingMovs(Op, ShuffleMask, DAG))
8954 return NewOp;
8955
8956 return SDValue();
8957}
8958
8959static SDValue LowerINSERT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG,
8960 const ARMSubtarget *ST) {
8961 EVT VecVT = Op.getOperand(0).getValueType();
8962 SDLoc dl(Op);
8963
8964  assert(ST->hasMVEIntegerOps() &&
8965         "LowerINSERT_VECTOR_ELT_i1 called without MVE!");
8966
8967 SDValue Conv =
8968 DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0));
8969 unsigned Lane = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
8970 unsigned LaneWidth =
8971 getVectorTyFromPredicateVector(VecVT).getScalarSizeInBits() / 8;
8972 unsigned Mask = ((1 << LaneWidth) - 1) << Lane * LaneWidth;
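  // Illustrative example (assumption): a v8i1 predicate promotes to v8i16, so
  // LaneWidth = 16 / 8 = 2 and inserting into lane 3 gives Mask = 0b11 << 6;
  // the BFI below keeps the bits selected by ~Mask and writes the
  // sign-extended i1 into that 2-bit field.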
8973 SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i32,
8974 Op.getOperand(1), DAG.getValueType(MVT::i1));
8975 SDValue BFI = DAG.getNode(ARMISD::BFI, dl, MVT::i32, Conv, Ext,
8976 DAG.getConstant(~Mask, dl, MVT::i32));
8977 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, Op.getValueType(), BFI);
8978}
8979
8980SDValue ARMTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
8981 SelectionDAG &DAG) const {
8982 // INSERT_VECTOR_ELT is legal only for immediate indexes.
8983 SDValue Lane = Op.getOperand(2);
8984 if (!isa<ConstantSDNode>(Lane))
8985 return SDValue();
8986
8987 SDValue Elt = Op.getOperand(1);
8988 EVT EltVT = Elt.getValueType();
8989
8990 if (Subtarget->hasMVEIntegerOps() &&
8991 Op.getValueType().getScalarSizeInBits() == 1)
8992 return LowerINSERT_VECTOR_ELT_i1(Op, DAG, Subtarget);
8993
8994 if (getTypeAction(*DAG.getContext(), EltVT) ==
8995 TargetLowering::TypePromoteFloat) {
8996 // INSERT_VECTOR_ELT doesn't want f16 operands promoting to f32,
8997 // but the type system will try to do that if we don't intervene.
8998 // Reinterpret any such vector-element insertion as one with the
8999 // corresponding integer types.
9000
9001 SDLoc dl(Op);
9002
9003 EVT IEltVT = MVT::getIntegerVT(EltVT.getScalarSizeInBits());
9004    assert(getTypeAction(*DAG.getContext(), IEltVT) !=
9005               TargetLowering::TypePromoteFloat);
9006
9007 SDValue VecIn = Op.getOperand(0);
9008 EVT VecVT = VecIn.getValueType();
9009 EVT IVecVT = EVT::getVectorVT(*DAG.getContext(), IEltVT,
9010 VecVT.getVectorNumElements());
9011
9012 SDValue IElt = DAG.getNode(ISD::BITCAST, dl, IEltVT, Elt);
9013 SDValue IVecIn = DAG.getNode(ISD::BITCAST, dl, IVecVT, VecIn);
9014 SDValue IVecOut = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, IVecVT,
9015 IVecIn, IElt, Lane);
9016 return DAG.getNode(ISD::BITCAST, dl, VecVT, IVecOut);
9017 }
9018
9019 return Op;
9020}
9021
9022static SDValue LowerEXTRACT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG,
9023 const ARMSubtarget *ST) {
9024 EVT VecVT = Op.getOperand(0).getValueType();
9025 SDLoc dl(Op);
9026
9027  assert(ST->hasMVEIntegerOps() &&
9028         "LowerINSERT_VECTOR_ELT_i1 called without MVE!");
9029
9030 SDValue Conv =
9031 DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0));
9032 unsigned Lane = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
9033 unsigned LaneWidth =
9034 getVectorTyFromPredicateVector(VecVT).getScalarSizeInBits() / 8;
9035 SDValue Shift = DAG.getNode(ISD::SRL, dl, MVT::i32, Conv,
9036 DAG.getConstant(Lane * LaneWidth, dl, MVT::i32));
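  // Illustrative example (assumption): a v4i1 predicate promotes to v4i32, so
  // LaneWidth = 32 / 8 = 4 and extracting lane 2 shifts the i32 predicate
  // value right by 8, leaving the requested lane in the low bits.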
9037 return Shift;
9038}
9039
9040static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG,
9041 const ARMSubtarget *ST) {
9042 // EXTRACT_VECTOR_ELT is legal only for immediate indexes.
9043 SDValue Lane = Op.getOperand(1);
9044 if (!isa<ConstantSDNode>(Lane))
9045 return SDValue();
9046
9047 SDValue Vec = Op.getOperand(0);
9048 EVT VT = Vec.getValueType();
9049
9050 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
9051 return LowerEXTRACT_VECTOR_ELT_i1(Op, DAG, ST);
9052
9053 if (Op.getValueType() == MVT::i32 && Vec.getScalarValueSizeInBits() < 32) {
9054 SDLoc dl(Op);
9055 return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
9056 }
9057
9058 return Op;
9059}
9060
9061static SDValue LowerCONCAT_VECTORS_i1(SDValue Op, SelectionDAG &DAG,
9062 const ARMSubtarget *ST) {
9063 SDLoc dl(Op);
9064  assert(Op.getValueType().getScalarSizeInBits() == 1 &&
9065         "Unexpected custom CONCAT_VECTORS lowering");
9066  assert(isPowerOf2_32(Op.getNumOperands()) &&
9067         "Unexpected custom CONCAT_VECTORS lowering");
9068  assert(ST->hasMVEIntegerOps() &&
9069         "CONCAT_VECTORS lowering only supported for MVE");
9070
9071 auto ConcatPair = [&](SDValue V1, SDValue V2) {
9072 EVT Op1VT = V1.getValueType();
9073 EVT Op2VT = V2.getValueType();
9074    assert(Op1VT == Op2VT && "Operand types don't match!");
9075 EVT VT = Op1VT.getDoubleNumVectorElementsVT(*DAG.getContext());
9076
9077 SDValue NewV1 = PromoteMVEPredVector(dl, V1, Op1VT, DAG);
9078 SDValue NewV2 = PromoteMVEPredVector(dl, V2, Op2VT, DAG);
9079
9080 // We now have Op1 + Op2 promoted to vectors of integers, where v8i1 gets
9081 // promoted to v8i16, etc.
9082 MVT ElType =
9083 getVectorTyFromPredicateVector(VT).getScalarType().getSimpleVT();
9084 unsigned NumElts = 2 * Op1VT.getVectorNumElements();
9085
9086 // Extract the vector elements from Op1 and Op2 one by one and truncate them
9087 // to be the right size for the destination. For example, if Op1 is v4i1
9088 // then the promoted vector is v4i32. The result of concatenation gives a
9089 // v8i1, which when promoted is v8i16. That means each i32 element from Op1
9090 // needs truncating to i16 and inserting in the result.
9091 EVT ConcatVT = MVT::getVectorVT(ElType, NumElts);
9092 SDValue ConVec = DAG.getNode(ISD::UNDEF, dl, ConcatVT);
9093 auto ExtractInto = [&DAG, &dl](SDValue NewV, SDValue ConVec, unsigned &j) {
9094 EVT NewVT = NewV.getValueType();
9095 EVT ConcatVT = ConVec.getValueType();
9096 for (unsigned i = 0, e = NewVT.getVectorNumElements(); i < e; i++, j++) {
9097 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV,
9098 DAG.getIntPtrConstant(i, dl));
9099 ConVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ConcatVT, ConVec, Elt,
9100 DAG.getConstant(j, dl, MVT::i32));
9101 }
9102 return ConVec;
9103 };
9104 unsigned j = 0;
9105 ConVec = ExtractInto(NewV1, ConVec, j);
9106 ConVec = ExtractInto(NewV2, ConVec, j);
9107
9108 // Now return the result of comparing the subvector with zero, which will
9109 // generate a real predicate, i.e. v4i1, v8i1 or v16i1. For a v2i1 we
9110 // convert to a v4i1 compare to fill in the two halves of the i64 as i32s.
9111 if (VT == MVT::v2i1) {
9112 SDValue BC = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, ConVec);
9113 SDValue Cmp = DAG.getNode(ARMISD::VCMPZ, dl, MVT::v4i1, BC,
9114 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9115 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v2i1, Cmp);
9116 }
9117 return DAG.getNode(ARMISD::VCMPZ, dl, VT, ConVec,
9118 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9119 };
9120
9121 // Concat each pair of subvectors and pack into the lower half of the array.
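  // Illustrative example (not from the source): concatenating four v4i1
  // operands takes two rounds of ConcatPair: {A,B,C,D} -> {AB,CD} (two v8i1)
  // -> {ABCD} (a single v16i1), which is returned as ConcatOps[0].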
9122 SmallVector<SDValue> ConcatOps(Op->op_begin(), Op->op_end());
9123 while (ConcatOps.size() > 1) {
9124 for (unsigned I = 0, E = ConcatOps.size(); I != E; I += 2) {
9125 SDValue V1 = ConcatOps[I];
9126 SDValue V2 = ConcatOps[I + 1];
9127 ConcatOps[I / 2] = ConcatPair(V1, V2);
9128 }
9129 ConcatOps.resize(ConcatOps.size() / 2);
9130 }
9131 return ConcatOps[0];
9132}
9133
9134static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG,
9135 const ARMSubtarget *ST) {
9136 EVT VT = Op->getValueType(0);
9137 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
9138 return LowerCONCAT_VECTORS_i1(Op, DAG, ST);
9139
9140 // The only time a CONCAT_VECTORS operation can have legal types is when
9141 // two 64-bit vectors are concatenated to a 128-bit vector.
9142  assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
9143         "unexpected CONCAT_VECTORS");
9144 SDLoc dl(Op);
9145 SDValue Val = DAG.getUNDEF(MVT::v2f64);
9146 SDValue Op0 = Op.getOperand(0);
9147 SDValue Op1 = Op.getOperand(1);
9148 if (!Op0.isUndef())
9149 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
9150 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
9151 DAG.getIntPtrConstant(0, dl));
9152 if (!Op1.isUndef())
9153 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
9154 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
9155 DAG.getIntPtrConstant(1, dl));
9156 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
9157}
9158
9159static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG,
9160 const ARMSubtarget *ST) {
9161 SDValue V1 = Op.getOperand(0);
9162 SDValue V2 = Op.getOperand(1);
9163 SDLoc dl(Op);
9164 EVT VT = Op.getValueType();
9165 EVT Op1VT = V1.getValueType();
9166 unsigned NumElts = VT.getVectorNumElements();
9167 unsigned Index = cast<ConstantSDNode>(V2)->getZExtValue();
9168
9169  assert(VT.getScalarSizeInBits() == 1 &&
9170         "Unexpected custom EXTRACT_SUBVECTOR lowering");
9171  assert(ST->hasMVEIntegerOps() &&
9172         "EXTRACT_SUBVECTOR lowering only supported for MVE");
9173
9174 SDValue NewV1 = PromoteMVEPredVector(dl, V1, Op1VT, DAG);
9175
9176 // We now have Op1 promoted to a vector of integers, where v8i1 gets
9177 // promoted to v8i16, etc.
9178
9179 MVT ElType = getVectorTyFromPredicateVector(VT).getScalarType().getSimpleVT();
9180
9181 if (NumElts == 2) {
9182 EVT SubVT = MVT::v4i32;
9183 SDValue SubVec = DAG.getNode(ISD::UNDEF, dl, SubVT);
9184 for (unsigned i = Index, j = 0; i < (Index + NumElts); i++, j += 2) {
9185 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV1,
9186 DAG.getIntPtrConstant(i, dl));
9187 SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,
9188 DAG.getConstant(j, dl, MVT::i32));
9189 SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,
9190 DAG.getConstant(j + 1, dl, MVT::i32));
9191 }
9192 SDValue Cmp = DAG.getNode(ARMISD::VCMPZ, dl, MVT::v4i1, SubVec,
9193 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9194 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v2i1, Cmp);
9195 }
9196
9197 EVT SubVT = MVT::getVectorVT(ElType, NumElts);
9198 SDValue SubVec = DAG.getNode(ISD::UNDEF, dl, SubVT);
9199 for (unsigned i = Index, j = 0; i < (Index + NumElts); i++, j++) {
9200 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV1,
9201 DAG.getIntPtrConstant(i, dl));
9202 SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,
9203 DAG.getConstant(j, dl, MVT::i32));
9204 }
9205
9206 // Now return the result of comparing the subvector with zero,
9207 // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1.
9208 return DAG.getNode(ARMISD::VCMPZ, dl, VT, SubVec,
9209 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9210}
9211
9212// Turn a truncate to a predicate (an i1 vector) into icmp(and(x, 1), 0).
9213static SDValue LowerTruncatei1(SDNode *N, SelectionDAG &DAG,
9214 const ARMSubtarget *ST) {
9215  assert(ST->hasMVEIntegerOps() && "Expected MVE!");
9216 EVT VT = N->getValueType(0);
9217  assert((VT == MVT::v16i1 || VT == MVT::v8i1 || VT == MVT::v4i1) &&
9218         "Expected a vector i1 type!");
9219 SDValue Op = N->getOperand(0);
9220 EVT FromVT = Op.getValueType();
9221 SDLoc DL(N);
9222
9223 SDValue And =
9224 DAG.getNode(ISD::AND, DL, FromVT, Op, DAG.getConstant(1, DL, FromVT));
9225 return DAG.getNode(ISD::SETCC, DL, VT, And, DAG.getConstant(0, DL, FromVT),
9226 DAG.getCondCode(ISD::SETNE));
9227}
9228
9229static SDValue LowerTruncate(SDNode *N, SelectionDAG &DAG,
9230 const ARMSubtarget *Subtarget) {
9231 if (!Subtarget->hasMVEIntegerOps())
9232 return SDValue();
9233
9234 EVT ToVT = N->getValueType(0);
9235 if (ToVT.getScalarType() == MVT::i1)
9236 return LowerTruncatei1(N, DAG, Subtarget);
9237
9238 // MVE does not have a single instruction to perform the truncation of a v4i32
9239 // into the lower half of a v8i16, in the same way that a NEON vmovn would.
9240 // Most of the instructions in MVE follow the 'Beats' system, where moving
9241 // values from different lanes is usually something that the instructions
9242 // avoid.
9243 //
9244 // Instead it has top/bottom instructions such as VMOVLT/B and VMOVNT/B,
9245  // which take the top/bottom half of a larger lane and extend it (or do the
9246 // opposite, truncating into the top/bottom lane from a larger lane). Note
9247 // that because of the way we widen lanes, a v4i16 is really a v4i32 using the
9248  // bottom 16 bits from each vector lane. This works really well with T/B
9249  // instructions, but that doesn't extend to v8i32->v8i16, where the lanes need
9250  // to be reordered.
9251 //
9252 // But truncates and sext/zext are always going to be fairly common from llvm.
9253 // We have several options for how to deal with them:
9254 // - Wherever possible combine them into an instruction that makes them
9255 // "free". This includes loads/stores, which can perform the trunc as part
9256 // of the memory operation. Or certain shuffles that can be turned into
9257 // VMOVN/VMOVL.
9258 // - Lane Interleaving to transform blocks surrounded by ext/trunc. So
9259 // trunc(mul(sext(a), sext(b))) may become
9260 // VMOVNT(VMUL(VMOVLB(a), VMOVLB(b)), VMUL(VMOVLT(a), VMOVLT(b))). (Which in
9261 // this case can use VMULL). This is performed in the
9262 // MVELaneInterleavingPass.
9263 // - Otherwise we have an option. By default we would expand the
9264 // zext/sext/trunc into a series of lane extract/inserts going via GPR
9265 // registers. One for each vector lane in the vector. This can obviously be
9266 // very expensive.
9267 // - The other option is to use the fact that loads/store can extend/truncate
9268 // to turn a trunc into two truncating stack stores and a stack reload. This
9269 // becomes 3 back-to-back memory operations, but at least that is less than
9270 // all the insert/extracts.
9271 //
9272 // In order to do the last, we convert certain trunc's into MVETRUNC, which
9273 // are either optimized where they can be, or eventually lowered into stack
9274 // stores/loads. This prevents us from splitting a v8i16 trunc into two stores
9275  // too early, where other instructions would be better, and stops us from
9276 // having to reconstruct multiple buildvector shuffles into loads/stores.
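  // Illustrative example (not from the source): a 'trunc v8i32 %x to v8i16'
  // is split below into two v4i32 halves Lo/Hi and emitted as MVETRUNC(Lo, Hi),
  // which is later either matched into VMOVN-style top/bottom moves or lowered
  // to the stack store/reload sequence described above.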
9277 if (ToVT != MVT::v8i16 && ToVT != MVT::v16i8)
9278 return SDValue();
9279 EVT FromVT = N->getOperand(0).getValueType();
9280 if (FromVT != MVT::v8i32 && FromVT != MVT::v16i16)
9281 return SDValue();
9282
9283 SDValue Lo, Hi;
9284 std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
9285 SDLoc DL(N);
9286 return DAG.getNode(ARMISD::MVETRUNC, DL, ToVT, Lo, Hi);
9287}
9288
9289static SDValue LowerVectorExtend(SDNode *N, SelectionDAG &DAG,
9290 const ARMSubtarget *Subtarget) {
9291 if (!Subtarget->hasMVEIntegerOps())
9292 return SDValue();
9293
9294 // See LowerTruncate above for an explanation of MVEEXT/MVETRUNC.
9295
9296 EVT ToVT = N->getValueType(0);
9297 if (ToVT != MVT::v16i32 && ToVT != MVT::v8i32 && ToVT != MVT::v16i16)
9298 return SDValue();
9299 SDValue Op = N->getOperand(0);
9300 EVT FromVT = Op.getValueType();
9301 if (FromVT != MVT::v8i16 && FromVT != MVT::v16i8)
9302 return SDValue();
9303
9304 SDLoc DL(N);
9305 EVT ExtVT = ToVT.getHalfNumVectorElementsVT(*DAG.getContext());
9306 if (ToVT.getScalarType() == MVT::i32 && FromVT.getScalarType() == MVT::i8)
9307 ExtVT = MVT::v8i16;
9308
9309 unsigned Opcode =
9310 N->getOpcode() == ISD::SIGN_EXTEND ? ARMISD::MVESEXT : ARMISD::MVEZEXT;
9311 SDValue Ext = DAG.getNode(Opcode, DL, DAG.getVTList(ExtVT, ExtVT), Op);
9312 SDValue Ext1 = Ext.getValue(1);
9313
9314 if (ToVT.getScalarType() == MVT::i32 && FromVT.getScalarType() == MVT::i8) {
9315 Ext = DAG.getNode(N->getOpcode(), DL, MVT::v8i32, Ext);
9316 Ext1 = DAG.getNode(N->getOpcode(), DL, MVT::v8i32, Ext1);
9317 }
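  // Illustrative example (assumption): extending a v16i8 source to v16i32
  // first produces two v8i16 halves via MVESEXT/MVEZEXT, each of which the
  // nodes above extend again to v8i32; the CONCAT_VECTORS below then rebuilds
  // the full v16i32 result.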
9318
9319 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ToVT, Ext, Ext1);
9320}
9321
9322/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
9323/// element has been zero/sign-extended, depending on the isSigned parameter,
9324/// from an integer type half its size.
9325static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
9326 bool isSigned) {
9327 // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.
9328 EVT VT = N->getValueType(0);
9329 if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {
9330 SDNode *BVN = N->getOperand(0).getNode();
9331 if (BVN->getValueType(0) != MVT::v4i32 ||
9332 BVN->getOpcode() != ISD::BUILD_VECTOR)
9333 return false;
9334 unsigned LoElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
9335 unsigned HiElt = 1 - LoElt;
9336 ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
9337 ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
9338 ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2));
9339 ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2));
9340 if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
9341 return false;
9342 if (isSigned) {
9343 if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&
9344 Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
9345 return true;
9346 } else {
9347 if (Hi0->isZero() && Hi1->isZero())
9348 return true;
9349 }
9350 return false;
9351 }
9352
9353 if (N->getOpcode() != ISD::BUILD_VECTOR)
9354 return false;
9355
9356 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
9357 SDNode *Elt = N->getOperand(i).getNode();
9358 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
9359 unsigned EltSize = VT.getScalarSizeInBits();
9360 unsigned HalfSize = EltSize / 2;
9361 if (isSigned) {
9362 if (!isIntN(HalfSize, C->getSExtValue()))
9363 return false;
9364 } else {
9365 if (!isUIntN(HalfSize, C->getZExtValue()))
9366 return false;
9367 }
9368 continue;
9369 }
9370 return false;
9371 }
9372
9373 return true;
9374}
9375
9376/// isSignExtended - Check if a node is a vector value that is sign-extended
9377/// or a constant BUILD_VECTOR with sign-extended elements.
9378static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
9379 if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))
9380 return true;
9381 if (isExtendedBUILD_VECTOR(N, DAG, true))
9382 return true;
9383 return false;
9384}
9385
9386/// isZeroExtended - Check if a node is a vector value that is zero-extended (or
9387/// any-extended) or a constant BUILD_VECTOR with zero-extended elements.
9388static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
9389 if (N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND ||
9390 ISD::isZEXTLoad(N))
9391 return true;
9392 if (isExtendedBUILD_VECTOR(N, DAG, false))
9393 return true;
9394 return false;
9395}
9396
9397static EVT getExtensionTo64Bits(const EVT &OrigVT) {
9398 if (OrigVT.getSizeInBits() >= 64)
9399 return OrigVT;
9400
9401  assert(OrigVT.isSimple() && "Expecting a simple value type");
9402
9403 MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
9404 switch (OrigSimpleTy) {
9405  default: llvm_unreachable("Unexpected Vector Type");
9406 case MVT::v2i8:
9407 case MVT::v2i16:
9408 return MVT::v2i32;
9409 case MVT::v4i8:
9410 return MVT::v4i16;
9411 }
9412}
9413
9414/// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total
9415/// value size to 64 bits. We need a 64-bit D register as an operand to VMULL.
9416/// We insert the required extension here to get the vector to fill a D register.
9417static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG,
9418 const EVT &OrigTy,
9419 const EVT &ExtTy,
9420 unsigned ExtOpcode) {
9421 // The vector originally had a size of OrigTy. It was then extended to ExtTy.
9422 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
9423 // 64-bits we need to insert a new extension so that it will be 64-bits.
9424  assert(ExtTy.is128BitVector() && "Unexpected extension size");
9425 if (OrigTy.getSizeInBits() >= 64)
9426 return N;
9427
9428 // Must extend size to at least 64 bits to be used as an operand for VMULL.
9429 EVT NewVT = getExtensionTo64Bits(OrigTy);
9430
9431 return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
9432}
9433
9434/// SkipLoadExtensionForVMULL - return a load of the original vector size that
9435/// does not do any sign/zero extension. If the original vector is less
9436/// than 64 bits, an appropriate extension will be added after the load to
9437/// reach a total size of 64 bits. We have to add the extension separately
9438/// because ARM does not have a sign/zero extending load for vectors.
9439static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) {
9440 EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT());
9441
9442 // The load already has the right type.
9443 if (ExtendedTy == LD->getMemoryVT())
9444 return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(),
9445 LD->getBasePtr(), LD->getPointerInfo(), LD->getAlign(),
9446 LD->getMemOperand()->getFlags());
9447
9448 // We need to create a zextload/sextload. We cannot just create a load
9449  // followed by a sext/zext node because LowerMUL is also run during normal
9450 // operation legalization where we can't create illegal types.
9451 return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy,
9452 LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
9453 LD->getMemoryVT(), LD->getAlign(),
9454 LD->getMemOperand()->getFlags());
9455}
9456
9457/// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
9458/// ANY_EXTEND, extending load, or BUILD_VECTOR with extended elements, return
9459/// the unextended value. The unextended vector should be 64 bits so that it can
9460/// be used as an operand to a VMULL instruction. If the original vector size
9461/// before extension is less than 64 bits we add an extension to resize
9462/// the vector to 64 bits.
9463static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
9464 if (N->getOpcode() == ISD::SIGN_EXTEND ||
9465 N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND)
9466 return AddRequiredExtensionForVMULL(N->getOperand(0), DAG,
9467 N->getOperand(0)->getValueType(0),
9468 N->getValueType(0),
9469 N->getOpcode());
9470
9471 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
9472    assert((ISD::isSEXTLoad(LD) || ISD::isZEXTLoad(LD)) &&
9473           "Expected extending load");
9474
9475 SDValue newLoad = SkipLoadExtensionForVMULL(LD, DAG);
9476 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), newLoad.getValue(1));
9477 unsigned Opcode = ISD::isSEXTLoad(LD) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
9478 SDValue extLoad =
9479 DAG.getNode(Opcode, SDLoc(newLoad), LD->getValueType(0), newLoad);
9480 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 0), extLoad);
9481
9482 return newLoad;
9483 }
9484
9485 // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will
9486 // have been legalized as a BITCAST from v4i32.
9487 if (N->getOpcode() == ISD::BITCAST) {
9488 SDNode *BVN = N->getOperand(0).getNode();
9489    assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
9490           BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
9491 unsigned LowElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
9492 return DAG.getBuildVector(
9493 MVT::v2i32, SDLoc(N),
9494 {BVN->getOperand(LowElt), BVN->getOperand(LowElt + 2)});
9495 }
9496 // Construct a new BUILD_VECTOR with elements truncated to half the size.
9497  assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
9498 EVT VT = N->getValueType(0);
9499 unsigned EltSize = VT.getScalarSizeInBits() / 2;
9500 unsigned NumElts = VT.getVectorNumElements();
9501 MVT TruncVT = MVT::getIntegerVT(EltSize);
9502 SmallVector<SDValue, 8> Ops;
9503 SDLoc dl(N);
9504 for (unsigned i = 0; i != NumElts; ++i) {
9505 ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
9506 const APInt &CInt = C->getAPIntValue();
9507 // Element types smaller than 32 bits are not legal, so use i32 elements.
9508 // The values are implicitly truncated so sext vs. zext doesn't matter.
9509 Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
9510 }
9511 return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
9512}
9513
9514static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
9515 unsigned Opcode = N->getOpcode();
9516 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
9517 SDNode *N0 = N->getOperand(0).getNode();
9518 SDNode *N1 = N->getOperand(1).getNode();
9519 return N0->hasOneUse() && N1->hasOneUse() &&
9520 isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
9521 }
9522 return false;
9523}
9524
9525static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
9526 unsigned Opcode = N->getOpcode();
9527 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
9528 SDNode *N0 = N->getOperand(0).getNode();
9529 SDNode *N1 = N->getOperand(1).getNode();
9530 return N0->hasOneUse() && N1->hasOneUse() &&
9531 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
9532 }
9533 return false;
9534}
9535
9536static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
9537 // Multiplications are only custom-lowered for 128-bit vectors so that
9538 // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
9539 EVT VT = Op.getValueType();
9540  assert(VT.is128BitVector() && VT.isInteger() &&
9541         "unexpected type for custom-lowering ISD::MUL");
9542 SDNode *N0 = Op.getOperand(0).getNode();
9543 SDNode *N1 = Op.getOperand(1).getNode();
9544 unsigned NewOpc = 0;
9545 bool isMLA = false;
9546 bool isN0SExt = isSignExtended(N0, DAG);
9547 bool isN1SExt = isSignExtended(N1, DAG);
9548 if (isN0SExt && isN1SExt)
9549 NewOpc = ARMISD::VMULLs;
9550 else {
9551 bool isN0ZExt = isZeroExtended(N0, DAG);
9552 bool isN1ZExt = isZeroExtended(N1, DAG);
9553 if (isN0ZExt && isN1ZExt)
9554 NewOpc = ARMISD::VMULLu;
9555 else if (isN1SExt || isN1ZExt) {
9556 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
9557 // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
9558 if (isN1SExt && isAddSubSExt(N0, DAG)) {
9559 NewOpc = ARMISD::VMULLs;
9560 isMLA = true;
9561 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
9562 NewOpc = ARMISD::VMULLu;
9563 isMLA = true;
9564 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
9565 std::swap(N0, N1);
9566 NewOpc = ARMISD::VMULLu;
9567 isMLA = true;
9568 }
9569 }
9570
9571 if (!NewOpc) {
9572 if (VT == MVT::v2i64)
9573 // Fall through to expand this. It is not legal.
9574 return SDValue();
9575 else
9576 // Other vector multiplications are legal.
9577 return Op;
9578 }
9579 }
9580
9581 // Legalize to a VMULL instruction.
9582 SDLoc DL(Op);
9583 SDValue Op0;
9584 SDValue Op1 = SkipExtensionForVMULL(N1, DAG);
9585 if (!isMLA) {
9586 Op0 = SkipExtensionForVMULL(N0, DAG);
9587    assert(Op0.getValueType().is64BitVector() &&
9588           Op1.getValueType().is64BitVector() &&
9589           "unexpected types for extended operands to VMULL");
9590 return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
9591 }
9592
9593 // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during
9594 // isel lowering to take advantage of no-stall back to back vmul + vmla.
9595 // vmull q0, d4, d6
9596 // vmlal q0, d5, d6
9597 // is faster than
9598 // vaddl q0, d4, d5
9599 // vmovl q1, d6
9600 // vmul q0, q0, q1
9601 SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG);
9602 SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG);
9603 EVT Op1VT = Op1.getValueType();
9604 return DAG.getNode(N0->getOpcode(), DL, VT,
9605 DAG.getNode(NewOpc, DL, VT,
9606 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
9607 DAG.getNode(NewOpc, DL, VT,
9608 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
9609}
9610
9611static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, const SDLoc &dl,
9612 SelectionDAG &DAG) {
9613 // TODO: Should this propagate fast-math-flags?
9614
9615 // Convert to float
9616 // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
9617 // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
9618 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);
9619 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);
9620 X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);
9621 Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);
9622 // Get reciprocal estimate.
9623 // float4 recip = vrecpeq_f32(yf);
9624 Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9625 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9626 Y);
9627 // Because char has a smaller range than uchar, we can actually get away
9628 // without any newton steps. This requires that we use a weird bias
9629 // of 0xb000, however (again, this has been exhaustively tested).
9630 // float4 result = as_float4(as_int4(xf*recip) + 0xb000);
9631 X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
9632 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
9633 Y = DAG.getConstant(0xb000, dl, MVT::v4i32);
9634 X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
9635 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
9636 // Convert back to short.
9637 X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
9638 X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
9639 return X;
9640}
9641
9642static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl,
9643 SelectionDAG &DAG) {
9644 // TODO: Should this propagate fast-math-flags?
9645
9646 SDValue N2;
9647 // Convert to float.
9648 // float4 yf = vcvt_f32_s32(vmovl_s16(y));
9649 // float4 xf = vcvt_f32_s32(vmovl_s16(x));
9650 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
9651 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
9652 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
9653 N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
9654
9655 // Use reciprocal estimate and one refinement step.
9656 // float4 recip = vrecpeq_f32(yf);
9657 // recip *= vrecpsq_f32(yf, recip);
9658 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9659 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9660 N1);
9661 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9662 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9663 N1, N2);
9664 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
9665 // Because short has a smaller range than ushort, we can actually get away
9666 // with only a single newton step. This requires that we use a weird bias
9667  // of 0x89, however (again, this has been exhaustively tested).
9668 // float4 result = as_float4(as_int4(xf*recip) + 0x89);
9669 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
9670 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
9671 N1 = DAG.getConstant(0x89, dl, MVT::v4i32);
9672 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
9673 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
9674 // Convert back to integer and return.
9675 // return vmovn_s32(vcvt_s32_f32(result));
9676 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
9677 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
9678 return N0;
9679}
9680
9681static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG,
9682 const ARMSubtarget *ST) {
9683 EVT VT = Op.getValueType();
9684  assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
9685         "unexpected type for custom-lowering ISD::SDIV");
9686
9687 SDLoc dl(Op);
9688 SDValue N0 = Op.getOperand(0);
9689 SDValue N1 = Op.getOperand(1);
9690 SDValue N2, N3;
9691
9692 if (VT == MVT::v8i8) {
9693 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
9694 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);
9695
9696 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9697 DAG.getIntPtrConstant(4, dl));
9698 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9699 DAG.getIntPtrConstant(4, dl));
9700 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9701 DAG.getIntPtrConstant(0, dl));
9702 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9703 DAG.getIntPtrConstant(0, dl));
9704
9705 N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16
9706 N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16
9707
9708 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
9709 N0 = LowerCONCAT_VECTORS(N0, DAG, ST);
9710
9711 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
9712 return N0;
9713 }
9714 return LowerSDIV_v4i16(N0, N1, dl, DAG);
9715}
9716
9717static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG,
9718 const ARMSubtarget *ST) {
9719 // TODO: Should this propagate fast-math-flags?
9720 EVT VT = Op.getValueType();
9721  assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
9722         "unexpected type for custom-lowering ISD::UDIV");
9723
9724 SDLoc dl(Op);
9725 SDValue N0 = Op.getOperand(0);
9726 SDValue N1 = Op.getOperand(1);
9727 SDValue N2, N3;
9728
9729 if (VT == MVT::v8i8) {
9730 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
9731 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);
9732
9733 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9734 DAG.getIntPtrConstant(4, dl));
9735 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9736 DAG.getIntPtrConstant(4, dl));
9737 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9738 DAG.getIntPtrConstant(0, dl));
9739 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9740 DAG.getIntPtrConstant(0, dl));
9741
9742 N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
9743 N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16
9744
9745 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
9746 N0 = LowerCONCAT_VECTORS(N0, DAG, ST);
9747
9748 N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
9749 DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, dl,
9750 MVT::i32),
9751 N0);
9752 return N0;
9753 }
9754
9755  // v4i16 udiv ... Convert to float.
9756 // float4 yf = vcvt_f32_s32(vmovl_u16(y));
9757 // float4 xf = vcvt_f32_s32(vmovl_u16(x));
9758 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
9759 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
9760 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
9761 SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
9762
9763 // Use reciprocal estimate and two refinement steps.
9764 // float4 recip = vrecpeq_f32(yf);
9765 // recip *= vrecpsq_f32(yf, recip);
9766 // recip *= vrecpsq_f32(yf, recip);
9767 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9768 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9769 BN1);
9770 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9771 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9772 BN1, N2);
9773 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
9774 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9775 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9776 BN1, N2);
9777 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
9778 // Simply multiplying by the reciprocal estimate can leave us a few ulps
9779 // too low, so we add 2 ulps (exhaustive testing shows that this is enough,
9780 // and that it will never cause us to return an answer too large).
9781 // float4 result = as_float4(as_int4(xf*recip) + 2);
9782 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
9783 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
9784 N1 = DAG.getConstant(2, dl, MVT::v4i32);
9785 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
9786 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
9787 // Convert back to integer and return.
9788 // return vmovn_u32(vcvt_s32_f32(result));
9789 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
9790 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
9791 return N0;
9792}
9793
9794static SDValue LowerUADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG) {
9795 SDNode *N = Op.getNode();
9796 EVT VT = N->getValueType(0);
9797 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
9798
9799 SDValue Carry = Op.getOperand(2);
9800
9801 SDLoc DL(Op);
9802
9803 SDValue Result;
9804 if (Op.getOpcode() == ISD::UADDO_CARRY) {
9805 // This converts the boolean value carry into the carry flag.
9806 Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
9807
9808 // Do the addition proper using the carry flag we wanted.
9809 Result = DAG.getNode(ARMISD::ADDE, DL, VTs, Op.getOperand(0),
9810 Op.getOperand(1), Carry);
9811
9812 // Now convert the carry flag into a boolean value.
9813 Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
9814 } else {
9815 // ARMISD::SUBE expects a carry not a borrow like ISD::USUBO_CARRY so we
9816 // have to invert the carry first.
9817 Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
9818 DAG.getConstant(1, DL, MVT::i32), Carry);
9819 // This converts the boolean value carry into the carry flag.
9820 Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
9821
9822 // Do the subtraction proper using the carry flag we wanted.
9823 Result = DAG.getNode(ARMISD::SUBE, DL, VTs, Op.getOperand(0),
9824 Op.getOperand(1), Carry);
9825
9826 // Now convert the carry flag into a boolean value.
9827 Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
9828 // But the carry returned by ARMISD::SUBE is not a borrow as expected
9829 // by ISD::USUBO_CARRY, so compute 1 - C.
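    // Illustrative example (not from the source): a borrow-in of 1 becomes a
    // carry-in of 0 for ARMISD::SUBE; if SUBE then produces a carry-out of 1
    // (meaning no borrow occurred), the borrow-out returned below is 1 - 1 = 0.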
9830 Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
9831 DAG.getConstant(1, DL, MVT::i32), Carry);
9832 }
9833
9834 // Return both values.
9835 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Carry);
9836}
9837
9838SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
9839  assert(Subtarget->isTargetDarwin());
9840
9841 // For iOS, we want to call an alternative entry point: __sincos_stret,
9842 // return values are passed via sret.
9843 SDLoc dl(Op);
9844 SDValue Arg = Op.getOperand(0);
9845 EVT ArgVT = Arg.getValueType();
9846 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
9847 auto PtrVT = getPointerTy(DAG.getDataLayout());
9848
9849 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
9850 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9851
9852 // Pair of floats / doubles used to pass the result.
9853 Type *RetTy = StructType::get(ArgTy, ArgTy);
9854 auto &DL = DAG.getDataLayout();
9855
9856 ArgListTy Args;
9857 bool ShouldUseSRet = Subtarget->isAPCS_ABI();
9858 SDValue SRet;
9859 if (ShouldUseSRet) {
9860 // Create stack object for sret.
9861 const uint64_t ByteSize = DL.getTypeAllocSize(RetTy);
9862 const Align StackAlign = DL.getPrefTypeAlign(RetTy);
9863 int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false);
9864 SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy(DL));
9865
9866 ArgListEntry Entry;
9867 Entry.Node = SRet;
9868 Entry.Ty = RetTy->getPointerTo();
9869 Entry.IsSExt = false;
9870 Entry.IsZExt = false;
9871 Entry.IsSRet = true;
9872 Args.push_back(Entry);
9873 RetTy = Type::getVoidTy(*DAG.getContext());
9874 }
9875
9876 ArgListEntry Entry;
9877 Entry.Node = Arg;
9878 Entry.Ty = ArgTy;
9879 Entry.IsSExt = false;
9880 Entry.IsZExt = false;
9881 Args.push_back(Entry);
9882
9883 RTLIB::Libcall LC =
9884 (ArgVT == MVT::f64) ? RTLIB::SINCOS_STRET_F64 : RTLIB::SINCOS_STRET_F32;
9885 const char *LibcallName = getLibcallName(LC);
9886 CallingConv::ID CC = getLibcallCallingConv(LC);
9887 SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL));
9888
9889 TargetLowering::CallLoweringInfo CLI(DAG);
9890 CLI.setDebugLoc(dl)
9891 .setChain(DAG.getEntryNode())
9892 .setCallee(CC, RetTy, Callee, std::move(Args))
9893 .setDiscardResult(ShouldUseSRet);
9894 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
9895
9896 if (!ShouldUseSRet)
9897 return CallResult.first;
9898
9899 SDValue LoadSin =
9900 DAG.getLoad(ArgVT, dl, CallResult.second, SRet, MachinePointerInfo());
9901
9902 // Address of cos field.
9903 SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, SRet,
9904 DAG.getIntPtrConstant(ArgVT.getStoreSize(), dl));
9905 SDValue LoadCos =
9906 DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add, MachinePointerInfo());
9907
9908 SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
9909 return DAG.getNode(ISD::MERGE_VALUES, dl, Tys,
9910 LoadSin.getValue(0), LoadCos.getValue(0));
9911}
9912
9913SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG,
9914 bool Signed,
9915 SDValue &Chain) const {
9916 EVT VT = Op.getValueType();
9917  assert((VT == MVT::i32 || VT == MVT::i64) &&
9918         "unexpected type for custom lowering DIV");
9919 SDLoc dl(Op);
9920
9921 const auto &DL = DAG.getDataLayout();
9922 const auto &TLI = DAG.getTargetLoweringInfo();
9923
9924 const char *Name = nullptr;
9925 if (Signed)
9926 Name = (VT == MVT::i32) ? "__rt_sdiv" : "__rt_sdiv64";
9927 else
9928 Name = (VT == MVT::i32) ? "__rt_udiv" : "__rt_udiv64";
9929
9930 SDValue ES = DAG.getExternalSymbol(Name, TLI.getPointerTy(DL));
9931
9932 ARMTargetLowering::ArgListTy Args;
9933
9934 for (auto AI : {1, 0}) {
9935 ArgListEntry Arg;
9936 Arg.Node = Op.getOperand(AI);
9937 Arg.Ty = Arg.Node.getValueType().getTypeForEVT(*DAG.getContext());
9938 Args.push_back(Arg);
9939 }
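  // Note: the {1, 0} iteration above pushes operand 1 (the divisor) before
  // operand 0 (the dividend), so the divisor becomes the first argument to the
  // __rt_*div helper (assumed here to be the convention these helpers expect).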
9940
9941 CallLoweringInfo CLI(DAG);
9942 CLI.setDebugLoc(dl)
9943 .setChain(Chain)
9944 .setCallee(CallingConv::ARM_AAPCS_VFP, VT.getTypeForEVT(*DAG.getContext()),
9945 ES, std::move(Args));
9946
9947 return LowerCallTo(CLI).first;
9948}
9949
9950// This is a code size optimisation: return the original SDIV node to
9951// DAGCombiner when we don't want to expand SDIV into a sequence of
9952// instructions, and an empty node otherwise which will cause the
9953// SDIV to be expanded in DAGCombine.
9954SDValue
9955ARMTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
9956 SelectionDAG &DAG,
9957 SmallVectorImpl<SDNode *> &Created) const {
9958 // TODO: Support SREM
9959 if (N->getOpcode() != ISD::SDIV)
9960 return SDValue();
9961
9962 const auto &ST = DAG.getSubtarget<ARMSubtarget>();
9963 const bool MinSize = ST.hasMinSize();
9964 const bool HasDivide = ST.isThumb() ? ST.hasDivideInThumbMode()
9965 : ST.hasDivideInARMMode();
9966
9967 // Don't touch vector types; rewriting this may lead to scalarizing
9968 // the int divs.
9969 if (N->getOperand(0).getValueType().isVector())
9970 return SDValue();
9971
9972 // Bail if MinSize is not set, and also for both ARM and Thumb mode we need
9973 // hwdiv support for this to be really profitable.
9974 if (!(MinSize && HasDivide))
9975 return SDValue();
9976
9977 // ARM mode is a bit simpler than Thumb: we can handle large power
9978 // of 2 immediates with 1 mov instruction; no further checks required,
9979 // just return the sdiv node.
9980 if (!ST.isThumb())
9981 return SDValue(N, 0);
9982
9983 // In Thumb mode, immediates larger than 128 need a wide 4-byte MOV,
9984 // and thus lose the code size benefits of a MOVS that requires only 2.
9985 // TargetTransformInfo and 'getIntImmCodeSizeCost' could be helpful here,
9986 // but as it's doing exactly this, it's not worth the trouble to get TTI.
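  // Illustrative example (assumption): in Thumb mode with minsize and hwdiv,
  // an 'sdiv i32 %x, 64' keeps the SDIV node (the immediate fits a 2-byte
  // MOVS), while 'sdiv i32 %x, 256' returns an empty SDValue below so that
  // DAGCombine expands it into the shift-based sequence instead.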
9987 if (Divisor.sgt(128))
9988 return SDValue();
9989
9990 return SDValue(N, 0);
9991}
9992
9993SDValue ARMTargetLowering::LowerDIV_Windows(SDValue Op, SelectionDAG &DAG,
9994 bool Signed) const {
9995  assert(Op.getValueType() == MVT::i32 &&
9996         "unexpected type for custom lowering DIV");
9997 SDLoc dl(Op);
9998
9999 SDValue DBZCHK = DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other,
10000 DAG.getEntryNode(), Op.getOperand(1));
10001
10002 return LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
10003}
10004
10005static SDValue WinDBZCheckDenominator(SelectionDAG &DAG, SDNode *N, SDValue InChain) {
10006 SDLoc DL(N);
10007 SDValue Op = N->getOperand(1);
10008 if (N->getValueType(0) == MVT::i32)
10009 return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain, Op);
10010 SDValue Lo, Hi;
10011 std::tie(Lo, Hi) = DAG.SplitScalar(Op, DL, MVT::i32, MVT::i32);
10012 return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain,
10013 DAG.getNode(ISD::OR, DL, MVT::i32, Lo, Hi));
10014}
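// Note: for an i64 divisor the helper above ORs the low and high halves into
// a single i32, so the WIN__DBZCHK node only traps when the full 64-bit
// denominator is zero. A minimal sketch of the emitted chain:
//   (WIN__DBZCHK ch, (or i32 lo(Op1), hi(Op1)))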
10015
10016void ARMTargetLowering::ExpandDIV_Windows(
10017 SDValue Op, SelectionDAG &DAG, bool Signed,
10018 SmallVectorImpl<SDValue> &Results) const {
10019 const auto &DL = DAG.getDataLayout();
10020 const auto &TLI = DAG.getTargetLoweringInfo();
10021
10022  assert(Op.getValueType() == MVT::i64 &&
10023         "unexpected type for custom lowering DIV");
10024 SDLoc dl(Op);
10025
10026 SDValue DBZCHK = WinDBZCheckDenominator(DAG, Op.getNode(), DAG.getEntryNode());
10027
10028 SDValue Result = LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
10029
10030 SDValue Lower = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Result);
10031 SDValue Upper = DAG.getNode(ISD::SRL, dl, MVT::i64, Result,
10032 DAG.getConstant(32, dl, TLI.getPointerTy(DL)));
10033 Upper = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Upper);
10034
10035 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lower, Upper));
10036}
10037
10038static SDValue LowerPredicateLoad(SDValue Op, SelectionDAG &DAG) {
10039 LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
10040 EVT MemVT = LD->getMemoryVT();
10041  assert((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
10042          MemVT == MVT::v16i1) &&
10043         "Expected a predicate type!");
10044  assert(MemVT == Op.getValueType());
10045  assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&
10046         "Expected a non-extending load");
10047  assert(LD->isUnindexed() && "Expected a unindexed load");
10048
10049 // The basic MVE VLDR on a v2i1/v4i1/v8i1 actually loads the entire 16bit
10050 // predicate, with the "v4i1" bits spread out over the 16 bits loaded. We
10051 // need to make sure that 8/4/2 bits are actually loaded into the correct
10052 // place, which means loading the value and then shuffling the values into
10053 // the bottom bits of the predicate.
10054 // Equally, VLDR for an v16i1 will actually load 32bits (so will be incorrect
10055 // for BE).
10056  // For BE, the rest of llvm expects the bits in the opposite order to a
10057  // natural VMSR(load), so the value needs to be reversed.
10058
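  // Note: a worked example of the big-endian fixup below, assuming a v4i1
  // load: the 4 predicate bits are extloaded into an i32, BITREVERSE mirrors
  // them to the top of the register, and SRL by 32 - 4 = 28 moves them back
  // into the low 4 bits in the order the rest of the lowering expects.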
10059 SDLoc dl(Op);
10060 SDValue Load = DAG.getExtLoad(
10061 ISD::EXTLOAD, dl, MVT::i32, LD->getChain(), LD->getBasePtr(),
10062 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()),
10063 LD->getMemOperand());
10064 SDValue Val = Load;
10065 if (DAG.getDataLayout().isBigEndian())
10066 Val = DAG.getNode(ISD::SRL, dl, MVT::i32,
10067 DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, Load),
10068 DAG.getConstant(32 - MemVT.getSizeInBits(), dl, MVT::i32));
10069 SDValue Pred = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v16i1, Val);
10070 if (MemVT != MVT::v16i1)
10071 Pred = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MemVT, Pred,
10072 DAG.getConstant(0, dl, MVT::i32));
10073 return DAG.getMergeValues({Pred, Load.getValue(1)}, dl);
10074}
10075
10076void ARMTargetLowering::LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
10077 SelectionDAG &DAG) const {
10078 LoadSDNode *LD = cast<LoadSDNode>(N);
10079 EVT MemVT = LD->getMemoryVT();
10080  assert(LD->isUnindexed() && "Loads should be unindexed at this point.");
10081
10082 if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
10083 !Subtarget->isThumb1Only() && LD->isVolatile()) {
10084 SDLoc dl(N);
10085 SDValue Result = DAG.getMemIntrinsicNode(
10086 ARMISD::LDRD, dl, DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}),
10087 {LD->getChain(), LD->getBasePtr()}, MemVT, LD->getMemOperand());
10088 SDValue Lo = Result.getValue(DAG.getDataLayout().isLittleEndian() ? 0 : 1);
10089 SDValue Hi = Result.getValue(DAG.getDataLayout().isLittleEndian() ? 1 : 0);
10090 SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
10091 Results.append({Pair, Result.getValue(2)});
10092 }
10093}
10094
10095static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) {
10096 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
10097 EVT MemVT = ST->getMemoryVT();
10098  assert((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
10099          MemVT == MVT::v16i1) &&
10100         "Expected a predicate type!");
10101  assert(MemVT == ST->getValue().getValueType());
10102  assert(!ST->isTruncatingStore() && "Expected a non-extending store");
10103  assert(ST->isUnindexed() && "Expected a unindexed store");
10104
10105 // Only store the v2i1 or v4i1 or v8i1 worth of bits, via a buildvector with
10106 // top bits unset and a scalar store.
10107 SDLoc dl(Op);
10108 SDValue Build = ST->getValue();
10109 if (MemVT != MVT::v16i1) {
10110 SmallVector<SDValue, 16> Ops;
10111 for (unsigned I = 0; I < MemVT.getVectorNumElements(); I++) {
10112 unsigned Elt = DAG.getDataLayout().isBigEndian()
10113 ? MemVT.getVectorNumElements() - I - 1
10114 : I;
10115 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, Build,
10116 DAG.getConstant(Elt, dl, MVT::i32)));
10117 }
10118 for (unsigned I = MemVT.getVectorNumElements(); I < 16; I++)
10119 Ops.push_back(DAG.getUNDEF(MVT::i32));
10120 Build = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i1, Ops);
10121 }
10122 SDValue GRP = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Build);
10123 if (MemVT == MVT::v16i1 && DAG.getDataLayout().isBigEndian())
10124 GRP = DAG.getNode(ISD::SRL, dl, MVT::i32,
10125 DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, GRP),
10126 DAG.getConstant(16, dl, MVT::i32));
10127 return DAG.getTruncStore(
10128 ST->getChain(), dl, GRP, ST->getBasePtr(),
10129 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()),
10130 ST->getMemOperand());
10131}
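// Note: the store side mirrors LowerPredicateLoad above: only
// MemVT.getSizeInBits() bits are written, via a truncating scalar store of
// the i32 produced by PREDICATE_CAST (bit-reversed and shifted first for a
// big-endian v16i1, element-reordered in the BUILD_VECTOR for smaller types).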
10132
10133static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG,
10134 const ARMSubtarget *Subtarget) {
10135 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
10136 EVT MemVT = ST->getMemoryVT();
10137  assert(ST->isUnindexed() && "Stores should be unindexed at this point.");
10138
10139 if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
10140 !Subtarget->isThumb1Only() && ST->isVolatile()) {
10141 SDNode *N = Op.getNode();
10142 SDLoc dl(N);
10143
10144 SDValue Lo = DAG.getNode(
10145 ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
10146 DAG.getTargetConstant(DAG.getDataLayout().isLittleEndian() ? 0 : 1, dl,
10147 MVT::i32));
10148 SDValue Hi = DAG.getNode(
10149 ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
10150 DAG.getTargetConstant(DAG.getDataLayout().isLittleEndian() ? 1 : 0, dl,
10151 MVT::i32));
10152
10153 return DAG.getMemIntrinsicNode(ARMISD::STRD, dl, DAG.getVTList(MVT::Other),
10154 {ST->getChain(), Lo, Hi, ST->getBasePtr()},
10155 MemVT, ST->getMemOperand());
10156 } else if (Subtarget->hasMVEIntegerOps() &&
10157 ((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
10158 MemVT == MVT::v16i1))) {
10159 return LowerPredicateStore(Op, DAG);
10160 }
10161
10162 return SDValue();
10163}
10164
10165static bool isZeroVector(SDValue N) {
10166 return (ISD::isBuildVectorAllZeros(N.getNode()) ||
10167 (N->getOpcode() == ARMISD::VMOVIMM &&
10168 isNullConstant(N->getOperand(0))));
10169}
10170
10171static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) {
10172 MaskedLoadSDNode *N = cast<MaskedLoadSDNode>(Op.getNode());
10173 MVT VT = Op.getSimpleValueType();
10174 SDValue Mask = N->getMask();
10175 SDValue PassThru = N->getPassThru();
10176 SDLoc dl(Op);
10177
10178 if (isZeroVector(PassThru))
10179 return Op;
10180
10181 // MVE Masked loads use zero as the passthru value. Here we convert undef to
10182 // zero too, and other values are lowered to a select.
10183 SDValue ZeroVec = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
10184 DAG.getTargetConstant(0, dl, MVT::i32));
10185 SDValue NewLoad = DAG.getMaskedLoad(
10186 VT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask, ZeroVec,
10187 N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(),
10188 N->getExtensionType(), N->isExpandingLoad());
10189 SDValue Combo = NewLoad;
10190 bool PassThruIsCastZero = (PassThru.getOpcode() == ISD::BITCAST ||
10191 PassThru.getOpcode() == ARMISD::VECTOR_REG_CAST) &&
10192 isZeroVector(PassThru->getOperand(0));
10193 if (!PassThru.isUndef() && !PassThruIsCastZero)
10194 Combo = DAG.getNode(ISD::VSELECT, dl, VT, Mask, NewLoad, PassThru);
10195 return DAG.getMergeValues({Combo, NewLoad.getValue(1)}, dl);
10196}
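// Note: MVE masked loads zero the inactive lanes, so a zero pass-through
// value needs no extra work; any other non-undef pass-through is merged back
// in with a VSELECT on the original mask. Roughly:
//   mload(mask, passthru) -> vselect(mask, mload(mask, 0), passthru)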
10197
10198static SDValue LowerVecReduce(SDValue Op, SelectionDAG &DAG,
10199 const ARMSubtarget *ST) {
10200 if (!ST->hasMVEIntegerOps())
10201 return SDValue();
10202
10203 SDLoc dl(Op);
10204 unsigned BaseOpcode = 0;
10205 switch (Op->getOpcode()) {
10206  default: llvm_unreachable("Expected VECREDUCE opcode");
10207 case ISD::VECREDUCE_FADD: BaseOpcode = ISD::FADD; break;
10208 case ISD::VECREDUCE_FMUL: BaseOpcode = ISD::FMUL; break;
10209 case ISD::VECREDUCE_MUL: BaseOpcode = ISD::MUL; break;
10210 case ISD::VECREDUCE_AND: BaseOpcode = ISD::AND; break;
10211 case ISD::VECREDUCE_OR: BaseOpcode = ISD::OR; break;
10212 case ISD::VECREDUCE_XOR: BaseOpcode = ISD::XOR; break;
10213 case ISD::VECREDUCE_FMAX: BaseOpcode = ISD::FMAXNUM; break;
10214 case ISD::VECREDUCE_FMIN: BaseOpcode = ISD::FMINNUM; break;
10215 }
10216
10217 SDValue Op0 = Op->getOperand(0);
10218 EVT VT = Op0.getValueType();
10219 EVT EltVT = VT.getVectorElementType();
10220 unsigned NumElts = VT.getVectorNumElements();
10221 unsigned NumActiveLanes = NumElts;
10222
10223  assert((NumActiveLanes == 16 || NumActiveLanes == 8 || NumActiveLanes == 4 ||
10224          NumActiveLanes == 2) &&
10225         "Only expected a power 2 vector size");
10226
10227 // Use Mul(X, Rev(X)) until 4 items remain. Going down to 4 vector elements
10228 // allows us to easily extract vector elements from the lanes.
10229 while (NumActiveLanes > 4) {
10230 unsigned RevOpcode = NumActiveLanes == 16 ? ARMISD::VREV16 : ARMISD::VREV32;
10231 SDValue Rev = DAG.getNode(RevOpcode, dl, VT, Op0);
10232 Op0 = DAG.getNode(BaseOpcode, dl, VT, Op0, Rev);
10233 NumActiveLanes /= 2;
10234 }
10235
10236 SDValue Res;
10237 if (NumActiveLanes == 4) {
10238 // The remaining 4 elements are summed sequentially
10239 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10240 DAG.getConstant(0 * NumElts / 4, dl, MVT::i32));
10241 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10242 DAG.getConstant(1 * NumElts / 4, dl, MVT::i32));
10243 SDValue Ext2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10244 DAG.getConstant(2 * NumElts / 4, dl, MVT::i32));
10245 SDValue Ext3 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10246 DAG.getConstant(3 * NumElts / 4, dl, MVT::i32));
10247 SDValue Res0 = DAG.getNode(BaseOpcode, dl, EltVT, Ext0, Ext1, Op->getFlags());
10248 SDValue Res1 = DAG.getNode(BaseOpcode, dl, EltVT, Ext2, Ext3, Op->getFlags());
10249 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res0, Res1, Op->getFlags());
10250 } else {
10251 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10252 DAG.getConstant(0, dl, MVT::i32));
10253 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10254 DAG.getConstant(1, dl, MVT::i32));
10255 Res = DAG.getNode(BaseOpcode, dl, EltVT, Ext0, Ext1, Op->getFlags());
10256 }
10257
10258 // Result type may be wider than element type.
10259 if (EltVT != Op->getValueType(0))
10260 Res = DAG.getNode(ISD::ANY_EXTEND, dl, Op->getValueType(0), Res);
10261 return Res;
10262}
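// Note: a worked example of the reduction above, for a v8i16 VECREDUCE_MUL:
// one VREV32 + MUL step folds the 8 active lanes down to 4 partial products
// (duplicated within each 32-bit group), then lanes 0, 2, 4 and 6 (indices
// NumElts/4 apart) are extracted and multiplied together as scalars, with a
// final any-extend if the result type is wider than i16.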
10263
10264static SDValue LowerVecReduceF(SDValue Op, SelectionDAG &DAG,
10265 const ARMSubtarget *ST) {
10266 if (!ST->hasMVEFloatOps())
10267 return SDValue();
10268 return LowerVecReduce(Op, DAG, ST);
10269}
10270
10271static SDValue LowerVecReduceMinMax(SDValue Op, SelectionDAG &DAG,
10272 const ARMSubtarget *ST) {
10273 if (!ST->hasNEON())
10274 return SDValue();
10275
10276 SDLoc dl(Op);
10277 SDValue Op0 = Op->getOperand(0);
10278 EVT VT = Op0.getValueType();
10279 EVT EltVT = VT.getVectorElementType();
10280
10281 unsigned PairwiseIntrinsic = 0;
10282 switch (Op->getOpcode()) {
10283 default:
10284    llvm_unreachable("Expected VECREDUCE opcode");
10285 case ISD::VECREDUCE_UMIN:
10286 PairwiseIntrinsic = Intrinsic::arm_neon_vpminu;
10287 break;
10288 case ISD::VECREDUCE_UMAX:
10289 PairwiseIntrinsic = Intrinsic::arm_neon_vpmaxu;
10290 break;
10291 case ISD::VECREDUCE_SMIN:
10292 PairwiseIntrinsic = Intrinsic::arm_neon_vpmins;
10293 break;
10294 case ISD::VECREDUCE_SMAX:
10295 PairwiseIntrinsic = Intrinsic::arm_neon_vpmaxs;
10296 break;
10297 }
10298 SDValue PairwiseOp = DAG.getConstant(PairwiseIntrinsic, dl, MVT::i32);
10299
10300 unsigned NumElts = VT.getVectorNumElements();
10301 unsigned NumActiveLanes = NumElts;
10302
10303  assert((NumActiveLanes == 16 || NumActiveLanes == 8 || NumActiveLanes == 4 ||
10304          NumActiveLanes == 2) &&
10305         "Only expected a power 2 vector size");
10306
10307 // Split 128-bit vectors, since vpmin/max takes 2 64-bit vectors.
10308 if (VT.is128BitVector()) {
10309 SDValue Lo, Hi;
10310 std::tie(Lo, Hi) = DAG.SplitVector(Op0, dl);
10311 VT = Lo.getValueType();
10312 Op0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, {PairwiseOp, Lo, Hi});
10313 NumActiveLanes /= 2;
10314 }
10315
10316 // Use pairwise reductions until one lane remains
10317 while (NumActiveLanes > 1) {
10318 Op0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, {PairwiseOp, Op0, Op0});
10319 NumActiveLanes /= 2;
10320 }
10321
10322 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10323 DAG.getConstant(0, dl, MVT::i32));
10324
10325 // Result type may be wider than element type.
10326 if (EltVT != Op.getValueType()) {
10327 unsigned Extend = 0;
10328 switch (Op->getOpcode()) {
10329 default:
10330      llvm_unreachable("Expected VECREDUCE opcode");
10331 case ISD::VECREDUCE_UMIN:
10332 case ISD::VECREDUCE_UMAX:
10333 Extend = ISD::ZERO_EXTEND;
10334 break;
10335 case ISD::VECREDUCE_SMIN:
10336 case ISD::VECREDUCE_SMAX:
10337 Extend = ISD::SIGN_EXTEND;
10338 break;
10339 }
10340 Res = DAG.getNode(Extend, dl, Op.getValueType(), Res);
10341 }
10342 return Res;
10343}
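// Note: the min/max reductions rely on the NEON pairwise intrinsics, which
// operate on 64-bit vectors only. A 128-bit input is first split and its two
// halves combined pairwise; vpmin/vpmax is then applied with the vector
// paired against itself until one active lane remains, leaving the answer in
// lane 0, which is zero- or sign-extended to the result type as needed.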
10344
10345static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
10346 if (isStrongerThanMonotonic(cast<AtomicSDNode>(Op)->getSuccessOrdering()))
10347 // Acquire/Release load/store is not legal for targets without a dmb or
10348 // equivalent available.
10349 return SDValue();
10350
10351 // Monotonic load/store is legal for all targets.
10352 return Op;
10353}
10354
10355static void ReplaceREADCYCLECOUNTER(SDNode *N,
10356 SmallVectorImpl<SDValue> &Results,
10357 SelectionDAG &DAG,
10358 const ARMSubtarget *Subtarget) {
10359 SDLoc DL(N);
10360 // Under Power Management extensions, the cycle-count is:
10361 // mrc p15, #0, <Rt>, c9, c13, #0
10362 SDValue Ops[] = { N->getOperand(0), // Chain
10363 DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
10364 DAG.getTargetConstant(15, DL, MVT::i32),
10365 DAG.getTargetConstant(0, DL, MVT::i32),
10366 DAG.getTargetConstant(9, DL, MVT::i32),
10367 DAG.getTargetConstant(13, DL, MVT::i32),
10368 DAG.getTargetConstant(0, DL, MVT::i32)
10369 };
10370
10371 SDValue Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
10372 DAG.getVTList(MVT::i32, MVT::Other), Ops);
10373 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Cycles32,
10374 DAG.getConstant(0, DL, MVT::i32)));
10375 Results.push_back(Cycles32.getValue(1));
10376}
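// Note: the intrinsic above reads the 32-bit PMU cycle counter (PMCCNTR,
// accessed as mrc p15, #0, <Rt>, c9, c13, #0) and widens it to the i64 the
// generic READCYCLECOUNTER node expects by pairing it with a zero high word.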
10377
10378static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
10379 SDLoc dl(V.getNode());
10380 SDValue VLo = DAG.getAnyExtOrTrunc(V, dl, MVT::i32);
10381 SDValue VHi = DAG.getAnyExtOrTrunc(
10382 DAG.getNode(ISD::SRL, dl, MVT::i64, V, DAG.getConstant(32, dl, MVT::i32)),
10383 dl, MVT::i32);
10384 bool isBigEndian = DAG.getDataLayout().isBigEndian();
10385 if (isBigEndian)
10386 std::swap (VLo, VHi);
10387 SDValue RegClass =
10388 DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
10389 SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32);
10390 SDValue SubReg1 = DAG.getTargetConstant(ARM::gsub_1, dl, MVT::i32);
10391 const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
10392 return SDValue(
10393 DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
10394}
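// Note: the helper above packs a 64-bit value into an untyped GPRPair via a
// REG_SEQUENCE of its two i32 halves (swapped on big-endian targets), which
// is the operand form the CMP_SWAP_64 pseudo used below expects.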
10395
10396static void ReplaceCMP_SWAP_64Results(SDNode *N,
10397 SmallVectorImpl<SDValue> & Results,
10398 SelectionDAG &DAG) {
10399  assert(N->getValueType(0) == MVT::i64 &&
10400         "AtomicCmpSwap on types less than 64 should be legal");
10401 SDValue Ops[] = {N->getOperand(1),
10402 createGPRPairNode(DAG, N->getOperand(2)),
10403 createGPRPairNode(DAG, N->getOperand(3)),
10404 N->getOperand(0)};
10405 SDNode *CmpSwap = DAG.getMachineNode(
10406 ARM::CMP_SWAP_64, SDLoc(N),
10407 DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other), Ops);
10408
10409 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
10410 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
10411
10412 bool isBigEndian = DAG.getDataLayout().isBigEndian();
10413
10414 SDValue Lo =
10415 DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_1 : ARM::gsub_0,
10416 SDLoc(N), MVT::i32, SDValue(CmpSwap, 0));
10417 SDValue Hi =
10418 DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_0 : ARM::gsub_1,
10419 SDLoc(N), MVT::i32, SDValue(CmpSwap, 0));
10420 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i64, Lo, Hi));
10421 Results.push_back(SDValue(CmpSwap, 2));
10422}
10423
10424SDValue ARMTargetLowering::LowerFSETCC(SDValue Op, SelectionDAG &DAG) const {
10425 SDLoc dl(Op);
10426 EVT VT = Op.getValueType();
10427 SDValue Chain = Op.getOperand(0);
10428 SDValue LHS = Op.getOperand(1);
10429 SDValue RHS = Op.getOperand(2);
10430 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
10431 bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;
10432
10433 // If we don't have instructions of this float type then soften to a libcall
10434 // and use SETCC instead.
10435 if (isUnsupportedFloatingType(LHS.getValueType())) {
10436 DAG.getTargetLoweringInfo().softenSetCCOperands(
10437 DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS, Chain, IsSignaling);
10438 if (!RHS.getNode()) {
10439 RHS = DAG.getConstant(0, dl, LHS.getValueType());
10440 CC = ISD::SETNE;
10441 }
10442 SDValue Result = DAG.getNode(ISD::SETCC, dl, VT, LHS, RHS,
10443 DAG.getCondCode(CC));
10444 return DAG.getMergeValues({Result, Chain}, dl);
10445 }
10446
10447 ARMCC::CondCodes CondCode, CondCode2;
10448 FPCCToARMCC(CC, CondCode, CondCode2);
10449
10450 // FIXME: Chain is not handled correctly here. Currently the FPSCR is implicit
10451 // in CMPFP and CMPFPE, but instead it should be made explicit by these
10452 // instructions using a chain instead of glue. This would also fix the problem
10453 // here (and also in LowerSELECT_CC) where we generate two comparisons when
10454 // CondCode2 != AL.
10455 SDValue True = DAG.getConstant(1, dl, VT);
10456 SDValue False = DAG.getConstant(0, dl, VT);
10457 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
10458 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
10459 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, IsSignaling);
10460 SDValue Result = getCMOV(dl, VT, False, True, ARMcc, CCR, Cmp, DAG);
10461 if (CondCode2 != ARMCC::AL) {
10462 ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
10463 Cmp = getVFPCmp(LHS, RHS, DAG, dl, IsSignaling);
10464 Result = getCMOV(dl, VT, Result, True, ARMcc, CCR, Cmp, DAG);
10465 }
10466 return DAG.getMergeValues({Result, Chain}, dl);
10467}
10468
10469SDValue ARMTargetLowering::LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const {
10470 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
10471
10472 EVT VT = getPointerTy(DAG.getDataLayout());
10473 SDLoc DL(Op);
10474 int FI = MFI.CreateFixedObject(4, 0, false);
10475 return DAG.getFrameIndex(FI, VT);
10476}
10477
10478SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
10479  LLVM_DEBUG(dbgs() << "Lowering node: "; Op.dump());
10480 switch (Op.getOpcode()) {
10481  default: llvm_unreachable("Don't know how to custom lower this!");
10482 case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG);
10483 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
10484 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
10485 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
10486 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
10487 case ISD::SELECT: return LowerSELECT(Op, DAG);
10488 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
10489 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
10490 case ISD::BR_CC: return LowerBR_CC(Op, DAG);
10491 case ISD::BR_JT: return LowerBR_JT(Op, DAG);
10492 case ISD::VASTART: return LowerVASTART(Op, DAG);
10493 case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget);
10494 case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget);
10495 case ISD::SINT_TO_FP:
10496 case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
10497 case ISD::STRICT_FP_TO_SINT:
10498 case ISD::STRICT_FP_TO_UINT:
10499 case ISD::FP_TO_SINT:
10500 case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
10501 case ISD::FP_TO_SINT_SAT:
10502 case ISD::FP_TO_UINT_SAT: return LowerFP_TO_INT_SAT(Op, DAG, Subtarget);
10503 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
10504 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
10505 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
10506 case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
10507 case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
10508 case ISD::EH_SJLJ_SETUP_DISPATCH: return LowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
10509 case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG, Subtarget);
10510 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
10511 Subtarget);
10512 case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG, Subtarget);
10513 case ISD::SHL:
10514 case ISD::SRL:
10515 case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget);
10516 case ISD::SREM: return LowerREM(Op.getNode(), DAG);
10517 case ISD::UREM: return LowerREM(Op.getNode(), DAG);
10518 case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
10519 case ISD::SRL_PARTS:
10520 case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
10521 case ISD::CTTZ:
10522 case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
10523 case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget);
10524 case ISD::SETCC: return LowerVSETCC(Op, DAG, Subtarget);
10525 case ISD::SETCCCARRY: return LowerSETCCCARRY(Op, DAG);
10526 case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget);
10527 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
10528 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
10529 case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG, Subtarget);
10530 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
10531 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG, Subtarget);
10532 case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG, Subtarget);
10533 case ISD::TRUNCATE: return LowerTruncate(Op.getNode(), DAG, Subtarget);
10534 case ISD::SIGN_EXTEND:
10535 case ISD::ZERO_EXTEND: return LowerVectorExtend(Op.getNode(), DAG, Subtarget);
10536 case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
10537 case ISD::SET_ROUNDING: return LowerSET_ROUNDING(Op, DAG);
10538 case ISD::MUL: return LowerMUL(Op, DAG);
10539 case ISD::SDIV:
10540 if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
10541 return LowerDIV_Windows(Op, DAG, /* Signed */ true);
10542 return LowerSDIV(Op, DAG, Subtarget);
10543 case ISD::UDIV:
10544 if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
10545 return LowerDIV_Windows(Op, DAG, /* Signed */ false);
10546 return LowerUDIV(Op, DAG, Subtarget);
10547 case ISD::UADDO_CARRY:
10548 case ISD::USUBO_CARRY:
10549 return LowerUADDSUBO_CARRY(Op, DAG);
10550 case ISD::SADDO:
10551 case ISD::SSUBO:
10552 return LowerSignedALUO(Op, DAG);
10553 case ISD::UADDO:
10554 case ISD::USUBO:
10555 return LowerUnsignedALUO(Op, DAG);
10556 case ISD::SADDSAT:
10557 case ISD::SSUBSAT:
10558 case ISD::UADDSAT:
10559 case ISD::USUBSAT:
10560 return LowerADDSUBSAT(Op, DAG, Subtarget);
10561 case ISD::LOAD:
10562 return LowerPredicateLoad(Op, DAG);
10563 case ISD::STORE:
10564 return LowerSTORE(Op, DAG, Subtarget);
10565 case ISD::MLOAD:
10566 return LowerMLOAD(Op, DAG);
10567 case ISD::VECREDUCE_MUL:
10568 case ISD::VECREDUCE_AND:
10569 case ISD::VECREDUCE_OR:
10570 case ISD::VECREDUCE_XOR:
10571 return LowerVecReduce(Op, DAG, Subtarget);
10572 case ISD::VECREDUCE_FADD:
10573 case ISD::VECREDUCE_FMUL:
10574 case ISD::VECREDUCE_FMIN:
10575 case ISD::VECREDUCE_FMAX:
10576 return LowerVecReduceF(Op, DAG, Subtarget);
10577 case ISD::VECREDUCE_UMIN:
10578 case ISD::VECREDUCE_UMAX:
10579 case ISD::VECREDUCE_SMIN:
10580 case ISD::VECREDUCE_SMAX:
10581 return LowerVecReduceMinMax(Op, DAG, Subtarget);
10582 case ISD::ATOMIC_LOAD:
10583 case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
10584 case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
10585 case ISD::SDIVREM:
10586 case ISD::UDIVREM: return LowerDivRem(Op, DAG);
10587 case ISD::DYNAMIC_STACKALLOC:
10588 if (Subtarget->isTargetWindows())
10589 return LowerDYNAMIC_STACKALLOC(Op, DAG);
10590    llvm_unreachable("Don't know how to custom lower this!");
10591 case ISD::STRICT_FP_ROUND:
10592 case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
10593 case ISD::STRICT_FP_EXTEND:
10594 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
10595 case ISD::STRICT_FSETCC:
10596 case ISD::STRICT_FSETCCS: return LowerFSETCC(Op, DAG);
10597 case ISD::SPONENTRY:
10598 return LowerSPONENTRY(Op, DAG);
10599 case ARMISD::WIN__DBZCHK: return SDValue();
10600 }
10601}
10602
10603static void ReplaceLongIntrinsic(SDNode *N, SmallVectorImpl<SDValue> &Results,
10604 SelectionDAG &DAG) {
10605 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
10606 unsigned Opc = 0;
10607 if (IntNo == Intrinsic::arm_smlald)
10608 Opc = ARMISD::SMLALD;
10609 else if (IntNo == Intrinsic::arm_smlaldx)
10610 Opc = ARMISD::SMLALDX;
10611 else if (IntNo == Intrinsic::arm_smlsld)
10612 Opc = ARMISD::SMLSLD;
10613 else if (IntNo == Intrinsic::arm_smlsldx)
10614 Opc = ARMISD::SMLSLDX;
10615 else
10616 return;
10617
10618 SDLoc dl(N);
10619 SDValue Lo, Hi;
10620 std::tie(Lo, Hi) = DAG.SplitScalar(N->getOperand(3), dl, MVT::i32, MVT::i32);
10621
10622 SDValue LongMul = DAG.getNode(Opc, dl,
10623 DAG.getVTList(MVT::i32, MVT::i32),
10624 N->getOperand(1), N->getOperand(2),
10625 Lo, Hi);
10626 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64,
10627 LongMul.getValue(0), LongMul.getValue(1)));
10628}
10629
10630/// ReplaceNodeResults - Replace the results of node with an illegal result
10631/// type with new values built out of custom code.
10632void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
10633 SmallVectorImpl<SDValue> &Results,
10634 SelectionDAG &DAG) const {
10635 SDValue Res;
10636 switch (N->getOpcode()) {
10637 default:
10638    llvm_unreachable("Don't know how to custom expand this!");
10639 case ISD::READ_REGISTER:
10640 ExpandREAD_REGISTER(N, Results, DAG);
10641 break;
10642 case ISD::BITCAST:
10643 Res = ExpandBITCAST(N, DAG, Subtarget);
10644 break;
10645 case ISD::SRL:
10646 case ISD::SRA:
10647 case ISD::SHL:
10648 Res = Expand64BitShift(N, DAG, Subtarget);
10649 break;
10650 case ISD::SREM:
10651 case ISD::UREM:
10652 Res = LowerREM(N, DAG);
10653 break;
10654 case ISD::SDIVREM:
10655 case ISD::UDIVREM:
10656 Res = LowerDivRem(SDValue(N, 0), DAG);
10657    assert(Res.getNumOperands() == 2 && "DivRem needs two values");
10658 Results.push_back(Res.getValue(0));
10659 Results.push_back(Res.getValue(1));
10660 return;
10661 case ISD::SADDSAT:
10662 case ISD::SSUBSAT:
10663 case ISD::UADDSAT:
10664 case ISD::USUBSAT:
10665 Res = LowerADDSUBSAT(SDValue(N, 0), DAG, Subtarget);
10666 break;
10667 case ISD::READCYCLECOUNTER:
10668 ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
10669 return;
10670 case ISD::UDIV:
10671 case ISD::SDIV:
10672    assert(Subtarget->isTargetWindows() && "can only expand DIV on Windows");
10673 return ExpandDIV_Windows(SDValue(N, 0), DAG, N->getOpcode() == ISD::SDIV,
10674 Results);
10675 case ISD::ATOMIC_CMP_SWAP:
10676 ReplaceCMP_SWAP_64Results(N, Results, DAG);
10677 return;
10678 case ISD::INTRINSIC_WO_CHAIN:
10679 return ReplaceLongIntrinsic(N, Results, DAG);
10680 case ISD::LOAD:
10681 LowerLOAD(N, Results, DAG);
10682 break;
10683 case ISD::TRUNCATE:
10684 Res = LowerTruncate(N, DAG, Subtarget);
10685 break;
10686 case ISD::SIGN_EXTEND:
10687 case ISD::ZERO_EXTEND:
10688 Res = LowerVectorExtend(N, DAG, Subtarget);
10689 break;
10690 case ISD::FP_TO_SINT_SAT:
10691 case ISD::FP_TO_UINT_SAT:
10692 Res = LowerFP_TO_INT_SAT(SDValue(N, 0), DAG, Subtarget);
10693 break;
10694 }
10695 if (Res.getNode())
10696 Results.push_back(Res);
10697}
10698
10699//===----------------------------------------------------------------------===//
10700// ARM Scheduler Hooks
10701//===----------------------------------------------------------------------===//
10702
10703/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
10704/// registers the function context.
10705void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
10706 MachineBasicBlock *MBB,
10707 MachineBasicBlock *DispatchBB,
10708 int FI) const {
10709  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
10710         "ROPI/RWPI not currently supported with SjLj");
10711 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
10712 DebugLoc dl = MI.getDebugLoc();
10713 MachineFunction *MF = MBB->getParent();
10714 MachineRegisterInfo *MRI = &MF->getRegInfo();
10715 MachineConstantPool *MCP = MF->getConstantPool();
10716 ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();
10717 const Function &F = MF->getFunction();
10718
10719 bool isThumb = Subtarget->isThumb();
10720 bool isThumb2 = Subtarget->isThumb2();
10721
10722 unsigned PCLabelId = AFI->createPICLabelUId();
10723 unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8;
10724 ARMConstantPoolValue *CPV =
10725 ARMConstantPoolMBB::Create(F.getContext(), DispatchBB, PCLabelId, PCAdj);
10726 unsigned CPI = MCP->getConstantPoolIndex(CPV, Align(4));
10727
10728 const TargetRegisterClass *TRC = isThumb ? &ARM::tGPRRegClass
10729 : &ARM::GPRRegClass;
10730
10731 // Grab constant pool and fixed stack memory operands.
10732 MachineMemOperand *CPMMO =
10733 MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
10734 MachineMemOperand::MOLoad, 4, Align(4));
10735
10736 MachineMemOperand *FIMMOSt =
10737 MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(*MF, FI),
10738 MachineMemOperand::MOStore, 4, Align(4));
10739
10740 // Load the address of the dispatch MBB into the jump buffer.
10741 if (isThumb2) {
10742 // Incoming value: jbuf
10743 // ldr.n r5, LCPI1_1
10744 // orr r5, r5, #1
10745 // add r5, pc
10746 // str r5, [$jbuf, #+4] ; &jbuf[1]
10747 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10748 BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1)
10749 .addConstantPoolIndex(CPI)
10750 .addMemOperand(CPMMO)
10751 .add(predOps(ARMCC::AL));
10752 // Set the low bit because of thumb mode.
10753 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10754 BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2)
10755 .addReg(NewVReg1, RegState::Kill)
10756 .addImm(0x01)
10757 .add(predOps(ARMCC::AL))
10758 .add(condCodeOp());
10759 Register NewVReg3 = MRI->createVirtualRegister(TRC);
10760 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3)
10761 .addReg(NewVReg2, RegState::Kill)
10762 .addImm(PCLabelId);
10763 BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12))
10764 .addReg(NewVReg3, RegState::Kill)
10765 .addFrameIndex(FI)
10766 .addImm(36) // &jbuf[1] :: pc
10767 .addMemOperand(FIMMOSt)
10768 .add(predOps(ARMCC::AL));
10769 } else if (isThumb) {
10770 // Incoming value: jbuf
10771 // ldr.n r1, LCPI1_4
10772 // add r1, pc
10773 // mov r2, #1
10774 // orrs r1, r2
10775 // add r2, $jbuf, #+4 ; &jbuf[1]
10776 // str r1, [r2]
10777 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10778 BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1)
10779 .addConstantPoolIndex(CPI)
10780 .addMemOperand(CPMMO)
10781 .add(predOps(ARMCC::AL));
10782 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10783 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2)
10784 .addReg(NewVReg1, RegState::Kill)
10785 .addImm(PCLabelId);
10786 // Set the low bit because of thumb mode.
10787 Register NewVReg3 = MRI->createVirtualRegister(TRC);
10788 BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3)
10789 .addReg(ARM::CPSR, RegState::Define)
10790 .addImm(1)
10791 .add(predOps(ARMCC::AL));
10792 Register NewVReg4 = MRI->createVirtualRegister(TRC);
10793 BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4)
10794 .addReg(ARM::CPSR, RegState::Define)
10795 .addReg(NewVReg2, RegState::Kill)
10796 .addReg(NewVReg3, RegState::Kill)
10797 .add(predOps(ARMCC::AL));
10798 Register NewVReg5 = MRI->createVirtualRegister(TRC);
10799 BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5)
10800 .addFrameIndex(FI)
10801 .addImm(36); // &jbuf[1] :: pc
10802 BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
10803 .addReg(NewVReg4, RegState::Kill)
10804 .addReg(NewVReg5, RegState::Kill)
10805 .addImm(0)
10806 .addMemOperand(FIMMOSt)
10807 .add(predOps(ARMCC::AL));
10808 } else {
10809 // Incoming value: jbuf
10810 // ldr r1, LCPI1_1
10811 // add r1, pc, r1
10812 // str r1, [$jbuf, #+4] ; &jbuf[1]
10813 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10814 BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1)
10815 .addConstantPoolIndex(CPI)
10816 .addImm(0)
10817 .addMemOperand(CPMMO)
10818 .add(predOps(ARMCC::AL));
10819 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10820 BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2)
10821 .addReg(NewVReg1, RegState::Kill)
10822 .addImm(PCLabelId)
10823 .add(predOps(ARMCC::AL));
10824 BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12))
10825 .addReg(NewVReg2, RegState::Kill)
10826 .addFrameIndex(FI)
10827 .addImm(36) // &jbuf[1] :: pc
10828 .addMemOperand(FIMMOSt)
10829 .add(predOps(ARMCC::AL));
10830 }
10831}
10832
10833void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
10834 MachineBasicBlock *MBB) const {
10835 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
10836 DebugLoc dl = MI.getDebugLoc();
10837 MachineFunction *MF = MBB->getParent();
10838 MachineRegisterInfo *MRI = &MF->getRegInfo();
10839 MachineFrameInfo &MFI = MF->getFrameInfo();
10840 int FI = MFI.getFunctionContextIndex();
10841
10842 const TargetRegisterClass *TRC = Subtarget->isThumb() ? &ARM::tGPRRegClass
10843 : &ARM::GPRnopcRegClass;
10844
10845 // Get a mapping of the call site numbers to all of the landing pads they're
10846 // associated with.
10847 DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2>> CallSiteNumToLPad;
10848 unsigned MaxCSNum = 0;
10849 for (MachineBasicBlock &BB : *MF) {
10850 if (!BB.isEHPad())
10851 continue;
10852
10853 // FIXME: We should assert that the EH_LABEL is the first MI in the landing
10854 // pad.
10855 for (MachineInstr &II : BB) {
10856 if (!II.isEHLabel())
10857 continue;
10858
10859 MCSymbol *Sym = II.getOperand(0).getMCSymbol();
10860 if (!MF->hasCallSiteLandingPad(Sym)) continue;
10861
10862 SmallVectorImpl<unsigned> &CallSiteIdxs = MF->getCallSiteLandingPad(Sym);
10863 for (unsigned Idx : CallSiteIdxs) {
10864 CallSiteNumToLPad[Idx].push_back(&BB);
10865 MaxCSNum = std::max(MaxCSNum, Idx);
10866 }
10867 break;
10868 }
10869 }
10870
10871 // Get an ordered list of the machine basic blocks for the jump table.
10872 std::vector<MachineBasicBlock*> LPadList;
10873 SmallPtrSet<MachineBasicBlock*, 32> InvokeBBs;
10874 LPadList.reserve(CallSiteNumToLPad.size());
10875 for (unsigned I = 1; I <= MaxCSNum; ++I) {
10876 SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I];
10877 for (MachineBasicBlock *MBB : MBBList) {
10878 LPadList.push_back(MBB);
10879 InvokeBBs.insert(MBB->pred_begin(), MBB->pred_end());
10880 }
10881 }
10882
10883  assert(!LPadList.empty() &&
10884         "No landing pad destinations for the dispatch jump table!");
10885
10886 // Create the jump table and associated information.
10887 MachineJumpTableInfo *JTI =
10888 MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
10889 unsigned MJTI = JTI->createJumpTableIndex(LPadList);
10890
10891 // Create the MBBs for the dispatch code.
10892
10893 // Shove the dispatch's address into the return slot in the function context.
10894 MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
10895 DispatchBB->setIsEHPad();
10896
10897 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
10898 unsigned trap_opcode;
10899 if (Subtarget->isThumb())
10900 trap_opcode = ARM::tTRAP;
10901 else
10902 trap_opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP;
10903
10904 BuildMI(TrapBB, dl, TII->get(trap_opcode));
10905 DispatchBB->addSuccessor(TrapBB);
10906
10907 MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
10908 DispatchBB->addSuccessor(DispContBB);
10909
10910  // Insert the MBBs.
10911 MF->insert(MF->end(), DispatchBB);
10912 MF->insert(MF->end(), DispContBB);
10913 MF->insert(MF->end(), TrapBB);
10914
10915 // Insert code into the entry block that creates and registers the function
10916 // context.
10917 SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI);
10918
10919 MachineMemOperand *FIMMOLd = MF->getMachineMemOperand(
10920 MachinePointerInfo::getFixedStack(*MF, FI),
10921 MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile, 4, Align(4));
10922
10923 MachineInstrBuilder MIB;
10924 MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup));
10925
10926 const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
10927 const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
10928
10929 // Add a register mask with no preserved registers. This results in all
10930 // registers being marked as clobbered. This can't work if the dispatch block
10931 // is in a Thumb1 function and is linked with ARM code which uses the FP
10932 // registers, as there is no way to preserve the FP registers in Thumb1 mode.
10933 MIB.addRegMask(RI.getSjLjDispatchPreservedMask(*MF));
10934
10935 bool IsPositionIndependent = isPositionIndependent();
10936 unsigned NumLPads = LPadList.size();
10937 if (Subtarget->isThumb2()) {
10938 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10939 BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
10940 .addFrameIndex(FI)
10941 .addImm(4)
10942 .addMemOperand(FIMMOLd)
10943 .add(predOps(ARMCC::AL));
10944
10945 if (NumLPads < 256) {
10946 BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
10947 .addReg(NewVReg1)
10948 .addImm(LPadList.size())
10949 .add(predOps(ARMCC::AL));
10950 } else {
10951 Register VReg1 = MRI->createVirtualRegister(TRC);
10952 BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1)
10953 .addImm(NumLPads & 0xFFFF)
10954 .add(predOps(ARMCC::AL));
10955
10956 unsigned VReg2 = VReg1;
10957 if ((NumLPads & 0xFFFF0000) != 0) {
10958 VReg2 = MRI->createVirtualRegister(TRC);
10959 BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2)
10960 .addReg(VReg1)
10961 .addImm(NumLPads >> 16)
10962 .add(predOps(ARMCC::AL));
10963 }
10964
10965 BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr))
10966 .addReg(NewVReg1)
10967 .addReg(VReg2)
10968 .add(predOps(ARMCC::AL));
10969 }
10970
10971 BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc))
10972 .addMBB(TrapBB)
10973 .addImm(ARMCC::HI)
10974 .addReg(ARM::CPSR);
10975
10976 Register NewVReg3 = MRI->createVirtualRegister(TRC);
10977 BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT), NewVReg3)
10978 .addJumpTableIndex(MJTI)
10979 .add(predOps(ARMCC::AL));
10980
10981 Register NewVReg4 = MRI->createVirtualRegister(TRC);
10982 BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4)
10983 .addReg(NewVReg3, RegState::Kill)
10984 .addReg(NewVReg1)
10985 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))
10986 .add(predOps(ARMCC::AL))
10987 .add(condCodeOp());
10988
10989 BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT))
10990 .addReg(NewVReg4, RegState::Kill)
10991 .addReg(NewVReg1)
10992 .addJumpTableIndex(MJTI);
10993 } else if (Subtarget->isThumb()) {
10994 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10995 BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)
10996 .addFrameIndex(FI)
10997 .addImm(1)
10998 .addMemOperand(FIMMOLd)
10999 .add(predOps(ARMCC::AL));
11000
11001 if (NumLPads < 256) {
11002 BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
11003 .addReg(NewVReg1)
11004 .addImm(NumLPads)
11005 .add(predOps(ARMCC::AL));
11006 } else {
11007 MachineConstantPool *ConstantPool = MF->getConstantPool();
11008 Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
11009 const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
11010
11011 // MachineConstantPool wants an explicit alignment.
11012 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
11013 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
11014
11015 Register VReg1 = MRI->createVirtualRegister(TRC);
11016 BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci))
11017 .addReg(VReg1, RegState::Define)
11018 .addConstantPoolIndex(Idx)
11019 .add(predOps(ARMCC::AL));
11020 BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr))
11021 .addReg(NewVReg1)
11022 .addReg(VReg1)
11023 .add(predOps(ARMCC::AL));
11024 }
11025
11026 BuildMI(DispatchBB, dl, TII->get(ARM::tBcc))
11027 .addMBB(TrapBB)
11028 .addImm(ARMCC::HI)
11029 .addReg(ARM::CPSR);
11030
11031 Register NewVReg2 = MRI->createVirtualRegister(TRC);
11032 BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2)
11033 .addReg(ARM::CPSR, RegState::Define)
11034 .addReg(NewVReg1)
11035 .addImm(2)
11036 .add(predOps(ARMCC::AL));
11037
11038 Register NewVReg3 = MRI->createVirtualRegister(TRC);
11039 BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3)
11040 .addJumpTableIndex(MJTI)
11041 .add(predOps(ARMCC::AL));
11042
11043 Register NewVReg4 = MRI->createVirtualRegister(TRC);
11044 BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4)
11045 .addReg(ARM::CPSR, RegState::Define)
11046 .addReg(NewVReg2, RegState::Kill)
11047 .addReg(NewVReg3)
11048 .add(predOps(ARMCC::AL));
11049
11050 MachineMemOperand *JTMMOLd =
11051 MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(*MF),
11052 MachineMemOperand::MOLoad, 4, Align(4));
11053
11054 Register NewVReg5 = MRI->createVirtualRegister(TRC);
11055 BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
11056 .addReg(NewVReg4, RegState::Kill)
11057 .addImm(0)
11058 .addMemOperand(JTMMOLd)
11059 .add(predOps(ARMCC::AL));
11060
11061 unsigned NewVReg6 = NewVReg5;
11062 if (IsPositionIndependent) {
11063 NewVReg6 = MRI->createVirtualRegister(TRC);
11064 BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
11065 .addReg(ARM::CPSR, RegState::Define)
11066 .addReg(NewVReg5, RegState::Kill)
11067 .addReg(NewVReg3)
11068 .add(predOps(ARMCC::AL));
11069 }
11070
11071 BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr))
11072 .addReg(NewVReg6, RegState::Kill)
11073 .addJumpTableIndex(MJTI);
11074 } else {
11075 Register NewVReg1 = MRI->createVirtualRegister(TRC);
11076 BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1)
11077 .addFrameIndex(FI)
11078 .addImm(4)
11079 .addMemOperand(FIMMOLd)
11080 .add(predOps(ARMCC::AL));
11081
11082 if (NumLPads < 256) {
11083 BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
11084 .addReg(NewVReg1)
11085 .addImm(NumLPads)
11086 .add(predOps(ARMCC::AL));
11087 } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) {
11088 Register VReg1 = MRI->createVirtualRegister(TRC);
11089 BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1)
11090 .addImm(NumLPads & 0xFFFF)
11091 .add(predOps(ARMCC::AL));
11092
11093 unsigned VReg2 = VReg1;
11094 if ((NumLPads & 0xFFFF0000) != 0) {
11095 VReg2 = MRI->createVirtualRegister(TRC);
11096 BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2)
11097 .addReg(VReg1)
11098 .addImm(NumLPads >> 16)
11099 .add(predOps(ARMCC::AL));
11100 }
11101
11102 BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
11103 .addReg(NewVReg1)
11104 .addReg(VReg2)
11105 .add(predOps(ARMCC::AL));
11106 } else {
11107 MachineConstantPool *ConstantPool = MF->getConstantPool();
11108 Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
11109 const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
11110
11111 // MachineConstantPool wants an explicit alignment.
11112 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
11113 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
11114
11115 Register VReg1 = MRI->createVirtualRegister(TRC);
11116 BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))
11117 .addReg(VReg1, RegState::Define)
11118 .addConstantPoolIndex(Idx)
11119 .addImm(0)
11120 .add(predOps(ARMCC::AL));
11121 BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
11122 .addReg(NewVReg1)
11123 .addReg(VReg1, RegState::Kill)
11124 .add(predOps(ARMCC::AL));
11125 }
11126
11127 BuildMI(DispatchBB, dl, TII->get(ARM::Bcc))
11128 .addMBB(TrapBB)
11129 .addImm(ARMCC::HI)
11130 .addReg(ARM::CPSR);
11131
11132 Register NewVReg3 = MRI->createVirtualRegister(TRC);
11133 BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3)
11134 .addReg(NewVReg1)
11135 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))
11136 .add(predOps(ARMCC::AL))
11137 .add(condCodeOp());
11138 Register NewVReg4 = MRI->createVirtualRegister(TRC);
11139 BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
11140 .addJumpTableIndex(MJTI)
11141 .add(predOps(ARMCC::AL));
11142
11143 MachineMemOperand *JTMMOLd =
11144 MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(*MF),
11145 MachineMemOperand::MOLoad, 4, Align(4));
11146 Register NewVReg5 = MRI->createVirtualRegister(TRC);
11147 BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
11148 .addReg(NewVReg3, RegState::Kill)
11149 .addReg(NewVReg4)
11150 .addImm(0)
11151 .addMemOperand(JTMMOLd)
11152 .add(predOps(ARMCC::AL));
11153
11154 if (IsPositionIndependent) {
11155 BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
11156 .addReg(NewVReg5, RegState::Kill)
11157 .addReg(NewVReg4)
11158 .addJumpTableIndex(MJTI);
11159 } else {
11160 BuildMI(DispContBB, dl, TII->get(ARM::BR_JTr))
11161 .addReg(NewVReg5, RegState::Kill)
11162 .addJumpTableIndex(MJTI);
11163 }
11164 }
11165
11166 // Add the jump table entries as successors to the MBB.
11167 SmallPtrSet<MachineBasicBlock*, 8> SeenMBBs;
11168 for (MachineBasicBlock *CurMBB : LPadList) {
11169 if (SeenMBBs.insert(CurMBB).second)
11170 DispContBB->addSuccessor(CurMBB);
11171 }
11172
11173 // N.B. the order the invoke BBs are processed in doesn't matter here.
11174 const MCPhysReg *SavedRegs = RI.getCalleeSavedRegs(MF);
11175 SmallVector<MachineBasicBlock*, 64> MBBLPads;
11176 for (MachineBasicBlock *BB : InvokeBBs) {
11177
11178 // Remove the landing pad successor from the invoke block and replace it
11179 // with the new dispatch block.
11180 SmallVector<MachineBasicBlock*, 4> Successors(BB->successors());
11181 while (!Successors.empty()) {
11182 MachineBasicBlock *SMBB = Successors.pop_back_val();
11183 if (SMBB->isEHPad()) {
11184 BB->removeSuccessor(SMBB);
11185 MBBLPads.push_back(SMBB);
11186 }
11187 }
11188
11189 BB->addSuccessor(DispatchBB, BranchProbability::getZero());
11190 BB->normalizeSuccProbs();
11191
11192 // Find the invoke call and mark all of the callee-saved registers as
11193 // 'implicit defined' so that they're spilled. This prevents code from
11194 // moving instructions to before the EH block, where they will never be
11195 // executed.
11196 for (MachineBasicBlock::reverse_iterator
11197 II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
11198 if (!II->isCall()) continue;
11199
11200 DenseMap<unsigned, bool> DefRegs;
11201 for (MachineInstr::mop_iterator
11202 OI = II->operands_begin(), OE = II->operands_end();
11203 OI != OE; ++OI) {
11204 if (!OI->isReg()) continue;
11205 DefRegs[OI->getReg()] = true;
11206 }
11207
11208 MachineInstrBuilder MIB(*MF, &*II);
11209
11210 for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
11211 unsigned Reg = SavedRegs[i];
11212 if (Subtarget->isThumb2() &&
11213 !ARM::tGPRRegClass.contains(Reg) &&
11214 !ARM::hGPRRegClass.contains(Reg))
11215 continue;
11216 if (Subtarget->isThumb1Only() && !ARM::tGPRRegClass.contains(Reg))
11217 continue;
11218 if (!Subtarget->isThumb() && !ARM::GPRRegClass.contains(Reg))
11219 continue;
11220 if (!DefRegs[Reg])
11221 MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
11222 }
11223
11224 break;
11225 }
11226 }
11227
11228 // Mark all former landing pads as non-landing pads. The dispatch is the only
11229 // landing pad now.
11230 for (MachineBasicBlock *MBBLPad : MBBLPads)
11231 MBBLPad->setIsEHPad(false);
11232
11233 // The instruction is gone now.
11234 MI.eraseFromParent();
11235}
11236
11237static
11238MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
11239 for (MachineBasicBlock *S : MBB->successors())
11240 if (S != Succ)
11241 return S;
11242 llvm_unreachable("Expecting a BB with two successors!");
11243}
11244
11245/// Return the load opcode for a given load size. If load size >= 8,
11246/// a NEON opcode will be returned.
11247static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2) {
11248 if (LdSize >= 8)
11249 return LdSize == 16 ? ARM::VLD1q32wb_fixed
11250 : LdSize == 8 ? ARM::VLD1d32wb_fixed : 0;
11251 if (IsThumb1)
11252 return LdSize == 4 ? ARM::tLDRi
11253 : LdSize == 2 ? ARM::tLDRHi
11254 : LdSize == 1 ? ARM::tLDRBi : 0;
11255 if (IsThumb2)
11256 return LdSize == 4 ? ARM::t2LDR_POST
11257 : LdSize == 2 ? ARM::t2LDRH_POST
11258 : LdSize == 1 ? ARM::t2LDRB_POST : 0;
11259 return LdSize == 4 ? ARM::LDR_POST_IMM
11260 : LdSize == 2 ? ARM::LDRH_POST
11261 : LdSize == 1 ? ARM::LDRB_POST_IMM : 0;
11262}
11263
11264/// Return the store opcode for a given store size. If store size >= 8,
11265/// a NEON opcode will be returned.
11266static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2) {
11267 if (StSize >= 8)
11268 return StSize == 16 ? ARM::VST1q32wb_fixed
11269 : StSize == 8 ? ARM::VST1d32wb_fixed : 0;
11270 if (IsThumb1)
11271 return StSize == 4 ? ARM::tSTRi
11272 : StSize == 2 ? ARM::tSTRHi
11273 : StSize == 1 ? ARM::tSTRBi : 0;
11274 if (IsThumb2)
11275 return StSize == 4 ? ARM::t2STR_POST
11276 : StSize == 2 ? ARM::t2STRH_POST
11277 : StSize == 1 ? ARM::t2STRB_POST : 0;
11278 return StSize == 4 ? ARM::STR_POST_IMM
11279 : StSize == 2 ? ARM::STRH_POST
11280 : StSize == 1 ? ARM::STRB_POST_IMM : 0;
11281}
11282
11283/// Emit a post-increment load operation with given size. The instructions
11284/// will be added to BB at Pos.
11285static void emitPostLd(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
11286 const TargetInstrInfo *TII, const DebugLoc &dl,
11287 unsigned LdSize, unsigned Data, unsigned AddrIn,
11288 unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
11289 unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2);
11290 assert(LdOpc != 0 && "Should have a load opcode");
11291 if (LdSize >= 8) {
11292 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11293 .addReg(AddrOut, RegState::Define)
11294 .addReg(AddrIn)
11295 .addImm(0)
11296 .add(predOps(ARMCC::AL));
11297 } else if (IsThumb1) {
11298 // load + update AddrIn
11299 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11300 .addReg(AddrIn)
11301 .addImm(0)
11302 .add(predOps(ARMCC::AL));
11303 BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
11304 .add(t1CondCodeOp())
11305 .addReg(AddrIn)
11306 .addImm(LdSize)
11307 .add(predOps(ARMCC::AL));
11308 } else if (IsThumb2) {
11309 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11310 .addReg(AddrOut, RegState::Define)
11311 .addReg(AddrIn)
11312 .addImm(LdSize)
11313 .add(predOps(ARMCC::AL));
11314 } else { // arm
11315 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11316 .addReg(AddrOut, RegState::Define)
11317 .addReg(AddrIn)
11318 .addReg(0)
11319 .addImm(LdSize)
11320 .add(predOps(ARMCC::AL));
11321 }
11322}
11323
11324/// Emit a post-increment store operation with given size. The instructions
11325/// will be added to BB at Pos.
11326static void emitPostSt(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
11327 const TargetInstrInfo *TII, const DebugLoc &dl,
11328 unsigned StSize, unsigned Data, unsigned AddrIn,
11329 unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
11330 unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2);
11331 assert(StOpc != 0 && "Should have a store opcode");
11332 if (StSize >= 8) {
11333 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11334 .addReg(AddrIn)
11335 .addImm(0)
11336 .addReg(Data)
11337 .add(predOps(ARMCC::AL));
11338 } else if (IsThumb1) {
11339 // store + update AddrIn
11340 BuildMI(*BB, Pos, dl, TII->get(StOpc))
11341 .addReg(Data)
11342 .addReg(AddrIn)
11343 .addImm(0)
11344 .add(predOps(ARMCC::AL));
11345 BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
11346 .add(t1CondCodeOp())
11347 .addReg(AddrIn)
11348 .addImm(StSize)
11349 .add(predOps(ARMCC::AL));
11350 } else if (IsThumb2) {
11351 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11352 .addReg(Data)
11353 .addReg(AddrIn)
11354 .addImm(StSize)
11355 .add(predOps(ARMCC::AL));
11356 } else { // arm
11357 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11358 .addReg(Data)
11359 .addReg(AddrIn)
11360 .addReg(0)
11361 .addImm(StSize)
11362 .add(predOps(ARMCC::AL));
11363 }
11364}
11365
11366MachineBasicBlock *
11367ARMTargetLowering::EmitStructByval(MachineInstr &MI,
11368 MachineBasicBlock *BB) const {
11369 // This pseudo instruction has 3 operands: dst, src, size
11370 // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
11371 // Otherwise, we will generate unrolled scalar copies.
11372 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
11373 const BasicBlock *LLVM_BB = BB->getBasicBlock();
11374 MachineFunction::iterator It = ++BB->getIterator();
11375
11376 Register dest = MI.getOperand(0).getReg();
11377 Register src = MI.getOperand(1).getReg();
11378 unsigned SizeVal = MI.getOperand(2).getImm();
11379 unsigned Alignment = MI.getOperand(3).getImm();
11380 DebugLoc dl = MI.getDebugLoc();
11381
11382 MachineFunction *MF = BB->getParent();
11383 MachineRegisterInfo &MRI = MF->getRegInfo();
11384 unsigned UnitSize = 0;
11385 const TargetRegisterClass *TRC = nullptr;
11386 const TargetRegisterClass *VecTRC = nullptr;
11387
11388 bool IsThumb1 = Subtarget->isThumb1Only();
11389 bool IsThumb2 = Subtarget->isThumb2();
11390 bool IsThumb = Subtarget->isThumb();
11391
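  // Choose the widest copy unit the alignment permits: single bytes for odd
  // alignment, halfwords for 2-byte alignment, and otherwise a 16- or 8-byte
  // NEON unit when NEON is usable, falling back to 4-byte words.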
11392 if (Alignment & 1) {
11393 UnitSize = 1;
11394 } else if (Alignment & 2) {
11395 UnitSize = 2;
11396 } else {
11397 // Check whether we can use NEON instructions.
11398 if (!MF->getFunction().hasFnAttribute(Attribute::NoImplicitFloat) &&
11399 Subtarget->hasNEON()) {
11400 if ((Alignment % 16 == 0) && SizeVal >= 16)
11401 UnitSize = 16;
11402 else if ((Alignment % 8 == 0) && SizeVal >= 8)
11403 UnitSize = 8;
11404 }
11405 // Can't use NEON instructions.
11406 if (UnitSize == 0)
11407 UnitSize = 4;
11408 }
11409
11410 // Select the correct opcode and register class for unit size load/store
11411 bool IsNeon = UnitSize >= 8;
11412 TRC = IsThumb ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
11413 if (IsNeon)
11414 VecTRC = UnitSize == 16 ? &ARM::DPairRegClass
11415 : UnitSize == 8 ? &ARM::DPRRegClass
11416 : nullptr;
11417
11418 unsigned BytesLeft = SizeVal % UnitSize;
11419 unsigned LoopSize = SizeVal - BytesLeft;
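  // For example, SizeVal = 35 with UnitSize = 16 yields LoopSize = 32 and
  // BytesLeft = 3; the three trailing bytes are copied with byte-sized
  // loads and stores below.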
11420
11421 if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) {
11422 // Use LDR and STR to copy.
11423 // [scratch, srcOut] = LDR_POST(srcIn, UnitSize)
11424 // [destOut] = STR_POST(scratch, destIn, UnitSize)
11425 unsigned srcIn = src;
11426 unsigned destIn = dest;
11427 for (unsigned i = 0; i < LoopSize; i+=UnitSize) {
11428 Register srcOut = MRI.createVirtualRegister(TRC);
11429 Register destOut = MRI.createVirtualRegister(TRC);
11430 Register scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
11431 emitPostLd(BB, MI, TII, dl, UnitSize, scratch, srcIn, srcOut,
11432 IsThumb1, IsThumb2);
11433 emitPostSt(BB, MI, TII, dl, UnitSize, scratch, destIn, destOut,
11434 IsThumb1, IsThumb2);
11435 srcIn = srcOut;
11436 destIn = destOut;
11437 }
11438
11439 // Handle the leftover bytes with LDRB and STRB.
11440 // [scratch, srcOut] = LDRB_POST(srcIn, 1)
11441 // [destOut] = STRB_POST(scratch, destIn, 1)
11442 for (unsigned i = 0; i < BytesLeft; i++) {
11443 Register srcOut = MRI.createVirtualRegister(TRC);
11444 Register destOut = MRI.createVirtualRegister(TRC);
11445 Register scratch = MRI.createVirtualRegister(TRC);
11446 emitPostLd(BB, MI, TII, dl, 1, scratch, srcIn, srcOut,
11447 IsThumb1, IsThumb2);
11448 emitPostSt(BB, MI, TII, dl, 1, scratch, destIn, destOut,
11449 IsThumb1, IsThumb2);
11450 srcIn = srcOut;
11451 destIn = destOut;
11452 }
11453 MI.eraseFromParent(); // The instruction is gone now.
11454 return BB;
11455 }
11456
11457 // Expand the pseudo op to a loop.
11458 // thisMBB:
11459 // ...
11460 // movw varEnd, # --> with thumb2
11461 // movt varEnd, #
11462 // ldrcp varEnd, idx --> without thumb2
11463 // fallthrough --> loopMBB
11464 // loopMBB:
11465 // PHI varPhi, varEnd, varLoop
11466 // PHI srcPhi, src, srcLoop
11467 // PHI destPhi, dst, destLoop
11468 // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
11469 // [destLoop] = STR_POST(scratch, destPhi, UnitSize)
11470 // subs varLoop, varPhi, #UnitSize
11471 // bne loopMBB
11472 // fallthrough --> exitMBB
11473 // exitMBB:
11474 // epilogue to handle left-over bytes
11475 // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
11476 // [destOut] = STRB_POST(scratch, destLoop, 1)
11477 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
11478 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
11479 MF->insert(It, loopMBB);
11480 MF->insert(It, exitMBB);
11481
11482 // Transfer the remainder of BB and its successor edges to exitMBB.
11483 exitMBB->splice(exitMBB->begin(), BB,
11484 std::next(MachineBasicBlock::iterator(MI)), BB->end());
11485 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11486
11487 // Load an immediate to varEnd.
11488 Register varEnd = MRI.createVirtualRegister(TRC);
11489 if (Subtarget->useMovt()) {
11490 unsigned Vtmp = varEnd;
11491 if ((LoopSize & 0xFFFF0000) != 0)
11492 Vtmp = MRI.createVirtualRegister(TRC);
11493 BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVi16 : ARM::MOVi16), Vtmp)
11494 .addImm(LoopSize & 0xFFFF)
11495 .add(predOps(ARMCC::AL));
11496
11497 if ((LoopSize & 0xFFFF0000) != 0)
11498 BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVTi16 : ARM::MOVTi16), varEnd)
11499 .addReg(Vtmp)
11500 .addImm(LoopSize >> 16)
11501 .add(predOps(ARMCC::AL));
11502 } else {
11503 MachineConstantPool *ConstantPool = MF->getConstantPool();
11504 Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
11505 const Constant *C = ConstantInt::get(Int32Ty, LoopSize);
11506
11507 // MachineConstantPool wants an explicit alignment.
11508 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
11509 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
11510 MachineMemOperand *CPMMO =
11511 MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
11512 MachineMemOperand::MOLoad, 4, Align(4));
11513
11514 if (IsThumb)
11515 BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci))
11516 .addReg(varEnd, RegState::Define)
11517 .addConstantPoolIndex(Idx)
11518 .add(predOps(ARMCC::AL))
11519 .addMemOperand(CPMMO);
11520 else
11521 BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp))
11522 .addReg(varEnd, RegState::Define)
11523 .addConstantPoolIndex(Idx)
11524 .addImm(0)
11525 .add(predOps(ARMCC::AL))
11526 .addMemOperand(CPMMO);
11527 }
11528 BB->addSuccessor(loopMBB);
11529
11530 // Generate the loop body:
11531 // varPhi = PHI(varLoop, varEnd)
11532 // srcPhi = PHI(srcLoop, src)
11533 // destPhi = PHI(destLoop, dst)
11534 MachineBasicBlock *entryBB = BB;
11535 BB = loopMBB;
11536 Register varLoop = MRI.createVirtualRegister(TRC);
11537 Register varPhi = MRI.createVirtualRegister(TRC);
11538 Register srcLoop = MRI.createVirtualRegister(TRC);
11539 Register srcPhi = MRI.createVirtualRegister(TRC);
11540 Register destLoop = MRI.createVirtualRegister(TRC);
11541 Register destPhi = MRI.createVirtualRegister(TRC);
11542
11543 BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi)
11544 .addReg(varLoop).addMBB(loopMBB)
11545 .addReg(varEnd).addMBB(entryBB);
11546 BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi)
11547 .addReg(srcLoop).addMBB(loopMBB)
11548 .addReg(src).addMBB(entryBB);
11549 BuildMI(BB, dl, TII->get(ARM::PHI), destPhi)
11550 .addReg(destLoop).addMBB(loopMBB)
11551 .addReg(dest).addMBB(entryBB);
11552
11553 // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
11554 // [destLoop] = STR_POST(scratch, destPhi, UnitSize)
11555 Register scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
11556 emitPostLd(BB, BB->end(), TII, dl, UnitSize, scratch, srcPhi, srcLoop,
11557 IsThumb1, IsThumb2);
11558 emitPostSt(BB, BB->end(), TII, dl, UnitSize, scratch, destPhi, destLoop,
11559 IsThumb1, IsThumb2);
11560
11561 // Decrement loop variable by UnitSize.
11562 if (IsThumb1) {
11563 BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop)
11564 .add(t1CondCodeOp())
11565 .addReg(varPhi)
11566 .addImm(UnitSize)
11567 .add(predOps(ARMCC::AL));
11568 } else {
11569 MachineInstrBuilder MIB =
11570 BuildMI(*BB, BB->end(), dl,
11571 TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
11572 MIB.addReg(varPhi)
11573 .addImm(UnitSize)
11574 .add(predOps(ARMCC::AL))
11575 .add(condCodeOp());
11576 MIB->getOperand(5).setReg(ARM::CPSR);
11577 MIB->getOperand(5).setIsDef(true);
11578 }
11579 BuildMI(*BB, BB->end(), dl,
11580 TII->get(IsThumb1 ? ARM::tBcc : IsThumb2 ? ARM::t2Bcc : ARM::Bcc))
11581 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
11582
11583 // loopMBB can loop back to loopMBB or fall through to exitMBB.
11584 BB->addSuccessor(loopMBB);
11585 BB->addSuccessor(exitMBB);
11586
11587 // Add epilogue to handle BytesLeft.
11588 BB = exitMBB;
11589 auto StartOfExit = exitMBB->begin();
11590
11591 // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
11592 // [destOut] = STRB_POST(scratch, destLoop, 1)
11593 unsigned srcIn = srcLoop;
11594 unsigned destIn = destLoop;
11595 for (unsigned i = 0; i < BytesLeft; i++) {
11596 Register srcOut = MRI.createVirtualRegister(TRC);
11597 Register destOut = MRI.createVirtualRegister(TRC);
11598 Register scratch = MRI.createVirtualRegister(TRC);
11599 emitPostLd(BB, StartOfExit, TII, dl, 1, scratch, srcIn, srcOut,
11600 IsThumb1, IsThumb2);
11601 emitPostSt(BB, StartOfExit, TII, dl, 1, scratch, destIn, destOut,
11602 IsThumb1, IsThumb2);
11603 srcIn = srcOut;
11604 destIn = destOut;
11605 }
11606
11607 MI.eraseFromParent(); // The instruction is gone now.
11608 return BB;
11609}
11610
11611MachineBasicBlock *
11612ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI,
11613 MachineBasicBlock *MBB) const {
11614 const TargetMachine &TM = getTargetMachine();
11615 const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
11616 DebugLoc DL = MI.getDebugLoc();
11617
11618 assert(Subtarget->isTargetWindows() &&
11619 "__chkstk is only supported on Windows");
11620 assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode");
11621
11622 // __chkstk takes the number of words to allocate on the stack in R4, and
11623 // returns the stack adjustment in number of bytes in R4. This will not
11624 // clobber any other registers (other than the obvious lr).
11625 //
11626 // Although, technically, IP should be considered a register which may be
11627 // clobbered, the call itself will not touch it. Windows on ARM is a pure
11628 // thumb-2 environment, so there is no interworking required. As a result, we
11629 // do not expect a veneer to be emitted by the linker, clobbering IP.
11630 //
11631 // Each module receives its own copy of __chkstk, so no import thunk is
11632 // required, again, ensuring that IP is not clobbered.
11633 //
11634 // Finally, although some linkers may theoretically provide a trampoline for
11635 // out of range calls (which is quite common due to a 32M range limitation of
11636 // branches for Thumb), we can generate the long-call version via
11637 // -mcmodel=large, alleviating the need for the trampoline which may clobber
11638 // IP.
11639
11640 switch (TM.getCodeModel()) {
11641 case CodeModel::Tiny:
11642 llvm_unreachable("Tiny code model not available on ARM.")::llvm::llvm_unreachable_internal("Tiny code model not available on ARM."
, "llvm/lib/Target/ARM/ARMISelLowering.cpp", 11642)
;
11643 case CodeModel::Small:
11644 case CodeModel::Medium:
11645 case CodeModel::Kernel:
11646 BuildMI(*MBB, MI, DL, TII.get(ARM::tBL))
11647 .add(predOps(ARMCC::AL))
11648 .addExternalSymbol("__chkstk")
11649 .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
11650 .addReg(ARM::R4, RegState::Implicit | RegState::Define)
11651 .addReg(ARM::R12,
11652 RegState::Implicit | RegState::Define | RegState::Dead)
11653 .addReg(ARM::CPSR,
11654 RegState::Implicit | RegState::Define | RegState::Dead);
11655 break;
11656 case CodeModel::Large: {
11657 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
11658 Register Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11659
11660 BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg)
11661 .addExternalSymbol("__chkstk");
11662 BuildMI(*MBB, MI, DL, TII.get(gettBLXrOpcode(*MBB->getParent())))
11663 .add(predOps(ARMCC::AL))
11664 .addReg(Reg, RegState::Kill)
11665 .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
11666 .addReg(ARM::R4, RegState::Implicit | RegState::Define)
11667 .addReg(ARM::R12,
11668 RegState::Implicit | RegState::Define | RegState::Dead)
11669 .addReg(ARM::CPSR,
11670 RegState::Implicit | RegState::Define | RegState::Dead);
11671 break;
11672 }
11673 }
11674
11675 BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr), ARM::SP)
11676 .addReg(ARM::SP, RegState::Kill)
11677 .addReg(ARM::R4, RegState::Kill)
11678 .setMIFlags(MachineInstr::FrameSetup)
11679 .add(predOps(ARMCC::AL))
11680 .add(condCodeOp());
11681
11682 MI.eraseFromParent();
11683 return MBB;
11684}
11685
11686MachineBasicBlock *
11687ARMTargetLowering::EmitLowered__dbzchk(MachineInstr &MI,
11688 MachineBasicBlock *MBB) const {
11689 DebugLoc DL = MI.getDebugLoc();
11690 MachineFunction *MF = MBB->getParent();
11691 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
11692
11693 MachineBasicBlock *ContBB = MF->CreateMachineBasicBlock();
11694 MF->insert(++MBB->getIterator(), ContBB);
11695 ContBB->splice(ContBB->begin(), MBB,
11696 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
11697 ContBB->transferSuccessorsAndUpdatePHIs(MBB);
11698 MBB->addSuccessor(ContBB);
11699
11700 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
11701 BuildMI(TrapBB, DL, TII->get(ARM::t__brkdiv0));
11702 MF->push_back(TrapBB);
11703 MBB->addSuccessor(TrapBB);
11704
11705 BuildMI(*MBB, MI, DL, TII->get(ARM::tCMPi8))
11706 .addReg(MI.getOperand(0).getReg())
11707 .addImm(0)
11708 .add(predOps(ARMCC::AL));
11709 BuildMI(*MBB, MI, DL, TII->get(ARM::t2Bcc))
11710 .addMBB(TrapBB)
11711 .addImm(ARMCC::EQ)
11712 .addReg(ARM::CPSR);
11713
11714 MI.eraseFromParent();
11715 return ContBB;
11716}
11717
11718// The CPSR operand of SelectItr might be missing a kill marker
11719// because there were multiple uses of CPSR, and ISel didn't know
11720// which to mark. Figure out whether SelectItr should have had a
11721// kill marker, and set it if it should. Returns the correct kill
11722// marker value.
11723static bool checkAndUpdateCPSRKill(MachineBasicBlock::iterator SelectItr,
11724 MachineBasicBlock* BB,
11725 const TargetRegisterInfo* TRI) {
11726 // Scan forward through BB for a use/def of CPSR.
11727 MachineBasicBlock::iterator miI(std::next(SelectItr));
11728 for (MachineBasicBlock::iterator miE = BB->end(); miI != miE; ++miI) {
11729 const MachineInstr& mi = *miI;
11730 if (mi.readsRegister(ARM::CPSR))
11731 return false;
11732 if (mi.definesRegister(ARM::CPSR))
11733 break; // Should have kill-flag - update below.
11734 }
11735
11736 // If we hit the end of the block, check whether CPSR is live into a
11737 // successor.
11738 if (miI == BB->end()) {
11739 for (MachineBasicBlock *Succ : BB->successors())
11740 if (Succ->isLiveIn(ARM::CPSR))
11741 return false;
11742 }
11743
11744 // We found a def, or hit the end of the basic block and CPSR wasn't live
11745 // out. SelectMI should have a kill flag on CPSR.
11746 SelectItr->addRegisterKilled(ARM::CPSR, TRI);
11747 return true;
11748}
11749
11750/// Adds logic in loop entry MBB to calculate loop iteration count and adds
11751/// t2WhileLoopSetup and t2WhileLoopStart to generate a WLS loop.
11752static Register genTPEntry(MachineBasicBlock *TpEntry,
11753 MachineBasicBlock *TpLoopBody,
11754 MachineBasicBlock *TpExit, Register OpSizeReg,
11755 const TargetInstrInfo *TII, DebugLoc Dl,
11756 MachineRegisterInfo &MRI) {
11757 // Calculates loop iteration count = ceil(n/16) = (n + 15) >> 4.
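  // For example, n = 37 gives (37 + 15) >> 4 = 52 >> 4 = 3 iterations, each
  // covering up to 16 bytes under tail predication.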
11758 Register AddDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11759 BuildMI(TpEntry, Dl, TII->get(ARM::t2ADDri), AddDestReg)
11760 .addUse(OpSizeReg)
11761 .addImm(15)
11762 .add(predOps(ARMCC::AL))
11763 .addReg(0);
11764
11765 Register LsrDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11766 BuildMI(TpEntry, Dl, TII->get(ARM::t2LSRri), LsrDestReg)
11767 .addUse(AddDestReg, RegState::Kill)
11768 .addImm(4)
11769 .add(predOps(ARMCC::AL))
11770 .addReg(0);
11771
11772 Register TotalIterationsReg = MRI.createVirtualRegister(&ARM::GPRlrRegClass);
11773 BuildMI(TpEntry, Dl, TII->get(ARM::t2WhileLoopSetup), TotalIterationsReg)
11774 .addUse(LsrDestReg, RegState::Kill);
11775
11776 BuildMI(TpEntry, Dl, TII->get(ARM::t2WhileLoopStart))
11777 .addUse(TotalIterationsReg)
11778 .addMBB(TpExit);
11779
11780 BuildMI(TpEntry, Dl, TII->get(ARM::t2B))
11781 .addMBB(TpLoopBody)
11782 .add(predOps(ARMCC::AL));
11783
11784 return TotalIterationsReg;
11785}
11786
11787/// Adds logic in the loopBody MBB to generate MVE_VCTP, t2DoLoopDec and
11788/// t2DoLoopEnd. These are used by later passes to generate tail predicated
11789/// loops.
11790static void genTPLoopBody(MachineBasicBlock *TpLoopBody,
11791 MachineBasicBlock *TpEntry, MachineBasicBlock *TpExit,
11792 const TargetInstrInfo *TII, DebugLoc Dl,
11793 MachineRegisterInfo &MRI, Register OpSrcReg,
11794 Register OpDestReg, Register ElementCountReg,
11795 Register TotalIterationsReg, bool IsMemcpy) {
11796 // First insert 4 PHI nodes for: Current pointer to Src (if memcpy), Dest
11797 // array, loop iteration counter, predication counter.
11798
11799 Register SrcPhiReg, CurrSrcReg;
11800 if (IsMemcpy) {
11801 // Current position in the src array
11802 SrcPhiReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11803 CurrSrcReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11804 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), SrcPhiReg)
11805 .addUse(OpSrcReg)
11806 .addMBB(TpEntry)
11807 .addUse(CurrSrcReg)
11808 .addMBB(TpLoopBody);
11809 }
11810
11811 // Current position in the dest array
11812 Register DestPhiReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11813 Register CurrDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11814 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), DestPhiReg)
11815 .addUse(OpDestReg)
11816 .addMBB(TpEntry)
11817 .addUse(CurrDestReg)
11818 .addMBB(TpLoopBody);
11819
11820 // Current loop counter
11821 Register LoopCounterPhiReg = MRI.createVirtualRegister(&ARM::GPRlrRegClass);
11822 Register RemainingLoopIterationsReg =
11823 MRI.createVirtualRegister(&ARM::GPRlrRegClass);
11824 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), LoopCounterPhiReg)
11825 .addUse(TotalIterationsReg)
11826 .addMBB(TpEntry)
11827 .addUse(RemainingLoopIterationsReg)
11828 .addMBB(TpLoopBody);
11829
11830 // Predication counter
11831 Register PredCounterPhiReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11832 Register RemainingElementsReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11833 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), PredCounterPhiReg)
11834 .addUse(ElementCountReg)
11835 .addMBB(TpEntry)
11836 .addUse(RemainingElementsReg)
11837 .addMBB(TpLoopBody);
11838
11839 // Pass predication counter to VCTP
11840 Register VccrReg = MRI.createVirtualRegister(&ARM::VCCRRegClass);
11841 BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VCTP8), VccrReg)
11842 .addUse(PredCounterPhiReg)
11843 .addImm(ARMVCC::None)
11844 .addReg(0)
11845 .addReg(0);
11846
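  // Each iteration consumes up to 16 elements, so decrement the predication
  // counter by 16 for the next iteration; MVE_VCTP8 above converts the
  // current counter into the per-lane predicate.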
11847 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2SUBri), RemainingElementsReg)
11848 .addUse(PredCounterPhiReg)
11849 .addImm(16)
11850 .add(predOps(ARMCC::AL))
11851 .addReg(0);
11852
11853 // VLDRB (only if memcpy) and VSTRB instructions, predicated using VPR
11854 Register SrcValueReg;
11855 if (IsMemcpy) {
11856 SrcValueReg = MRI.createVirtualRegister(&ARM::MQPRRegClass);
11857 BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VLDRBU8_post))
11858 .addDef(CurrSrcReg)
11859 .addDef(SrcValueReg)
11860 .addReg(SrcPhiReg)
11861 .addImm(16)
11862 .addImm(ARMVCC::Then)
11863 .addUse(VccrReg)
11864 .addReg(0);
11865 } else
11866 SrcValueReg = OpSrcReg;
11867
11868 BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VSTRBU8_post))
11869 .addDef(CurrDestReg)
11870 .addUse(SrcValueReg)
11871 .addReg(DestPhiReg)
11872 .addImm(16)
11873 .addImm(ARMVCC::Then)
11874 .addUse(VccrReg)
11875 .addReg(0);
11876
11877 // Add the pseudoInstrs for decrementing the loop counter and marking the
11878 // end:t2DoLoopDec and t2DoLoopEnd
11879 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2LoopDec), RemainingLoopIterationsReg)
11880 .addUse(LoopCounterPhiReg)
11881 .addImm(1);
11882
11883 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2LoopEnd))
11884 .addUse(RemainingLoopIterationsReg)
11885 .addMBB(TpLoopBody);
11886
11887 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2B))
11888 .addMBB(TpExit)
11889 .add(predOps(ARMCC::AL));
11890}
11891
11892MachineBasicBlock *
11893ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
11894 MachineBasicBlock *BB) const {
11895 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
11896 DebugLoc dl = MI.getDebugLoc();
11897 bool isThumb2 = Subtarget->isThumb2();
11898 switch (MI.getOpcode()) {
11899 default: {
11900 MI.print(errs());
11901 llvm_unreachable("Unexpected instr type to insert");
11902 }
11903
11904 // Thumb1 post-indexed loads are really just single-register LDMs.
11905 case ARM::tLDR_postidx: {
11906 MachineOperand Def(MI.getOperand(1));
11907 BuildMI(*BB, MI, dl, TII->get(ARM::tLDMIA_UPD))
11908 .add(Def) // Rn_wb
11909 .add(MI.getOperand(2)) // Rn
11910 .add(MI.getOperand(3)) // PredImm
11911 .add(MI.getOperand(4)) // PredReg
11912 .add(MI.getOperand(0)) // Rt
11913 .cloneMemRefs(MI);
11914 MI.eraseFromParent();
11915 return BB;
11916 }
11917
11918 case ARM::MVE_MEMCPYLOOPINST:
11919 case ARM::MVE_MEMSETLOOPINST: {
11920
11921 // Transformation below expands MVE_MEMCPYLOOPINST/MVE_MEMSETLOOPINST Pseudo
11922 // into a Tail Predicated (TP) Loop. It adds the instructions to calculate
11923 // the iteration count = ceil(size_in_bytes/16) in the TP entry block and
11924 // adds the relevant instructions in the TP loop Body for generation of a
11925 // WLSTP loop.
11926
11927 // Below is relevant portion of the CFG after the transformation.
11928 // The Machine Basic Blocks are shown along with branch conditions (in
11929 // brackets). Note that TP entry/exit MBBs depict the entry/exit of this
11930 // portion of the CFG and may not necessarily be the entry/exit of the
11931 // function.
11932
11933 // (Relevant) CFG after transformation:
11934 // TP entry MBB
11935 // |
11936 // |-----------------|
11937 // (n <= 0) (n > 0)
11938 // | |
11939 // | TP loop Body MBB<--|
11940 // | | |
11941 // \ |___________|
11942 // \ /
11943 // TP exit MBB
11944
11945 MachineFunction *MF = BB->getParent();
11946 MachineFunctionProperties &Properties = MF->getProperties();
11947 MachineRegisterInfo &MRI = MF->getRegInfo();
11948
11949 Register OpDestReg = MI.getOperand(0).getReg();
11950 Register OpSrcReg = MI.getOperand(1).getReg();
11951 Register OpSizeReg = MI.getOperand(2).getReg();
11952
11953 // Allocate the required MBBs and add to parent function.
11954 MachineBasicBlock *TpEntry = BB;
11955 MachineBasicBlock *TpLoopBody = MF->CreateMachineBasicBlock();
11956 MachineBasicBlock *TpExit;
11957
11958 MF->push_back(TpLoopBody);
11959
11960 // If any instructions are present in the current block after
11961 // MVE_MEMCPYLOOPINST or MVE_MEMSETLOOPINST, split the current block and
11962 // move the instructions into the newly created exit block. If there are no
11963 // instructions, add an explicit branch to the FallThrough block and then
11964 // split.
11965 //
11966 // The split is required for two reasons:
11967 // 1) A terminator (t2WhileLoopStart) will be placed at that site.
11968 // 2) Since a TPLoopBody will be added later, any phis in successive blocks
11969 // need to be updated. splitAt() already handles this.
11970 TpExit = BB->splitAt(MI, false);
11971 if (TpExit == BB) {
11972 assert(BB->canFallThrough() && "Exit Block must be Fallthrough of the "
11973 "block containing memcpy/memset Pseudo");
11974 TpExit = BB->getFallThrough();
11975 BuildMI(BB, dl, TII->get(ARM::t2B))
11976 .addMBB(TpExit)
11977 .add(predOps(ARMCC::AL));
11978 TpExit = BB->splitAt(MI, false);
11979 }
11980
11981 // Add logic for iteration count
11982 Register TotalIterationsReg =
11983 genTPEntry(TpEntry, TpLoopBody, TpExit, OpSizeReg, TII, dl, MRI);
11984
11985 // Add the vectorized (and predicated) loads/store instructions
11986 bool IsMemcpy = MI.getOpcode() == ARM::MVE_MEMCPYLOOPINST;
11987 genTPLoopBody(TpLoopBody, TpEntry, TpExit, TII, dl, MRI, OpSrcReg,
11988 OpDestReg, OpSizeReg, TotalIterationsReg, IsMemcpy);
11989
11990 // Required to avoid conflict with the MachineVerifier during testing.
11991 Properties.reset(MachineFunctionProperties::Property::NoPHIs);
11992
11993 // Connect the blocks
11994 TpEntry->addSuccessor(TpLoopBody);
11995 TpLoopBody->addSuccessor(TpLoopBody);
11996 TpLoopBody->addSuccessor(TpExit);
11997
11998 // Reorder for a more natural layout
11999 TpLoopBody->moveAfter(TpEntry);
12000 TpExit->moveAfter(TpLoopBody);
12001
12002 // Finally, remove the memcpy/memset pseudo instruction
12003 MI.eraseFromParent();
12004
12005 // Return the exit block as it may contain other instructions requiring a
12006 // custom inserter
12007 return TpExit;
12008 }
12009
12010 // The Thumb2 pre-indexed stores have the same MI operands; they just
12011 // define them differently in the .td files from the isel patterns, so
12012 // they need pseudos.
12013 case ARM::t2STR_preidx:
12014 MI.setDesc(TII->get(ARM::t2STR_PRE));
12015 return BB;
12016 case ARM::t2STRB_preidx:
12017 MI.setDesc(TII->get(ARM::t2STRB_PRE));
12018 return BB;
12019 case ARM::t2STRH_preidx:
12020 MI.setDesc(TII->get(ARM::t2STRH_PRE));
12021 return BB;
12022
12023 case ARM::STRi_preidx:
12024 case ARM::STRBi_preidx: {
12025 unsigned NewOpc = MI.getOpcode() == ARM::STRi_preidx ? ARM::STR_PRE_IMM
12026 : ARM::STRB_PRE_IMM;
12027 // Decode the offset.
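    // The AM2 immediate packs the add/sub direction together with the offset
    // magnitude; split them apart and apply the sign before rebuilding the
    // store with an explicit immediate offset.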
12028 unsigned Offset = MI.getOperand(4).getImm();
12029 bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub;
12030 Offset = ARM_AM::getAM2Offset(Offset);
12031 if (isSub)
12032 Offset = -Offset;
12033
12034 MachineMemOperand *MMO = *MI.memoperands_begin();
12035 BuildMI(*BB, MI, dl, TII->get(NewOpc))
12036 .add(MI.getOperand(0)) // Rn_wb
12037 .add(MI.getOperand(1)) // Rt
12038 .add(MI.getOperand(2)) // Rn
12039 .addImm(Offset) // offset (skip GPR==zero_reg)
12040 .add(MI.getOperand(5)) // pred
12041 .add(MI.getOperand(6))
12042 .addMemOperand(MMO);
12043 MI.eraseFromParent();
12044 return BB;
12045 }
12046 case ARM::STRr_preidx:
12047 case ARM::STRBr_preidx:
12048 case ARM::STRH_preidx: {
12049 unsigned NewOpc;
12050 switch (MI.getOpcode()) {
12051 default: llvm_unreachable("unexpected opcode!");
12052 case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break;
12053 case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break;
12054 case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break;
12055 }
12056 MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
12057 for (const MachineOperand &MO : MI.operands())
12058 MIB.add(MO);
12059 MI.eraseFromParent();
12060 return BB;
12061 }
12062
12063 case ARM::tMOVCCr_pseudo: {
12064 // To "insert" a SELECT_CC instruction, we actually have to insert the
12065 // diamond control-flow pattern. The incoming instruction knows the
12066 // destination vreg to set, the condition code register to branch on, the
12067 // true/false values to select between, and a branch opcode to use.
12068 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12069 MachineFunction::iterator It = ++BB->getIterator();
12070
12071 // thisMBB:
12072 // ...
12073 // TrueVal = ...
12074 // cmpTY ccX, r1, r2
12075 // bCC copy1MBB
12076 // fallthrough --> copy0MBB
12077 MachineBasicBlock *thisMBB = BB;
12078 MachineFunction *F = BB->getParent();
12079 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
12080 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12081 F->insert(It, copy0MBB);
12082 F->insert(It, sinkMBB);
12083
12084 // Check whether CPSR is live past the tMOVCCr_pseudo.
12085 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
12086 if (!MI.killsRegister(ARM::CPSR) &&
12087 !checkAndUpdateCPSRKill(MI, thisMBB, TRI)) {
12088 copy0MBB->addLiveIn(ARM::CPSR);
12089 sinkMBB->addLiveIn(ARM::CPSR);
12090 }
12091
12092 // Transfer the remainder of BB and its successor edges to sinkMBB.
12093 sinkMBB->splice(sinkMBB->begin(), BB,
12094 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12095 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
12096
12097 BB->addSuccessor(copy0MBB);
12098 BB->addSuccessor(sinkMBB);
12099
12100 BuildMI(BB, dl, TII->get(ARM::tBcc))
12101 .addMBB(sinkMBB)
12102 .addImm(MI.getOperand(3).getImm())
12103 .addReg(MI.getOperand(4).getReg());
12104
12105 // copy0MBB:
12106 // %FalseValue = ...
12107 // # fallthrough to sinkMBB
12108 BB = copy0MBB;
12109
12110 // Update machine-CFG edges
12111 BB->addSuccessor(sinkMBB);
12112
12113 // sinkMBB:
12114 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
12115 // ...
12116 BB = sinkMBB;
12117 BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), MI.getOperand(0).getReg())
12118 .addReg(MI.getOperand(1).getReg())
12119 .addMBB(copy0MBB)
12120 .addReg(MI.getOperand(2).getReg())
12121 .addMBB(thisMBB);
12122
12123 MI.eraseFromParent(); // The pseudo instruction is gone now.
12124 return BB;
12125 }
12126
12127 case ARM::BCCi64:
12128 case ARM::BCCZi64: {
12129 // If there is an unconditional branch to the other successor, remove it.
12130 BB->erase(std::next(MachineBasicBlock::iterator(MI)), BB->end());
12131
12132 // Compare both parts that make up the double comparison separately for
12133 // equality.
12134 bool RHSisZero = MI.getOpcode() == ARM::BCCZi64;
12135
12136 Register LHS1 = MI.getOperand(1).getReg();
12137 Register LHS2 = MI.getOperand(2).getReg();
12138 if (RHSisZero) {
12139 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12140 .addReg(LHS1)
12141 .addImm(0)
12142 .add(predOps(ARMCC::AL));
12143 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12144 .addReg(LHS2).addImm(0)
12145 .addImm(ARMCC::EQ).addReg(ARM::CPSR);
12146 } else {
12147 Register RHS1 = MI.getOperand(3).getReg();
12148 Register RHS2 = MI.getOperand(4).getReg();
12149 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
12150 .addReg(LHS1)
12151 .addReg(RHS1)
12152 .add(predOps(ARMCC::AL));
12153 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
12154 .addReg(LHS2).addReg(RHS2)
12155 .addImm(ARMCC::EQ).addReg(ARM::CPSR);
12156 }
12157
12158 MachineBasicBlock *destMBB = MI.getOperand(RHSisZero ? 3 : 5).getMBB();
12159 MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
12160 if (MI.getOperand(0).getImm() == ARMCC::NE)
12161 std::swap(destMBB, exitMBB);
12162
12163 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
12164 .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
12165 if (isThumb2)
12166 BuildMI(BB, dl, TII->get(ARM::t2B))
12167 .addMBB(exitMBB)
12168 .add(predOps(ARMCC::AL));
12169 else
12170 BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB);
12171
12172 MI.eraseFromParent(); // The pseudo instruction is gone now.
12173 return BB;
12174 }
12175
12176 case ARM::Int_eh_sjlj_setjmp:
12177 case ARM::Int_eh_sjlj_setjmp_nofp:
12178 case ARM::tInt_eh_sjlj_setjmp:
12179 case ARM::t2Int_eh_sjlj_setjmp:
12180 case ARM::t2Int_eh_sjlj_setjmp_nofp:
12181 return BB;
12182
12183 case ARM::Int_eh_sjlj_setup_dispatch:
12184 EmitSjLjDispatchBlock(MI, BB);
12185 return BB;
12186
12187 case ARM::ABS:
12188 case ARM::t2ABS: {
12189 // To insert an ABS instruction, we have to insert the
12190 // diamond control-flow pattern. The incoming instruction knows the
12191 // source vreg to test against 0, the destination vreg to set,
12192 // the condition code register to branch on, the
12193 // true/false values to select between, and a branch opcode to use.
12194 // It transforms
12195 // V1 = ABS V0
12196 // into
12197 // V2 = MOVS V0
12198 // BCC (branch to SinkBB if V0 >= 0)
12199 // RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0)
12200 // SinkBB: V1 = PHI(V2, V3)
12201 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12202 MachineFunction::iterator BBI = ++BB->getIterator();
12203 MachineFunction *Fn = BB->getParent();
12204 MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
12205 MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB);
12206 Fn->insert(BBI, RSBBB);
12207 Fn->insert(BBI, SinkBB);
12208
12209 Register ABSSrcReg = MI.getOperand(1).getReg();
12210 Register ABSDstReg = MI.getOperand(0).getReg();
12211 bool ABSSrcKill = MI.getOperand(1).isKill();
12212 bool isThumb2 = Subtarget->isThumb2();
12213 MachineRegisterInfo &MRI = Fn->getRegInfo();
12214 // In Thumb mode S must not be specified if source register is the SP or
12215 // PC and if destination register is the SP, so restrict register class
12216 Register NewRsbDstReg = MRI.createVirtualRegister(
12217 isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass);
12218
12219 // Transfer the remainder of BB and its successor edges to sinkMBB.
12220 SinkBB->splice(SinkBB->begin(), BB,
12221 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12222 SinkBB->transferSuccessorsAndUpdatePHIs(BB);
12223
12224 BB->addSuccessor(RSBBB);
12225 BB->addSuccessor(SinkBB);
12226
12227 // fall through to SinkMBB
12228 RSBBB->addSuccessor(SinkBB);
12229
12230 // insert a cmp at the end of BB
12231 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12232 .addReg(ABSSrcReg)
12233 .addImm(0)
12234 .add(predOps(ARMCC::AL));
12235
12236 // insert a bcc with opposite CC to ARMCC::MI at the end of BB
12237 BuildMI(BB, dl,
12238 TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
12239 .addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR);
12240
12241 // insert rsbri in RSBBB
12242 // Note: BCC and rsbri will be converted into predicated rsbmi
12243 // by if-conversion pass
12244 BuildMI(*RSBBB, RSBBB->begin(), dl,
12245 TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
12246 .addReg(ABSSrcReg, ABSSrcKill ? RegState::Kill : 0)
12247 .addImm(0)
12248 .add(predOps(ARMCC::AL))
12249 .add(condCodeOp());
12250
12251 // insert PHI in SinkBB,
12252 // reuse ABSDstReg to not change uses of ABS instruction
12253 BuildMI(*SinkBB, SinkBB->begin(), dl,
12254 TII->get(ARM::PHI), ABSDstReg)
12255 .addReg(NewRsbDstReg).addMBB(RSBBB)
12256 .addReg(ABSSrcReg).addMBB(BB);
12257
12258 // remove ABS instruction
12259 MI.eraseFromParent();
12260
12261 // return last added BB
12262 return SinkBB;
12263 }
12264 case ARM::COPY_STRUCT_BYVAL_I32:
12265 ++NumLoopByVals;
12266 return EmitStructByval(MI, BB);
12267 case ARM::WIN__CHKSTK:
12268 return EmitLowered__chkstk(MI, BB);
12269 case ARM::WIN__DBZCHK:
12270 return EmitLowered__dbzchk(MI, BB);
12271 }
12272}
12273
12274/// Attaches vregs to MEMCPY that it will use as scratch registers
12275/// when it is expanded into LDM/STM. This is done as a post-isel lowering
12276/// instead of as a custom inserter because we need the use list from the SDNode.
12277static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget,
12278 MachineInstr &MI, const SDNode *Node) {
12279 bool isThumb1 = Subtarget->isThumb1Only();
12280
12281 DebugLoc DL = MI.getDebugLoc();
12282 MachineFunction *MF = MI.getParent()->getParent();
12283 MachineRegisterInfo &MRI = MF->getRegInfo();
12284 MachineInstrBuilder MIB(*MF, MI);
12285
12286 // If the new dst/src is unused mark it as dead.
12287 if (!Node->hasAnyUseOfValue(0)) {
12288 MI.getOperand(0).setIsDead(true);
12289 }
12290 if (!Node->hasAnyUseOfValue(1)) {
12291 MI.getOperand(1).setIsDead(true);
12292 }
12293
12294 // The MEMCPY both defines and kills the scratch registers.
12295 for (unsigned I = 0; I != MI.getOperand(4).getImm(); ++I) {
12296 Register TmpReg = MRI.createVirtualRegister(isThumb1 ? &ARM::tGPRRegClass
12297 : &ARM::GPRRegClass);
12298 MIB.addReg(TmpReg, RegState::Define|RegState::Dead);
12299 }
12300}
12301
12302void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
12303 SDNode *Node) const {
12304 if (MI.getOpcode() == ARM::MEMCPY) {
12305 attachMEMCPYScratchRegs(Subtarget, MI, Node);
12306 return;
12307 }
12308
12309 const MCInstrDesc *MCID = &MI.getDesc();
12310 // Adjust potentially 's'-setting instructions after isel, i.e. ADC, SBC, RSB,
12311 // RSC. Coming out of isel, they have an implicit CPSR def, but the optional
12312 // operand is still set to noreg. If needed, set the optional operand's
12313 // register to CPSR, and remove the redundant implicit def.
12314 //
12315 // e.g. ADCS (..., implicit-def CPSR) -> ADC (... opt:def CPSR).
12316
12317 // Rename pseudo opcodes.
12318 unsigned NewOpc = convertAddSubFlagsOpcode(MI.getOpcode());
12319 unsigned ccOutIdx;
12320 if (NewOpc) {
12321 const ARMBaseInstrInfo *TII = Subtarget->getInstrInfo();
12322 MCID = &TII->get(NewOpc);
12323
12324 assert(MCID->getNumOperands() ==
12325 MI.getDesc().getNumOperands() + 5 - MI.getDesc().getSize()
12326 && "converted opcode should be the same except for cc_out"
12327 " (and, on Thumb1, pred)");
12328
12329 MI.setDesc(*MCID);
12330
12331 // Add the optional cc_out operand
12332 MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/true));
12333
12334 // On Thumb1, move all input operands to the end, then add the predicate
12335 if (Subtarget->isThumb1Only()) {
12336 for (unsigned c = MCID->getNumOperands() - 4; c--;) {
12337 MI.addOperand(MI.getOperand(1));
12338 MI.removeOperand(1);
12339 }
12340
12341 // Restore the ties
12342 for (unsigned i = MI.getNumOperands(); i--;) {
12343 const MachineOperand& op = MI.getOperand(i);
12344 if (op.isReg() && op.isUse()) {
12345 int DefIdx = MCID->getOperandConstraint(i, MCOI::TIED_TO);
12346 if (DefIdx != -1)
12347 MI.tieOperands(DefIdx, i);
12348 }
12349 }
12350
12351 MI.addOperand(MachineOperand::CreateImm(ARMCC::AL));
12352 MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/false));
12353 ccOutIdx = 1;
12354 } else
12355 ccOutIdx = MCID->getNumOperands() - 1;
12356 } else
12357 ccOutIdx = MCID->getNumOperands() - 1;
12358
12359 // Any ARM instruction that sets the 's' bit should specify an optional
12360 // "cc_out" operand in the last operand position.
12361 if (!MI.hasOptionalDef() || !MCID->operands()[ccOutIdx].isOptionalDef()) {
12362 assert(!NewOpc && "Optional cc_out operand required");
12363 return;
12364 }
12365 // Look for an implicit def of CPSR added by MachineInstr ctor. Remove it
12366 // since we already have an optional CPSR def.
12367 bool definesCPSR = false;
12368 bool deadCPSR = false;
12369 for (unsigned i = MCID->getNumOperands(), e = MI.getNumOperands(); i != e;
12370 ++i) {
12371 const MachineOperand &MO = MI.getOperand(i);
12372 if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) {
12373 definesCPSR = true;
12374 if (MO.isDead())
12375 deadCPSR = true;
12376 MI.removeOperand(i);
12377 break;
12378 }
12379 }
12380 if (!definesCPSR) {
12381 assert(!NewOpc && "Optional cc_out operand required");
12382 return;
12383 }
12384 assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag");
12385 if (deadCPSR) {
12386 assert(!MI.getOperand(ccOutIdx).getReg() &&
12387 "expect uninitialized optional cc_out operand");
12388 // Thumb1 instructions must have the S bit even if the CPSR is dead.
12389 if (!Subtarget->isThumb1Only())
12390 return;
12391 }
12392
12393 // If this instruction was defined with an optional CPSR def and its dag node
12394 // had a live implicit CPSR def, then activate the optional CPSR def.
12395 MachineOperand &MO = MI.getOperand(ccOutIdx);
12396 MO.setReg(ARM::CPSR);
12397 MO.setIsDef(true);
12398}
12399
12400//===----------------------------------------------------------------------===//
12401// ARM Optimization Hooks
12402//===----------------------------------------------------------------------===//
12403
12404// Helper function that checks if N is a null or all ones constant.
12405static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) {
12406 return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
12407}
12408
12409// Return true if N is conditionally 0 or all ones.
12410// Detects these expressions where cc is an i1 value:
12411//
12412// (select cc 0, y) [AllOnes=0]
12413// (select cc y, 0) [AllOnes=0]
12414// (zext cc) [AllOnes=0]
12415// (sext cc) [AllOnes=0/1]
12416// (select cc -1, y) [AllOnes=1]
12417// (select cc y, -1) [AllOnes=1]
12418//
12419// Invert is set when N is the null/all ones constant when CC is false.
12420// OtherOp is set to the alternative value of N.
12421static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes,
12422 SDValue &CC, bool &Invert,
12423 SDValue &OtherOp,
12424 SelectionDAG &DAG) {
12425 switch (N->getOpcode()) {
12426 default: return false;
12427 case ISD::SELECT: {
12428 CC = N->getOperand(0);
12429 SDValue N1 = N->getOperand(1);
12430 SDValue N2 = N->getOperand(2);
12431 if (isZeroOrAllOnes(N1, AllOnes)) {
12432 Invert = false;
12433 OtherOp = N2;
12434 return true;
12435 }
12436 if (isZeroOrAllOnes(N2, AllOnes)) {
12437 Invert = true;
12438 OtherOp = N1;
12439 return true;
12440 }
12441 return false;
12442 }
12443 case ISD::ZERO_EXTEND:
12444 // (zext cc) can never be the all ones value.
12445 if (AllOnes)
12446 return false;
12447 [[fallthrough]];
12448 case ISD::SIGN_EXTEND: {
12449 SDLoc dl(N);
12450 EVT VT = N->getValueType(0);
12451 CC = N->getOperand(0);
12452 if (CC.getValueType() != MVT::i1 || CC.getOpcode() != ISD::SETCC)
12453 return false;
12454 Invert = !AllOnes;
12455 if (AllOnes)
12456 // When looking for an AllOnes constant, N is an sext, and the 'other'
12457 // value is 0.
12458 OtherOp = DAG.getConstant(0, dl, VT);
12459 else if (N->getOpcode() == ISD::ZERO_EXTEND)
12460 // When looking for a 0 constant, N can be zext or sext.
12461 OtherOp = DAG.getConstant(1, dl, VT);
12462 else
12463 OtherOp = DAG.getAllOnesConstant(dl, VT);
12464 return true;
12465 }
12466 }
12467}
12468
12469// Combine a constant select operand into its use:
12470//
12471// (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
12472// (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
12473// (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) [AllOnes=1]
12474// (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
12475// (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
12476//
12477// The transform is rejected if the select doesn't have a constant operand that
12478// is null, or all ones when AllOnes is set.
12479//
12480// Also recognize sext/zext from i1:
12481//
12482// (add (zext cc), x) -> (select cc (add x, 1), x)
12483// (add (sext cc), x) -> (select cc (add x, -1), x)
12484//
12485// These transformations eventually create predicated instructions.
12486//
12487// @param N The node to transform.
12488// @param Slct The N operand that is a select.
12489// @param OtherOp The other N operand (x above).
12490// @param DCI Context.
12491// @param AllOnes Require the select constant to be all ones instead of null.
12492// @returns The new node, or SDValue() on failure.
12493static
12494SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
12495 TargetLowering::DAGCombinerInfo &DCI,
12496 bool AllOnes = false) {
12497 SelectionDAG &DAG = DCI.DAG;
12498 EVT VT = N->getValueType(0);
12499 SDValue NonConstantVal;
12500 SDValue CCOp;
12501 bool SwapSelectOps;
12502 if (!isConditionalZeroOrAllOnes(Slct.getNode(), AllOnes, CCOp, SwapSelectOps,
12503 NonConstantVal, DAG))
12504 return SDValue();
12505
12506 // Slct is now known to be the desired identity constant when CC is true.
12507 SDValue TrueVal = OtherOp;
12508 SDValue FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
12509 OtherOp, NonConstantVal);
12510 // Unless SwapSelectOps says CC should be false.
12511 if (SwapSelectOps)
12512 std::swap(TrueVal, FalseVal);
12513
12514 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
12515 CCOp, TrueVal, FalseVal);
12516}
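A minimal source-level sketch of the first transform above (illustrative only, not taken from this file): folding the select's identity constant into the add so the result can lower to a predicated instruction.

  // (add (select cc, 0, c), x) -> (select cc, x, (add x, c))
  int before(int x, int c, bool cc) { return x + (cc ? 0 : c); }
  int after(int x, int c, bool cc)  { return cc ? x : (x + c); }  // same value, predicable add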
12517
12518// Attempt combineSelectAndUse on each operand of a commutative operator N.
12519static
12520SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes,
12521 TargetLowering::DAGCombinerInfo &DCI) {
12522 SDValue N0 = N->getOperand(0);
12523 SDValue N1 = N->getOperand(1);
12524 if (N0.getNode()->hasOneUse())
12525 if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes))
12526 return Result;
12527 if (N1.getNode()->hasOneUse())
12528 if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes))
12529 return Result;
12530 return SDValue();
12531}
12532
12533static bool IsVUZPShuffleNode(SDNode *N) {
12534 // VUZP shuffle node.
12535 if (N->getOpcode() == ARMISD::VUZP)
12536 return true;
12537
12538 // "VUZP" on i32 is an alias for VTRN.
12539 if (N->getOpcode() == ARMISD::VTRN && N->getValueType(0) == MVT::v2i32)
12540 return true;
12541
12542 return false;
12543}
12544
12545static SDValue AddCombineToVPADD(SDNode *N, SDValue N0, SDValue N1,
12546 TargetLowering::DAGCombinerInfo &DCI,
12547 const ARMSubtarget *Subtarget) {
12548 // Look for ADD(VUZP.0, VUZP.1).
12549 if (!IsVUZPShuffleNode(N0.getNode()) || N0.getNode() != N1.getNode() ||
12550 N0 == N1)
12551 return SDValue();
12552
12553 // Make sure the ADD is a 64-bit add; there is no 128-bit VPADD.
12554 if (!N->getValueType(0).is64BitVector())
12555 return SDValue();
12556
12557 // Generate vpadd.
12558 SelectionDAG &DAG = DCI.DAG;
12559 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12560 SDLoc dl(N);
12561 SDNode *Unzip = N0.getNode();
12562 EVT VT = N->getValueType(0);
12563
12564 SmallVector<SDValue, 8> Ops;
12565 Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpadd, dl,
12566 TLI.getPointerTy(DAG.getDataLayout())));
12567 Ops.push_back(Unzip->getOperand(0));
12568 Ops.push_back(Unzip->getOperand(1));
12569
12570 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
12571}
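A minimal sketch of the pattern this recognizes, assuming NEON intrinsics from <arm_neon.h> are available (illustrative only): adding the two halves of a VUZP is exactly a pairwise add, which a single vpadd performs.

  #include <arm_neon.h>
  int16x4_t pairwise_add(int16x4_t a, int16x4_t b) {
    int16x4x2_t uz = vuzp_s16(a, b);        // uz.val[0]: even lanes, uz.val[1]: odd lanes
    return vadd_s16(uz.val[0], uz.val[1]);  // equivalent to vpadd_s16(a, b)
  }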
12572
12573static SDValue AddCombineVUZPToVPADDL(SDNode *N, SDValue N0, SDValue N1,
12574 TargetLowering::DAGCombinerInfo &DCI,
12575 const ARMSubtarget *Subtarget) {
12576 // Check for two extended operands.
12577 if (!(N0.getOpcode() == ISD::SIGN_EXTEND &&
12578 N1.getOpcode() == ISD::SIGN_EXTEND) &&
12579 !(N0.getOpcode() == ISD::ZERO_EXTEND &&
12580 N1.getOpcode() == ISD::ZERO_EXTEND))
12581 return SDValue();
12582
12583 SDValue N00 = N0.getOperand(0);
12584 SDValue N10 = N1.getOperand(0);
12585
12586 // Look for ADD(SEXT(VUZP.0), SEXT(VUZP.1))
12587 if (!IsVUZPShuffleNode(N00.getNode()) || N00.getNode() != N10.getNode() ||
12588 N00 == N10)
12589 return SDValue();
12590
12591 // We only recognize Q register paddl here; this can't be reached until
12592 // after type legalization.
12593 if (!N00.getValueType().is64BitVector() ||
12594 !N0.getValueType().is128BitVector())
12595 return SDValue();
12596
12597 // Generate vpaddl.
12598 SelectionDAG &DAG = DCI.DAG;
12599 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12600 SDLoc dl(N);
12601 EVT VT = N->getValueType(0);
12602
12603 SmallVector<SDValue, 8> Ops;
12604 // Form vpaddl.sN or vpaddl.uN depending on the kind of extension.
12605 unsigned Opcode;
12606 if (N0.getOpcode() == ISD::SIGN_EXTEND)
12607 Opcode = Intrinsic::arm_neon_vpaddls;
12608 else
12609 Opcode = Intrinsic::arm_neon_vpaddlu;
12610 Ops.push_back(DAG.getConstant(Opcode, dl,
12611 TLI.getPointerTy(DAG.getDataLayout())));
12612 EVT ElemTy = N00.getValueType().getVectorElementType();
12613 unsigned NumElts = VT.getVectorNumElements();
12614 EVT ConcatVT = EVT::getVectorVT(*DAG.getContext(), ElemTy, NumElts * 2);
12615 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), ConcatVT,
12616 N00.getOperand(0), N00.getOperand(1));
12617 Ops.push_back(Concat);
12618
12619 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
12620}
12621
12622// FIXME: This function shouldn't be necessary; if we lower BUILD_VECTOR in
12623// an appropriate manner, we end up with ADD(VUZP(ZEXT(N))), which is
12624// much easier to match.
12625static SDValue
12626AddCombineBUILD_VECTORToVPADDL(SDNode *N, SDValue N0, SDValue N1,
12627 TargetLowering::DAGCombinerInfo &DCI,
12628 const ARMSubtarget *Subtarget) {
12629 // Only perform this optimization after legalization, and only if NEON is available. We
12630 // also expect both operands to be BUILD_VECTORs.
12631 if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()
12632 || N0.getOpcode() != ISD::BUILD_VECTOR
12633 || N1.getOpcode() != ISD::BUILD_VECTOR)
12634 return SDValue();
12635
12636 // Check output type since VPADDL operand elements can only be 8, 16, or 32.
12637 EVT VT = N->getValueType(0);
12638 if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64)
12639 return SDValue();
12640
12641 // Check that the vector operands are of the right form.
12642 // N0 and N1 are BUILD_VECTOR nodes with N number of EXTRACT_VECTOR
12643 // operands, where N is the size of the formed vector.
12644 // Each EXTRACT_VECTOR should have the same input vector and odd or even
12645 // index such that we have a pairwise add pattern.
12646
12647 // Grab the vector that all EXTRACT_VECTOR nodes should be referencing.
12648 if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
12649 return SDValue();
12650 SDValue Vec = N0->getOperand(0)->getOperand(0);
12651 SDNode *V = Vec.getNode();
12652 unsigned nextIndex = 0;
12653
12654 // For each operand of the ADD (both are BUILD_VECTORs), check that each of
12655 // its operands is an EXTRACT_VECTOR_ELT from the same vector with the
12656 // appropriate index.
12657 for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {
12658 if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT
12659 && N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
12660
12661 SDValue ExtVec0 = N0->getOperand(i);
12662 SDValue ExtVec1 = N1->getOperand(i);
12663
12664 // The first operand is the vector; verify it's the same.
12665 if (V != ExtVec0->getOperand(0).getNode() ||
12666 V != ExtVec1->getOperand(0).getNode())
12667 return SDValue();
12668
12669 // The second is the constant index; verify it's correct.
12670 ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1));
12671 ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1));
12672
12673 // For the constants, N0 should extract the even indices and N1 the odd ones.
12674 if (!C0 || !C1 || C0->getZExtValue() != nextIndex
12675 || C1->getZExtValue() != nextIndex+1)
12676 return SDValue();
12677
12678 // Increment index.
12679 nextIndex+=2;
12680 } else
12681 return SDValue();
12682 }
12683
12684 // Don't generate vpaddl+vmovn; we'll match it to vpadd later. Also make sure
12685 // we're using the entire input vector, otherwise there's a size/legality
12686 // mismatch somewhere.
12687 if (nextIndex != Vec.getValueType().getVectorNumElements() ||
12688 Vec.getValueType().getVectorElementType() == VT.getVectorElementType())
12689 return SDValue();
12690
12691 // Create VPADDL node.
12692 SelectionDAG &DAG = DCI.DAG;
12693 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12694
12695 SDLoc dl(N);
12696
12697 // Build operand list.
12698 SmallVector<SDValue, 8> Ops;
12699 Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls, dl,
12700 TLI.getPointerTy(DAG.getDataLayout())));
12701
12702 // Input is the vector.
12703 Ops.push_back(Vec);
12704
12705 // Get widened type and narrowed type.
12706 MVT widenType;
12707 unsigned numElem = VT.getVectorNumElements();
12708
12709 EVT inputLaneType = Vec.getValueType().getVectorElementType();
12710 switch (inputLaneType.getSimpleVT().SimpleTy) {
12711 case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;
12712 case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
12713 case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
12714 default:
12715 llvm_unreachable("Invalid vector element type for padd optimization.");
12716 }
12717
12718 SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, widenType, Ops);
12719 unsigned ExtOp = VT.bitsGT(tmp.getValueType()) ? ISD::ANY_EXTEND : ISD::TRUNCATE;
12720 return DAG.getNode(ExtOp, dl, VT, tmp);
12721}
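A rough scalar equivalent of the BUILD_VECTOR-of-extracts pattern handled here (illustrative only; the helper name is made up): each output lane is the widened sum of an even/odd pair of input lanes, which is what vpaddl computes.

  #include <cstdint>
  void widen_pairwise(const int8_t v[8], int16_t r[4]) {
    for (int i = 0; i < 4; ++i)
      r[i] = (int16_t)v[2 * i] + (int16_t)v[2 * i + 1];  // maps onto vpaddl.s8
  }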
12722
12723static SDValue findMUL_LOHI(SDValue V) {
12724 if (V->getOpcode() == ISD::UMUL_LOHI ||
12725 V->getOpcode() == ISD::SMUL_LOHI)
12726 return V;
12727 return SDValue();
12728}
12729
12730static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode,
12731 TargetLowering::DAGCombinerInfo &DCI,
12732 const ARMSubtarget *Subtarget) {
12733 if (!Subtarget->hasBaseDSP())
12734 return SDValue();
12735
12736 // SMLALBB, SMLALBT, SMLALTB, SMLALTT multiply two 16-bit values and
12737 // accumulate the product into a 64-bit value. The 16-bit values will
12738 // be sign extended somehow or SRA'd into 32-bit values
12739 // (addc (adde (mul 16bit, 16bit), lo), hi)
12740 SDValue Mul = AddcNode->getOperand(0);
12741 SDValue Lo = AddcNode->getOperand(1);
12742 if (Mul.getOpcode() != ISD::MUL) {
12743 Lo = AddcNode->getOperand(0);
12744 Mul = AddcNode->getOperand(1);
12745 if (Mul.getOpcode() != ISD::MUL)
12746 return SDValue();
12747 }
12748
12749 SDValue SRA = AddeNode->getOperand(0);
12750 SDValue Hi = AddeNode->getOperand(1);
12751 if (SRA.getOpcode() != ISD::SRA) {
12752 SRA = AddeNode->getOperand(1);
12753 Hi = AddeNode->getOperand(0);
12754 if (SRA.getOpcode() != ISD::SRA)
12755 return SDValue();
12756 }
12757 if (auto Const = dyn_cast<ConstantSDNode>(SRA.getOperand(1))) {
12758 if (Const->getZExtValue() != 31)
12759 return SDValue();
12760 } else
12761 return SDValue();
12762
12763 if (SRA.getOperand(0) != Mul)
12764 return SDValue();
12765
12766 SelectionDAG &DAG = DCI.DAG;
12767 SDLoc dl(AddcNode);
12768 unsigned Opcode = 0;
12769 SDValue Op0;
12770 SDValue Op1;
12771
12772 if (isS16(Mul.getOperand(0), DAG) && isS16(Mul.getOperand(1), DAG)) {
12773 Opcode = ARMISD::SMLALBB;
12774 Op0 = Mul.getOperand(0);
12775 Op1 = Mul.getOperand(1);
12776 } else if (isS16(Mul.getOperand(0), DAG) && isSRA16(Mul.getOperand(1))) {
12777 Opcode = ARMISD::SMLALBT;
12778 Op0 = Mul.getOperand(0);
12779 Op1 = Mul.getOperand(1).getOperand(0);
12780 } else if (isSRA16(Mul.getOperand(0)) && isS16(Mul.getOperand(1), DAG)) {
12781 Opcode = ARMISD::SMLALTB;
12782 Op0 = Mul.getOperand(0).getOperand(0);
12783 Op1 = Mul.getOperand(1);
12784 } else if (isSRA16(Mul.getOperand(0)) && isSRA16(Mul.getOperand(1))) {
12785 Opcode = ARMISD::SMLALTT;
12786 Op0 = Mul->getOperand(0).getOperand(0);
12787 Op1 = Mul->getOperand(1).getOperand(0);
12788 }
12789
12790 if (!Op0 || !Op1)
12791 return SDValue();
12792
12793 SDValue SMLAL = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
12794 Op0, Op1, Lo, Hi);
12795 // Replace the ADD nodes' uses with the MLAL node's values.
12796 SDValue HiMLALResult(SMLAL.getNode(), 1);
12797 SDValue LoMLALResult(SMLAL.getNode(), 0);
12798
12799 DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult);
12800 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult);
12801
12802 // Return original node to notify the driver to stop replacing.
12803 SDValue resNode(AddcNode, 0);
12804 return resNode;
12805}
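For orientation, a minimal C-level sketch of the kind of source that produces the SMLALBB case above (illustrative only): a signed 16 x 16 multiply accumulated into a 64-bit value.

  #include <cstdint>
  int64_t smlalbb_like(int64_t acc, int32_t a, int32_t b) {
    // Bottom 16 bits of each operand, sign-extended, multiplied, then accumulated.
    return acc + (int64_t)((int16_t)a * (int16_t)b);
  }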
12806
12807static SDValue AddCombineTo64bitMLAL(SDNode *AddeSubeNode,
12808 TargetLowering::DAGCombinerInfo &DCI,
12809 const ARMSubtarget *Subtarget) {
12810 // Look for multiply add opportunities.
12811 // The pattern is an ISD::UMUL_LOHI followed by two add nodes, where
12812 // each add node consumes a value from ISD::UMUL_LOHI and there is
12813 // a glue link from the first add to the second add.
12814 // If we find this pattern, we can replace the U/SMUL_LOHI, ADDC, and ADDE by
12815 // a S/UMLAL instruction.
12816 // UMUL_LOHI
12817 // / :lo \ :hi
12818 // V \ [no multiline comment]
12819 // loAdd -> ADDC |
12820 // \ :carry /
12821 // V V
12822 // ADDE <- hiAdd
12823 //
12824 // In the special case where only the higher part of a signed result is used
12825 // and the add to the low part of the result of ISD::UMUL_LOHI adds or subtracts
12826 // a constant with the exact value of 0x80000000, we recognize we are dealing
12827 // with a "rounded multiply and add" (or subtract) and transform it into
12828 // either an ARMISD::SMMLAR or an ARMISD::SMMLSR, respectively.
12829
12830 assert((AddeSubeNode->getOpcode() == ARMISD::ADDE ||
12831 AddeSubeNode->getOpcode() == ARMISD::SUBE) &&
12832 "Expect an ADDE or SUBE");
12833
12834 assert(AddeSubeNode->getNumOperands() == 3 &&
12835 AddeSubeNode->getOperand(2).getValueType() == MVT::i32 &&
12836 "ADDE node has the wrong inputs");
12837
12838 // Check that we are chained to the right ADDC or SUBC node.
12839 SDNode *AddcSubcNode = AddeSubeNode->getOperand(2).getNode();
12840 if ((AddeSubeNode->getOpcode() == ARMISD::ADDE &&
12841 AddcSubcNode->getOpcode() != ARMISD::ADDC) ||
12842 (AddeSubeNode->getOpcode() == ARMISD::SUBE &&
12843 AddcSubcNode->getOpcode() != ARMISD::SUBC))
12844 return SDValue();
12845
12846 SDValue AddcSubcOp0 = AddcSubcNode->getOperand(0);
12847 SDValue AddcSubcOp1 = AddcSubcNode->getOperand(1);
12848
12849 // Check if the two operands are from the same mul_lohi node.
12850 if (AddcSubcOp0.getNode() == AddcSubcOp1.getNode())
12851 return SDValue();
12852
12853 assert(AddcSubcNode->getNumValues() == 2 &&
12854 AddcSubcNode->getValueType(0) == MVT::i32 &&
12855 "Expect ADDC with two result values. First: i32");
12856
12857 // Check that the ADDC adds the low result of the S/UMUL_LOHI. If not, it
12858 // may be an SMLAL which multiplies two 16-bit values.
12859 if (AddeSubeNode->getOpcode() == ARMISD::ADDE &&
12860 AddcSubcOp0->getOpcode() != ISD::UMUL_LOHI &&
12861 AddcSubcOp0->getOpcode() != ISD::SMUL_LOHI &&
12862 AddcSubcOp1->getOpcode() != ISD::UMUL_LOHI &&
12863 AddcSubcOp1->getOpcode() != ISD::SMUL_LOHI)
12864 return AddCombineTo64BitSMLAL16(AddcSubcNode, AddeSubeNode, DCI, Subtarget);
12865
12866 // Check for the triangle shape.
12867 SDValue AddeSubeOp0 = AddeSubeNode->getOperand(0);
12868 SDValue AddeSubeOp1 = AddeSubeNode->getOperand(1);
12869
12870 // Make sure that the ADDE/SUBE operands are not coming from the same node.
12871 if (AddeSubeOp0.getNode() == AddeSubeOp1.getNode())
12872 return SDValue();
12873
12874 // Find the MUL_LOHI node walking up ADDE/SUBE's operands.
12875 bool IsLeftOperandMUL = false;
12876 SDValue MULOp = findMUL_LOHI(AddeSubeOp0);
12877 if (MULOp == SDValue())
12878 MULOp = findMUL_LOHI(AddeSubeOp1);
12879 else
12880 IsLeftOperandMUL = true;
12881 if (MULOp == SDValue())
12882 return SDValue();
12883
12884 // Figure out the right opcode.
12885 unsigned Opc = MULOp->getOpcode();
12886 unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL;
12887
12888 // Figure out the high and low input values to the MLAL node.
12889 SDValue *HiAddSub = nullptr;
12890 SDValue *LoMul = nullptr;
12891 SDValue *LowAddSub = nullptr;
12892
12893 // Ensure that ADDE/SUBE is from high result of ISD::xMUL_LOHI.
12894 if ((AddeSubeOp0 != MULOp.getValue(1)) && (AddeSubeOp1 != MULOp.getValue(1)))
12895 return SDValue();
12896
12897 if (IsLeftOperandMUL)
12898 HiAddSub = &AddeSubeOp1;
12899 else
12900 HiAddSub = &AddeSubeOp0;
12901
12902 // Ensure that LoMul and LowAddSub are taken from the correct ISD::SMUL_LOHI node
12903 // whose low result is fed to the ADDC/SUBC we are checking.
12904
12905 if (AddcSubcOp0 == MULOp.getValue(0)) {
12906 LoMul = &AddcSubcOp0;
12907 LowAddSub = &AddcSubcOp1;
12908 }
12909 if (AddcSubcOp1 == MULOp.getValue(0)) {
12910 LoMul = &AddcSubcOp1;
12911 LowAddSub = &AddcSubcOp0;
12912 }
12913
12914 if (!LoMul)
12915 return SDValue();
12916
12917 // If HiAddSub is the same node as ADDC/SUBC or is a predecessor of ADDC/SUBC
12918 // the replacement below will create a cycle.
12919 if (AddcSubcNode == HiAddSub->getNode() ||
12920 AddcSubcNode->isPredecessorOf(HiAddSub->getNode()))
12921 return SDValue();
12922
12923 // Create the merged node.
12924 SelectionDAG &DAG = DCI.DAG;
12925
12926 // Start building operand list.
12927 SmallVector<SDValue, 8> Ops;
12928 Ops.push_back(LoMul->getOperand(0));
12929 Ops.push_back(LoMul->getOperand(1));
12930
12931 // Check whether we can use SMMLAR, SMMLSR or SMMULR instead. For this to be
12932 // the case, we must be doing a signed multiplication and only use the higher
12933 // part of the result of the MLAL; furthermore, the LowAddSub must be a constant
12934 // addition or subtraction with the value 0x80000000.
12935 if (Subtarget->hasV6Ops() && Subtarget->hasDSP() && Subtarget->useMulOps() &&
12936 FinalOpc == ARMISD::SMLAL && !AddeSubeNode->hasAnyUseOfValue(1) &&
12937 LowAddSub->getNode()->getOpcode() == ISD::Constant &&
12938 static_cast<ConstantSDNode *>(LowAddSub->getNode())->getZExtValue() ==
12939 0x80000000) {
12940 Ops.push_back(*HiAddSub);
12941 if (AddcSubcNode->getOpcode() == ARMISD::SUBC) {
12942 FinalOpc = ARMISD::SMMLSR;
12943 } else {
12944 FinalOpc = ARMISD::SMMLAR;
12945 }
12946 SDValue NewNode = DAG.getNode(FinalOpc, SDLoc(AddcSubcNode), MVT::i32, Ops);
12947 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeSubeNode, 0), NewNode);
12948
12949 return SDValue(AddeSubeNode, 0);
12950 } else if (AddcSubcNode->getOpcode() == ARMISD::SUBC)
12951 // SMMLS is generated during instruction selection and the rest of this
12952 // function can not handle the case where AddcSubcNode is a SUBC.
12953 return SDValue();
12954
12955 // Finish building the operand list for {U/S}MLAL
12956 Ops.push_back(*LowAddSub);
12957 Ops.push_back(*HiAddSub);
12958
12959 SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcSubcNode),
12960 DAG.getVTList(MVT::i32, MVT::i32), Ops);
12961
12962 // Replace the ADD nodes' uses with the MLAL node's values.
12963 SDValue HiMLALResult(MLALNode.getNode(), 1);
12964 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeSubeNode, 0), HiMLALResult);
12965
12966 SDValue LoMLALResult(MLALNode.getNode(), 0);
12967 DAG.ReplaceAllUsesOfValueWith(SDValue(AddcSubcNode, 0), LoMLALResult);
12968
12969 // Return original node to notify the driver to stop replacing.
12970 return SDValue(AddeSubeNode, 0);
12971}
12972
12973static SDValue AddCombineTo64bitUMAAL(SDNode *AddeNode,
12974 TargetLowering::DAGCombinerInfo &DCI,
12975 const ARMSubtarget *Subtarget) {
12976 // UMAAL is similar to UMLAL except that it adds two unsigned values.
12977 // While trying to combine for the other MLAL nodes, first search for the
12978 // chance to use UMAAL. Check if Addc uses a node which has already
12979 // been combined into a UMLAL. The other pattern is UMLAL using Addc/Adde
12980 // as the addend, and it's handled in PerformUMLALCombine.
12981
12982 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
12983 return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
12984
12985 // Check that we have a glued ADDC node.
12986 SDNode* AddcNode = AddeNode->getOperand(2).getNode();
12987 if (AddcNode->getOpcode() != ARMISD::ADDC)
12988 return SDValue();
12989
12990 // Find the converted UMAAL or quit if it doesn't exist.
12991 SDNode *UmlalNode = nullptr;
12992 SDValue AddHi;
12993 if (AddcNode->getOperand(0).getOpcode() == ARMISD::UMLAL) {
12994 UmlalNode = AddcNode->getOperand(0).getNode();
12995 AddHi = AddcNode->getOperand(1);
12996 } else if (AddcNode->getOperand(1).getOpcode() == ARMISD::UMLAL) {
12997 UmlalNode = AddcNode->getOperand(1).getNode();
12998 AddHi = AddcNode->getOperand(0);
12999 } else {
13000 return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
13001 }
13002
13003 // The ADDC should be glued to an ADDE node, which uses the same UMLAL as
13004 // the ADDC as well as Zero.
13005 if (!isNullConstant(UmlalNode->getOperand(3)))
13006 return SDValue();
13007
13008 if ((isNullConstant(AddeNode->getOperand(0)) &&
13009 AddeNode->getOperand(1).getNode() == UmlalNode) ||
13010 (AddeNode->getOperand(0).getNode() == UmlalNode &&
13011 isNullConstant(AddeNode->getOperand(1)))) {
13012 SelectionDAG &DAG = DCI.DAG;
13013 SDValue Ops[] = { UmlalNode->getOperand(0), UmlalNode->getOperand(1),
13014 UmlalNode->getOperand(2), AddHi };
13015 SDValue UMAAL = DAG.getNode(ARMISD::UMAAL, SDLoc(AddcNode),
13016 DAG.getVTList(MVT::i32, MVT::i32), Ops);
13017
13018 // Replace the ADD nodes' uses with the UMAAL node's values.
13019 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), SDValue(UMAAL.getNode(), 1));
13020 DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), SDValue(UMAAL.getNode(), 0));
13021
13022 // Return original node to notify the driver to stop replacing.
13023 return SDValue(AddeNode, 0);
13024 }
13025 return SDValue();
13026}
13027
13028static SDValue PerformUMLALCombine(SDNode *N, SelectionDAG &DAG,
13029 const ARMSubtarget *Subtarget) {
13030 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
13031 return SDValue();
13032
13033 // Check that we have a pair of ADDC and ADDE as operands.
13034 // Both addends of the ADDE must be zero.
13035 SDNode* AddcNode = N->getOperand(2).getNode();
13036 SDNode* AddeNode = N->getOperand(3).getNode();
13037 if ((AddcNode->getOpcode() == ARMISD::ADDC) &&
13038 (AddeNode->getOpcode() == ARMISD::ADDE) &&
13039 isNullConstant(AddeNode->getOperand(0)) &&
13040 isNullConstant(AddeNode->getOperand(1)) &&
13041 (AddeNode->getOperand(2).getNode() == AddcNode))
13042 return DAG.getNode(ARMISD::UMAAL, SDLoc(N),
13043 DAG.getVTList(MVT::i32, MVT::i32),
13044 {N->getOperand(0), N->getOperand(1),
13045 AddcNode->getOperand(0), AddcNode->getOperand(1)});
13046 else
13047 return SDValue();
13048}
13049
13050static SDValue PerformAddcSubcCombine(SDNode *N,
13051 TargetLowering::DAGCombinerInfo &DCI,
13052 const ARMSubtarget *Subtarget) {
13053 SelectionDAG &DAG(DCI.DAG);
13054
13055 if (N->getOpcode() == ARMISD::SUBC && N->hasAnyUseOfValue(1)) {
13056 // (SUBC (ADDE 0, 0, C), 1) -> C
13057 SDValue LHS = N->getOperand(0);
13058 SDValue RHS = N->getOperand(1);
13059 if (LHS->getOpcode() == ARMISD::ADDE &&
13060 isNullConstant(LHS->getOperand(0)) &&
13061 isNullConstant(LHS->getOperand(1)) && isOneConstant(RHS)) {
13062 return DCI.CombineTo(N, SDValue(N, 0), LHS->getOperand(2));
13063 }
13064 }
13065
13066 if (Subtarget->isThumb1Only()) {
13067 SDValue RHS = N->getOperand(1);
13068 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
13069 int32_t imm = C->getSExtValue();
13070 if (imm < 0 && imm > std::numeric_limits<int>::min()) {
13071 SDLoc DL(N);
13072 RHS = DAG.getConstant(-imm, DL, MVT::i32);
13073 unsigned Opcode = (N->getOpcode() == ARMISD::ADDC) ? ARMISD::SUBC
13074 : ARMISD::ADDC;
13075 return DAG.getNode(Opcode, DL, N->getVTList(), N->getOperand(0), RHS);
13076 }
13077 }
13078 }
13079
13080 return SDValue();
13081}
13082
13083static SDValue PerformAddeSubeCombine(SDNode *N,
13084 TargetLowering::DAGCombinerInfo &DCI,
13085 const ARMSubtarget *Subtarget) {
13086 if (Subtarget->isThumb1Only()) {
13087 SelectionDAG &DAG = DCI.DAG;
13088 SDValue RHS = N->getOperand(1);
13089 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
13090 int64_t imm = C->getSExtValue();
13091 if (imm < 0) {
13092 SDLoc DL(N);
13093
13094 // The with-carry-in form matches bitwise not instead of the negation.
13095 // Effectively, the inverse interpretation of the carry flag already
13096 // accounts for part of the negation.
13097 RHS = DAG.getConstant(~imm, DL, MVT::i32);
13098
13099 unsigned Opcode = (N->getOpcode() == ARMISD::ADDE) ? ARMISD::SUBE
13100 : ARMISD::ADDE;
13101 return DAG.getNode(Opcode, DL, N->getVTList(),
13102 N->getOperand(0), RHS, N->getOperand(2));
13103 }
13104 }
13105 } else if (N->getOperand(1)->getOpcode() == ISD::SMUL_LOHI) {
13106 return AddCombineTo64bitMLAL(N, DCI, Subtarget);
13107 }
13108 return SDValue();
13109}
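A minimal sketch of the Thumb1 rewrites in the two combines above (illustrative only; the exact legalization may differ): a 64-bit subtraction of a small constant is typically canonicalized to an add of the negated constant and expanded to an ADDC/ADDE pair, which these combines flip back to SUBC/SUBE so the immediates stay encodable.

  #include <cstdint>
  // x - 5 is canonicalized to x + (-5); the i64 add expands to ADDC/ADDE with
  // negative immediates, which become roughly SUBC(lo, 5) / SUBE(hi, 0) here.
  uint64_t sub_five(uint64_t x) { return x - 5; }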
13110
13111static SDValue PerformSELECTCombine(SDNode *N,
13112 TargetLowering::DAGCombinerInfo &DCI,
13113 const ARMSubtarget *Subtarget) {
13114 if (!Subtarget->hasMVEIntegerOps())
13115 return SDValue();
13116
13117 SDLoc dl(N);
13118 SDValue SetCC;
13119 SDValue LHS;
13120 SDValue RHS;
13121 ISD::CondCode CC;
13122 SDValue TrueVal;
13123 SDValue FalseVal;
13124
13125 if (N->getOpcode() == ISD::SELECT &&
13126 N->getOperand(0)->getOpcode() == ISD::SETCC) {
13127 SetCC = N->getOperand(0);
13128 LHS = SetCC->getOperand(0);
13129 RHS = SetCC->getOperand(1);
13130 CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
13131 TrueVal = N->getOperand(1);
13132 FalseVal = N->getOperand(2);
13133 } else if (N->getOpcode() == ISD::SELECT_CC) {
13134 LHS = N->getOperand(0);
13135 RHS = N->getOperand(1);
13136 CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
13137 TrueVal = N->getOperand(2);
13138 FalseVal = N->getOperand(3);
13139 } else {
13140 return SDValue();
13141 }
13142
13143 unsigned int Opcode = 0;
13144 if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMIN ||
13145 FalseVal->getOpcode() == ISD::VECREDUCE_UMIN) &&
13146 (CC == ISD::SETULT || CC == ISD::SETUGT)) {
13147 Opcode = ARMISD::VMINVu;
13148 if (CC == ISD::SETUGT)
13149 std::swap(TrueVal, FalseVal);
13150 } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMIN ||
13151 FalseVal->getOpcode() == ISD::VECREDUCE_SMIN) &&
13152 (CC == ISD::SETLT || CC == ISD::SETGT)) {
13153 Opcode = ARMISD::VMINVs;
13154 if (CC == ISD::SETGT)
13155 std::swap(TrueVal, FalseVal);
13156 } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMAX ||
13157 FalseVal->getOpcode() == ISD::VECREDUCE_UMAX) &&
13158 (CC == ISD::SETUGT || CC == ISD::SETULT)) {
13159 Opcode = ARMISD::VMAXVu;
13160 if (CC == ISD::SETULT)
13161 std::swap(TrueVal, FalseVal);
13162 } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMAX ||
13163 FalseVal->getOpcode() == ISD::VECREDUCE_SMAX) &&
13164 (CC == ISD::SETGT || CC == ISD::SETLT)) {
13165 Opcode = ARMISD::VMAXVs;
13166 if (CC == ISD::SETLT)
13167 std::swap(TrueVal, FalseVal);
13168 } else
13169 return SDValue();
13170
13171 // Normalise to the right hand side being the vector reduction
13172 switch (TrueVal->getOpcode()) {
13173 case ISD::VECREDUCE_UMIN:
13174 case ISD::VECREDUCE_SMIN:
13175 case ISD::VECREDUCE_UMAX:
13176 case ISD::VECREDUCE_SMAX:
13177 std::swap(LHS, RHS);
13178 std::swap(TrueVal, FalseVal);
13179 break;
13180 }
13181
13182 EVT VectorType = FalseVal->getOperand(0).getValueType();
13183
13184 if (VectorType != MVT::v16i8 && VectorType != MVT::v8i16 &&
13185 VectorType != MVT::v4i32)
13186 return SDValue();
13187
13188 EVT VectorScalarType = VectorType.getVectorElementType();
13189
13190 // The values being selected must also be the ones being compared
13191 if (TrueVal != LHS || FalseVal != RHS)
13192 return SDValue();
13193
13194 EVT LeftType = LHS->getValueType(0);
13195 EVT RightType = RHS->getValueType(0);
13196
13197 // The types must match the reduced type too
13198 if (LeftType != VectorScalarType || RightType != VectorScalarType)
13199 return SDValue();
13200
13201 // Legalise the scalar to an i32
13202 if (VectorScalarType != MVT::i32)
13203 LHS = DCI.DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS);
13204
13205 // Generate the reduction as an i32 for legalisation purposes
13206 auto Reduction =
13207 DCI.DAG.getNode(Opcode, dl, MVT::i32, LHS, RHS->getOperand(0));
13208
13209 // The result isn't actually an i32 so truncate it back to its original type
13210 if (VectorScalarType != MVT::i32)
13211 Reduction = DCI.DAG.getNode(ISD::TRUNCATE, dl, VectorScalarType, Reduction);
13212
13213 return Reduction;
13214}
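A scalar sketch of the select-of-reduction pattern this turns into VMINV/VMAXV (illustrative only): the compared scalar doubles as the start value of the reduction.

  #include <cstdint>
  #include <algorithm>
  uint8_t min_with_scalar(uint8_t x, const uint8_t v[16]) {
    uint8_t r = v[0];
    for (int i = 1; i < 16; ++i) r = std::min(r, v[i]);  // VECREDUCE_UMIN
    return x < r ? x : r;                                // select folds into VMINVu
  }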
13215
13216 // A special combine for the vqdmulh family of instructions. This is one of the
13217 // potential set of patterns that could match this instruction. The base pattern
13218 // you would expect is min(max(ashr(mul(mul(sext(x), 2), sext(y)), 16))).
13219 // This combine matches the variant min(max(ashr(mul(mul(sext(x), sext(y)), 2), 16))),
13220 // which llvm will have optimized to min(ashr(mul(sext(x), sext(y)), 15)) as
13221 // the max is unnecessary.
13222static SDValue PerformVQDMULHCombine(SDNode *N, SelectionDAG &DAG) {
13223 EVT VT = N->getValueType(0);
13224 SDValue Shft;
13225 ConstantSDNode *Clamp;
13226
13227 if (!VT.isVector() || VT.getScalarSizeInBits() > 64)
13228 return SDValue();
13229
13230 if (N->getOpcode() == ISD::SMIN) {
13231 Shft = N->getOperand(0);
13232 Clamp = isConstOrConstSplat(N->getOperand(1));
13233 } else if (N->getOpcode() == ISD::VSELECT) {
13234 // Detect a SMIN, which for an i64 node will be a vselect/setcc, not a smin.
13235 SDValue Cmp = N->getOperand(0);
13236 if (Cmp.getOpcode() != ISD::SETCC ||
13237 cast<CondCodeSDNode>(Cmp.getOperand(2))->get() != ISD::SETLT ||
13238 Cmp.getOperand(0) != N->getOperand(1) ||
13239 Cmp.getOperand(1) != N->getOperand(2))
13240 return SDValue();
13241 Shft = N->getOperand(1);
13242 Clamp = isConstOrConstSplat(N->getOperand(2));
13243 } else
13244 return SDValue();
13245
13246 if (!Clamp)
13247 return SDValue();
13248
13249 MVT ScalarType;
13250 int ShftAmt = 0;
13251 switch (Clamp->getSExtValue()) {
13252 case (1 << 7) - 1:
13253 ScalarType = MVT::i8;
13254 ShftAmt = 7;
13255 break;
13256 case (1 << 15) - 1:
13257 ScalarType = MVT::i16;
13258 ShftAmt = 15;
13259 break;
13260 case (1ULL << 31) - 1:
13261 ScalarType = MVT::i32;
13262 ShftAmt = 31;
13263 break;
13264 default:
13265 return SDValue();
13266 }
13267
13268 if (Shft.getOpcode() != ISD::SRA)
13269 return SDValue();
13270 ConstantSDNode *N1 = isConstOrConstSplat(Shft.getOperand(1));
13271 if (!N1 || N1->getSExtValue() != ShftAmt)
13272 return SDValue();
13273
13274 SDValue Mul = Shft.getOperand(0);
13275 if (Mul.getOpcode() != ISD::MUL)
13276 return SDValue();
13277
13278 SDValue Ext0 = Mul.getOperand(0);
13279 SDValue Ext1 = Mul.getOperand(1);
13280 if (Ext0.getOpcode() != ISD::SIGN_EXTEND ||
13281 Ext1.getOpcode() != ISD::SIGN_EXTEND)
13282 return SDValue();
13283 EVT VecVT = Ext0.getOperand(0).getValueType();
13284 if (!VecVT.isPow2VectorType() || VecVT.getVectorNumElements() == 1)
13285 return SDValue();
13286 if (Ext1.getOperand(0).getValueType() != VecVT ||
13287 VecVT.getScalarType() != ScalarType ||
13288 VT.getScalarSizeInBits() < ScalarType.getScalarSizeInBits() * 2)
13289 return SDValue();
13290
13291 SDLoc DL(Mul);
13292 unsigned LegalLanes = 128 / (ShftAmt + 1);
13293 EVT LegalVecVT = MVT::getVectorVT(ScalarType, LegalLanes);
13294 // For types smaller than a legal vector, extend to a legal vector type and only
13295 // use the needed lanes.
13296 if (VecVT.getSizeInBits() < 128) {
13297 EVT ExtVecVT =
13298 MVT::getVectorVT(MVT::getIntegerVT(128 / VecVT.getVectorNumElements()),
13299 VecVT.getVectorNumElements());
13300 SDValue Inp0 =
13301 DAG.getNode(ISD::ANY_EXTEND, DL, ExtVecVT, Ext0.getOperand(0));
13302 SDValue Inp1 =
13303 DAG.getNode(ISD::ANY_EXTEND, DL, ExtVecVT, Ext1.getOperand(0));
13304 Inp0 = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, LegalVecVT, Inp0);
13305 Inp1 = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, LegalVecVT, Inp1);
13306 SDValue VQDMULH = DAG.getNode(ARMISD::VQDMULH, DL, LegalVecVT, Inp0, Inp1);
13307 SDValue Trunc = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, ExtVecVT, VQDMULH);
13308 Trunc = DAG.getNode(ISD::TRUNCATE, DL, VecVT, Trunc);
13309 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Trunc);
13310 }
13311
13312 // For larger types, split into legal sized chunks.
13313 assert(VecVT.getSizeInBits() % 128 == 0 && "Expected a power2 type");
13314 unsigned NumParts = VecVT.getSizeInBits() / 128;
13315 SmallVector<SDValue> Parts;
13316 for (unsigned I = 0; I < NumParts; ++I) {
13317 SDValue Inp0 =
13318 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LegalVecVT, Ext0.getOperand(0),
13319 DAG.getVectorIdxConstant(I * LegalLanes, DL));
13320 SDValue Inp1 =
13321 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LegalVecVT, Ext1.getOperand(0),
13322 DAG.getVectorIdxConstant(I * LegalLanes, DL));
13323 SDValue VQDMULH = DAG.getNode(ARMISD::VQDMULH, DL, LegalVecVT, Inp0, Inp1);
13324 Parts.push_back(VQDMULH);
13325 }
13326 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT,
13327 DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Parts));
13328}
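A per-element sketch of the i16 case of the pattern described above (illustrative only): widen, multiply, arithmetic-shift by 15 and clamp to the signed maximum, which matches the VQDMULH result including its saturation case.

  #include <cstdint>
  #include <algorithm>
  int16_t qdmulh_lane(int16_t a, int16_t b) {
    int32_t p = (int32_t)a * (int32_t)b;        // sext + mul
    return (int16_t)std::min(p >> 15, 0x7fff);  // ashr 15, clamp at 2^15 - 1
  }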
13329
13330static SDValue PerformVSELECTCombine(SDNode *N,
13331 TargetLowering::DAGCombinerInfo &DCI,
13332 const ARMSubtarget *Subtarget) {
13333 if (!Subtarget->hasMVEIntegerOps())
13334 return SDValue();
13335
13336 if (SDValue V = PerformVQDMULHCombine(N, DCI.DAG))
13337 return V;
13338
13339 // Transforms vselect(not(cond), lhs, rhs) into vselect(cond, rhs, lhs).
13340 //
13341 // We need to re-implement this optimization here as the implementation in the
13342 // Target-Independent DAGCombiner does not handle the kind of constant we make
13343 // (it calls isConstOrConstSplat with AllowTruncation set to false - and for
13344 // good reason, allowing truncation there would break other targets).
13345 //
13346 // Currently, this is only done for MVE, as it's the only target that benefits
13347 // from this transformation (e.g. VPNOT+VPSEL becomes a single VPSEL).
13348 if (N->getOperand(0).getOpcode() != ISD::XOR)
13349 return SDValue();
13350 SDValue XOR = N->getOperand(0);
13351
13352 // Check if the XOR's RHS is either a 1, or a BUILD_VECTOR of 1s.
13353 // It is important to check with truncation allowed as the BUILD_VECTORs we
13354 // generate in those situations will truncate their operands.
13355 ConstantSDNode *Const =
13356 isConstOrConstSplat(XOR->getOperand(1), /*AllowUndefs*/ false,
13357 /*AllowTruncation*/ true);
13358 if (!Const || !Const->isOne())
13359 return SDValue();
13360
13361 // Rewrite into vselect(cond, rhs, lhs).
13362 SDValue Cond = XOR->getOperand(0);
13363 SDValue LHS = N->getOperand(1);
13364 SDValue RHS = N->getOperand(2);
13365 EVT Type = N->getValueType(0);
13366 return DCI.DAG.getNode(ISD::VSELECT, SDLoc(N), Type, Cond, RHS, LHS);
13367}
13368
13369// Convert vsetcc([0,1,2,..], splat(n), ult) -> vctp n
13370static SDValue PerformVSetCCToVCTPCombine(SDNode *N,
13371 TargetLowering::DAGCombinerInfo &DCI,
13372 const ARMSubtarget *Subtarget) {
13373 SDValue Op0 = N->getOperand(0);
13374 SDValue Op1 = N->getOperand(1);
13375 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13376 EVT VT = N->getValueType(0);
13377
13378 if (!Subtarget->hasMVEIntegerOps() ||
13379 !DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
13380 return SDValue();
13381
13382 if (CC == ISD::SETUGE) {
13383 std::swap(Op0, Op1);
13384 CC = ISD::SETULT;
13385 }
13386
13387 if (CC != ISD::SETULT || VT.getScalarSizeInBits() != 1 ||
13388 Op0.getOpcode() != ISD::BUILD_VECTOR)
13389 return SDValue();
13390
13391 // Check first operand is BuildVector of 0,1,2,...
13392 for (unsigned I = 0; I < VT.getVectorNumElements(); I++) {
13393 if (!Op0.getOperand(I).isUndef() &&
13394 !(isa<ConstantSDNode>(Op0.getOperand(I)) &&
13395 Op0.getConstantOperandVal(I) == I))
13396 return SDValue();
13397 }
13398
13399 // The second operand must be a splat of some scalar value Op1S.
13400 SDValue Op1S = DCI.DAG.getSplatValue(Op1);
13401 if (!Op1S)
13402 return SDValue();
13403
13404 unsigned Opc;
13405 switch (VT.getVectorNumElements()) {
13406 case 2:
13407 Opc = Intrinsic::arm_mve_vctp64;
13408 break;
13409 case 4:
13410 Opc = Intrinsic::arm_mve_vctp32;
13411 break;
13412 case 8:
13413 Opc = Intrinsic::arm_mve_vctp16;
13414 break;
13415 case 16:
13416 Opc = Intrinsic::arm_mve_vctp8;
13417 break;
13418 default:
13419 return SDValue();
13420 }
13421
13422 SDLoc DL(N);
13423 return DCI.DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
13424 DCI.DAG.getConstant(Opc, DL, MVT::i32),
13425 DCI.DAG.getZExtOrTrunc(Op1S, DL, MVT::i32));
13426}
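A scalar sketch of the predicate this recognizes (illustrative only): comparing an index vector against a splat yields a mask that is true for the first n lanes, which is exactly what VCTP generates.

  #include <cstdint>
  void tail_predicate(uint32_t n, bool mask[4]) {
    for (uint32_t i = 0; i < 4; ++i)
      mask[i] = (i < n);   // vsetcc([0,1,2,3], splat(n), ult)
  }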
13427
13428static SDValue PerformABSCombine(SDNode *N,
13429 TargetLowering::DAGCombinerInfo &DCI,
13430 const ARMSubtarget *Subtarget) {
13431 SelectionDAG &DAG = DCI.DAG;
13432 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13433
13434 if (TLI.isOperationLegal(N->getOpcode(), N->getValueType(0)))
13435 return SDValue();
13436
13437 return TLI.expandABS(N, DAG);
13438}
13439
13440/// PerformADDECombine - Target-specific dag combine transform from
13441/// ARMISD::ADDC, ARMISD::ADDE, and ISD::MUL_LOHI to MLAL or
13442/// ARMISD::ADDC, ARMISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL
13443static SDValue PerformADDECombine(SDNode *N,
13444 TargetLowering::DAGCombinerInfo &DCI,
13445 const ARMSubtarget *Subtarget) {
13446 // Only ARM and Thumb2 support UMLAL/SMLAL.
13447 if (Subtarget->isThumb1Only())
13448 return PerformAddeSubeCombine(N, DCI, Subtarget);
13449
13450 // Only perform the checks after legalize when the pattern is available.
13451 if (DCI.isBeforeLegalize()) return SDValue();
13452
13453 return AddCombineTo64bitUMAAL(N, DCI, Subtarget);
13454}
13455
13456/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
13457/// operands N0 and N1. This is a helper for PerformADDCombine that is
13458/// called with the default operands, and if that fails, with commuted
13459/// operands.
13460static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
13461 TargetLowering::DAGCombinerInfo &DCI,
13462 const ARMSubtarget *Subtarget){
13463 // Attempt to create vpadd for this add.
13464 if (SDValue Result = AddCombineToVPADD(N, N0, N1, DCI, Subtarget))
13465 return Result;
13466
13467 // Attempt to create vpaddl for this add.
13468 if (SDValue Result = AddCombineVUZPToVPADDL(N, N0, N1, DCI, Subtarget))
13469 return Result;
13470 if (SDValue Result = AddCombineBUILD_VECTORToVPADDL(N, N0, N1, DCI,
13471 Subtarget))
13472 return Result;
13473
13474 // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
13475 if (N0.getNode()->hasOneUse())
13476 if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI))
13477 return Result;
13478 return SDValue();
13479}
13480
13481static SDValue TryDistrubutionADDVecReduce(SDNode *N, SelectionDAG &DAG) {
13482 EVT VT = N->getValueType(0);
13483 SDValue N0 = N->getOperand(0);
13484 SDValue N1 = N->getOperand(1);
13485 SDLoc dl(N);
13486
13487 auto IsVecReduce = [](SDValue Op) {
13488 switch (Op.getOpcode()) {
13489 case ISD::VECREDUCE_ADD:
13490 case ARMISD::VADDVs:
13491 case ARMISD::VADDVu:
13492 case ARMISD::VMLAVs:
13493 case ARMISD::VMLAVu:
13494 return true;
13495 }
13496 return false;
13497 };
13498
13499 auto DistrubuteAddAddVecReduce = [&](SDValue N0, SDValue N1) {
13500 // Distribute add(X, add(vecreduce(Y), vecreduce(Z))) ->
13501 // add(add(X, vecreduce(Y)), vecreduce(Z))
13502 // to make better use of vaddva style instructions.
13503 if (VT == MVT::i32 && N1.getOpcode() == ISD::ADD && !IsVecReduce(N0) &&
13504 IsVecReduce(N1.getOperand(0)) && IsVecReduce(N1.getOperand(1)) &&
13505 !isa<ConstantSDNode>(N0) && N1->hasOneUse()) {
13506 SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, N0, N1.getOperand(0));
13507 return DAG.getNode(ISD::ADD, dl, VT, Add0, N1.getOperand(1));
13508 }
13509 // And turn add(add(A, reduce(B)), add(C, reduce(D))) ->
13510 // add(add(add(A, C), reduce(B)), reduce(D))
13511 if (VT == MVT::i32 && N0.getOpcode() == ISD::ADD &&
13512 N1.getOpcode() == ISD::ADD && N0->hasOneUse() && N1->hasOneUse()) {
13513 unsigned N0RedOp = 0;
13514 if (!IsVecReduce(N0.getOperand(N0RedOp))) {
13515 N0RedOp = 1;
13516 if (!IsVecReduce(N0.getOperand(N0RedOp)))
13517 return SDValue();
13518 }
13519
13520 unsigned N1RedOp = 0;
13521 if (!IsVecReduce(N1.getOperand(N1RedOp)))
13522 N1RedOp = 1;
13523 if (!IsVecReduce(N1.getOperand(N1RedOp)))
13524 return SDValue();
13525
13526 SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, N0.getOperand(1 - N0RedOp),
13527 N1.getOperand(1 - N1RedOp));
13528 SDValue Add1 =
13529 DAG.getNode(ISD::ADD, dl, VT, Add0, N0.getOperand(N0RedOp));
13530 return DAG.getNode(ISD::ADD, dl, VT, Add1, N1.getOperand(N1RedOp));
13531 }
13532 return SDValue();
13533 };
13534 if (SDValue R = DistrubuteAddAddVecReduce(N0, N1))
13535 return R;
13536 if (SDValue R = DistrubuteAddAddVecReduce(N1, N0))
13537 return R;
13538
13539 // Distribute add(vecreduce(load(Y)), vecreduce(load(Z)))
13540 // Or add(add(X, vecreduce(load(Y))), vecreduce(load(Z)))
13541 // by ascending load offsets. This can help cores prefetch if the order of
13542 // loads is more predictable.
13543 auto DistrubuteVecReduceLoad = [&](SDValue N0, SDValue N1, bool IsForward) {
13544 // Check if two reductions are known to load data where one is before/after
13545 // another. Return negative if N0 loads data before N1, positive if N1 is
13546 // before N0, and 0 if nothing is known.
13547 auto IsKnownOrderedLoad = [&](SDValue N0, SDValue N1) {
13548 // Look through to the first operand of a MUL, for the VMLA case.
13549 // Currently only looks at the first operand, in the hope they are equal.
13550 if (N0.getOpcode() == ISD::MUL)
13551 N0 = N0.getOperand(0);
13552 if (N1.getOpcode() == ISD::MUL)
13553 N1 = N1.getOperand(0);
13554
13555 // Return true if the two operands are loads to the same object and the
13556 // offset of the first is known to be less than the offset of the second.
13557 LoadSDNode *Load0 = dyn_cast<LoadSDNode>(N0);
13558 LoadSDNode *Load1 = dyn_cast<LoadSDNode>(N1);
13559 if (!Load0 || !Load1 || Load0->getChain() != Load1->getChain() ||
13560 !Load0->isSimple() || !Load1->isSimple() || Load0->isIndexed() ||
13561 Load1->isIndexed())
13562 return 0;
13563
13564 auto BaseLocDecomp0 = BaseIndexOffset::match(Load0, DAG);
13565 auto BaseLocDecomp1 = BaseIndexOffset::match(Load1, DAG);
13566
13567 if (!BaseLocDecomp0.getBase() ||
13568 BaseLocDecomp0.getBase() != BaseLocDecomp1.getBase() ||
13569 !BaseLocDecomp0.hasValidOffset() || !BaseLocDecomp1.hasValidOffset())
13570 return 0;
13571 if (BaseLocDecomp0.getOffset() < BaseLocDecomp1.getOffset())
13572 return -1;
13573 if (BaseLocDecomp0.getOffset() > BaseLocDecomp1.getOffset())
13574 return 1;
13575 return 0;
13576 };
13577
13578 SDValue X;
13579 if (N0.getOpcode() == ISD::ADD && N0->hasOneUse()) {
13580 if (IsVecReduce(N0.getOperand(0)) && IsVecReduce(N0.getOperand(1))) {
13581 int IsBefore = IsKnownOrderedLoad(N0.getOperand(0).getOperand(0),
13582 N0.getOperand(1).getOperand(0));
13583 if (IsBefore < 0) {
13584 X = N0.getOperand(0);
13585 N0 = N0.getOperand(1);
13586 } else if (IsBefore > 0) {
13587 X = N0.getOperand(1);
13588 N0 = N0.getOperand(0);
13589 } else
13590 return SDValue();
13591 } else if (IsVecReduce(N0.getOperand(0))) {
13592 X = N0.getOperand(1);
13593 N0 = N0.getOperand(0);
13594 } else if (IsVecReduce(N0.getOperand(1))) {
13595 X = N0.getOperand(0);
13596 N0 = N0.getOperand(1);
13597 } else
13598 return SDValue();
13599 } else if (IsForward && IsVecReduce(N0) && IsVecReduce(N1) &&
13600 IsKnownOrderedLoad(N0.getOperand(0), N1.getOperand(0)) < 0) {
13601 // Note this is backward from what you would expect. We create
13602 // add(reduce(load + 16), reduce(load + 0)) so that the
13603 // add(reduce(load + 16), X) is combined into VADDVA(X, load + 16), leaving
13604 // the X as VADDV(load + 0).
13605 return DAG.getNode(ISD::ADD, dl, VT, N1, N0);
13606 } else
13607 return SDValue();
13608
13609 if (!IsVecReduce(N0) || !IsVecReduce(N1))
13610 return SDValue();
13611
13612 if (IsKnownOrderedLoad(N1.getOperand(0), N0.getOperand(0)) >= 0)
13613 return SDValue();
13614
13615 // Switch from add(add(X, N0), N1) to add(add(X, N1), N0)
13616 SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, X, N1);
13617 return DAG.getNode(ISD::ADD, dl, VT, Add0, N0);
13618 };
13619 if (SDValue R = DistrubuteVecReduceLoad(N0, N1, true))
13620 return R;
13621 if (SDValue R = DistrubuteVecReduceLoad(N1, N0, false))
13622 return R;
13623 return SDValue();
13624}
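The rewrites above are plain reassociations of integer adds; a minimal sketch of the first one (illustrative only, with ry and rz standing for the reduction results):

  int distribute(int x, int ry, int rz) {
    return (x + ry) + rz;  // same value as x + (ry + rz), but each add can fold into a VADDVA
  }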
13625
13626static SDValue PerformADDVecReduce(SDNode *N, SelectionDAG &DAG,
13627 const ARMSubtarget *Subtarget) {
13628 if (!Subtarget->hasMVEIntegerOps())
13629 return SDValue();
13630
13631 if (SDValue R = TryDistrubutionADDVecReduce(N, DAG))
13632 return R;
13633
13634 EVT VT = N->getValueType(0);
13635 SDValue N0 = N->getOperand(0);
13636 SDValue N1 = N->getOperand(1);
13637 SDLoc dl(N);
13638
13639 if (VT != MVT::i64)
13640 return SDValue();
13641
13642 // We are looking for an i64 add of a VADDLVx. Due to these being i64s, this
13643 // will look like:
13644 // t1: i32,i32 = ARMISD::VADDLVs x
13645 // t2: i64 = build_pair t1, t1:1
13646 // t3: i64 = add t2, y
13647 // Otherwise we try to push the add up above VADDLVAx, to potentially allow
13648 // the add to be simplified separately.
13649 // We also need to check for sext / zext and commutative adds.
13650 auto MakeVecReduce = [&](unsigned Opcode, unsigned OpcodeA, SDValue NA,
13651 SDValue NB) {
13652 if (NB->getOpcode() != ISD::BUILD_PAIR)
13653 return SDValue();
13654 SDValue VecRed = NB->getOperand(0);
13655 if ((VecRed->getOpcode() != Opcode && VecRed->getOpcode() != OpcodeA) ||
13656 VecRed.getResNo() != 0 ||
13657 NB->getOperand(1) != SDValue(VecRed.getNode(), 1))
13658 return SDValue();
13659
13660 if (VecRed->getOpcode() == OpcodeA) {
13661 // add(NA, VADDLVA(Inp), Y) -> VADDLVA(add(NA, Inp), Y)
13662 SDValue Inp = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64,
13663 VecRed.getOperand(0), VecRed.getOperand(1));
13664 NA = DAG.getNode(ISD::ADD, dl, MVT::i64, Inp, NA);
13665 }
13666
13667 SmallVector<SDValue, 4> Ops(2);
13668 std::tie(Ops[0], Ops[1]) = DAG.SplitScalar(NA, dl, MVT::i32, MVT::i32);
13669
13670 unsigned S = VecRed->getOpcode() == OpcodeA ? 2 : 0;
13671 for (unsigned I = S, E = VecRed.getNumOperands(); I < E; I++)
13672 Ops.push_back(VecRed->getOperand(I));
13673 SDValue Red =
13674 DAG.getNode(OpcodeA, dl, DAG.getVTList({MVT::i32, MVT::i32}), Ops);
13675 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Red,
13676 SDValue(Red.getNode(), 1));
13677 };
13678
13679 if (SDValue M = MakeVecReduce(ARMISD::VADDLVs, ARMISD::VADDLVAs, N0, N1))
13680 return M;
13681 if (SDValue M = MakeVecReduce(ARMISD::VADDLVu, ARMISD::VADDLVAu, N0, N1))
13682 return M;
13683 if (SDValue M = MakeVecReduce(ARMISD::VADDLVs, ARMISD::VADDLVAs, N1, N0))
13684 return M;
13685 if (SDValue M = MakeVecReduce(ARMISD::VADDLVu, ARMISD::VADDLVAu, N1, N0))
13686 return M;
13687 if (SDValue M = MakeVecReduce(ARMISD::VADDLVps, ARMISD::VADDLVAps, N0, N1))
13688 return M;
13689 if (SDValue M = MakeVecReduce(ARMISD::VADDLVpu, ARMISD::VADDLVApu, N0, N1))
13690 return M;
13691 if (SDValue M = MakeVecReduce(ARMISD::VADDLVps, ARMISD::VADDLVAps, N1, N0))
13692 return M;
13693 if (SDValue M = MakeVecReduce(ARMISD::VADDLVpu, ARMISD::VADDLVApu, N1, N0))
13694 return M;
13695 if (SDValue M = MakeVecReduce(ARMISD::VMLALVs, ARMISD::VMLALVAs, N0, N1))
13696 return M;
13697 if (SDValue M = MakeVecReduce(ARMISD::VMLALVu, ARMISD::VMLALVAu, N0, N1))
13698 return M;
13699 if (SDValue M = MakeVecReduce(ARMISD::VMLALVs, ARMISD::VMLALVAs, N1, N0))
13700 return M;
13701 if (SDValue M = MakeVecReduce(ARMISD::VMLALVu, ARMISD::VMLALVAu, N1, N0))
13702 return M;
13703 if (SDValue M = MakeVecReduce(ARMISD::VMLALVps, ARMISD::VMLALVAps, N0, N1))
13704 return M;
13705 if (SDValue M = MakeVecReduce(ARMISD::VMLALVpu, ARMISD::VMLALVApu, N0, N1))
13706 return M;
13707 if (SDValue M = MakeVecReduce(ARMISD::VMLALVps, ARMISD::VMLALVAps, N1, N0))
13708 return M;
13709 if (SDValue M = MakeVecReduce(ARMISD::VMLALVpu, ARMISD::VMLALVApu, N1, N0))
13710 return M;
13711 return SDValue();
13712}
13713
13714bool
13715ARMTargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
13716 CombineLevel Level) const {
13717 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||(static_cast <bool> ((N->getOpcode() == ISD::SHL || N
->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL
) && "Expected shift op") ? void (0) : __assert_fail (
"(N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) && \"Expected shift op\""
, "llvm/lib/Target/ARM/ARMISelLowering.cpp", 13719, __extension__
__PRETTY_FUNCTION__))
13718 N->getOpcode() == ISD::SRL) &&(static_cast <bool> ((N->getOpcode() == ISD::SHL || N
->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL
) && "Expected shift op") ? void (0) : __assert_fail (
"(N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) && \"Expected shift op\""
, "llvm/lib/Target/ARM/ARMISelLowering.cpp", 13719, __extension__
__PRETTY_FUNCTION__))
13719 "Expected shift op")(static_cast <bool> ((N->getOpcode() == ISD::SHL || N
->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL
) && "Expected shift op") ? void (0) : __assert_fail (
"(N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) && \"Expected shift op\""
, "llvm/lib/Target/ARM/ARMISelLowering.cpp", 13719, __extension__
__PRETTY_FUNCTION__))
;
13720
13721 if (Level == BeforeLegalizeTypes)
13722 return true;
13723
13724 if (N->getOpcode() != ISD::SHL)
13725 return true;
13726
13727 if (Subtarget->isThumb1Only()) {
13728 // Avoid making expensive immediates by commuting shifts. (This logic
13729 // only applies to Thumb1 because ARM and Thumb2 immediates can be shifted
13730 // for free.)
13731 if (N->getOpcode() != ISD::SHL)
13732 return true;
13733 SDValue N1 = N->getOperand(0);
13734 if (N1->getOpcode() != ISD::ADD && N1->getOpcode() != ISD::AND &&
13735 N1->getOpcode() != ISD::OR && N1->getOpcode() != ISD::XOR)
13736 return true;
13737 if (auto *Const = dyn_cast<ConstantSDNode>(N1->getOperand(1))) {
13738 if (Const->getAPIntValue().ult(256))
13739 return false;
13740 if (N1->getOpcode() == ISD::ADD && Const->getAPIntValue().slt(0) &&
13741 Const->getAPIntValue().sgt(-256))
13742 return false;
13743 }
13744 return true;
13745 }
13746
13747 // Turn off commute-with-shift transform after legalization, so it doesn't
13748 // conflict with PerformSHLSimplify. (We could try to detect when
13749 // PerformSHLSimplify would trigger more precisely, but it isn't
13750 // really necessary.)
13751 return false;
13752}
13753
13754bool ARMTargetLowering::isDesirableToCommuteXorWithShift(
13755 const SDNode *N) const {
13756 assert(N->getOpcode() == ISD::XOR &&
13757 (N->getOperand(0).getOpcode() == ISD::SHL ||
13758 N->getOperand(0).getOpcode() == ISD::SRL) &&
13759 "Expected XOR(SHIFT) pattern");
13760
13761 // Only commute if the entire NOT mask is a hidden shifted mask.
13762 auto *XorC = dyn_cast<ConstantSDNode>(N->getOperand(1));
13763 auto *ShiftC = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1));
13764 if (XorC && ShiftC) {
13765 unsigned MaskIdx, MaskLen;
13766 if (XorC->getAPIntValue().isShiftedMask(MaskIdx, MaskLen)) {
13767 unsigned ShiftAmt = ShiftC->getZExtValue();
13768 unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
13769 if (N->getOperand(0).getOpcode() == ISD::SHL)
13770 return MaskIdx == ShiftAmt && MaskLen == (BitWidth - ShiftAmt);
13771 return MaskIdx == 0 && MaskLen == (BitWidth - ShiftAmt);
13772 }
13773 }
13774
13775 return false;
13776}
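// Editor's illustrative sketch, not part of ARMISelLowering.cpp: the check
// above only accepts masks that cover exactly the bits produced by the shift,
// in which case the commuted form XORs with all-ones (a plain NOT) before
// shifting. The identity for both shift kinds, on 32-bit unsigned values with
// a shift amount of 8:
static_assert(((0x12345678u << 8) ^ 0xFFFFFF00u) ==
                  ((0x12345678u ^ 0xFFFFFFFFu) << 8),
              "xor(shl(x, 8), mask) == shl(not(x), 8) when mask == ~0u << 8");
static_assert(((0x12345678u >> 8) ^ 0x00FFFFFFu) ==
                  ((0x12345678u ^ 0xFFFFFFFFu) >> 8),
              "xor(srl(x, 8), mask) == srl(not(x), 8) when mask == ~0u >> 8");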
13777
13778bool ARMTargetLowering::shouldFoldConstantShiftPairToMask(
13779 const SDNode *N, CombineLevel Level) const {
13780  assert(((N->getOpcode() == ISD::SHL &&
13781           N->getOperand(0).getOpcode() == ISD::SRL) ||
13782          (N->getOpcode() == ISD::SRL &&
13783           N->getOperand(0).getOpcode() == ISD::SHL)) &&
13784         "Expected shift-shift mask");
13785
13786 if (!Subtarget->isThumb1Only())
13787 return true;
13788
13789 if (Level == BeforeLegalizeTypes)
13790 return true;
13791
13792 return false;
13793}
13794
13795bool ARMTargetLowering::shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
13796 EVT VT) const {
13797 return Subtarget->hasMVEIntegerOps() && isTypeLegal(VT);
13798}
13799
13800bool ARMTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
13801 if (!Subtarget->hasNEON()) {
13802 if (Subtarget->isThumb1Only())
13803 return VT.getScalarSizeInBits() <= 32;
13804 return true;
13805 }
13806 return VT.isScalarInteger();
13807}
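// Editor's illustrative sketch, not part of ARMISelLowering.cpp: the hook
// above chooses between two equivalent ways of computing a + b + 1, namely
// incrementing the add or subtracting the bitwise NOT. Their equivalence
// modulo 2^32, checked on sample 32-bit unsigned values:
static_assert(0x12345678u + 0x0F0F0F0Fu + 1u ==
                  0x12345678u - (0x0F0F0F0Fu ^ 0xFFFFFFFFu),
              "a + b + 1 == a - ~b in 32-bit modular arithmetic");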
13808
13809bool ARMTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
13810 EVT VT) const {
13811 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
13812 return false;
13813
13814 switch (FPVT.getSimpleVT().SimpleTy) {
13815 case MVT::f16:
13816 return Subtarget->hasVFP2Base();
13817 case MVT::f32:
13818 return Subtarget->hasVFP2Base();
13819 case MVT::f64:
13820 return Subtarget->hasFP64();
13821 case MVT::v4f32:
13822 case MVT::v8f16:
13823 return Subtarget->hasMVEFloatOps();
13824 default:
13825 return false;
13826 }
13827}
13828
13829static SDValue PerformSHLSimplify(SDNode *N,
13830 TargetLowering::DAGCombinerInfo &DCI,
13831 const ARMSubtarget *ST) {
13832 // Allow the generic combiner to identify potential bswaps.
13833 if (DCI.isBeforeLegalize())
13834 return SDValue();
13835
13836 // DAG combiner will fold:
13837 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
13838  //      (shl (or x, c1), c2) -> (or  (shl x, c2), c1 << c2)
13839  // Other code patterns that can also be modified have the following form:
13840 // b + ((a << 1) | 510)
13841 // b + ((a << 1) & 510)
13842 // b + ((a << 1) ^ 510)
13843 // b + ((a << 1) + 510)
13844
13845 // Many instructions can perform the shift for free, but it requires both
13846 // the operands to be registers. If c1 << c2 is too large, a mov immediate
13847  // instruction will be needed. So, unfold back to the original pattern if:
13848  // - c1 and c2 are small enough that they don't require mov imms.
13849  // - the user(s) of the node can perform the shl themselves.
13850
13851 // No shifted operands for 16-bit instructions.
13852 if (ST->isThumb() && ST->isThumb1Only())
13853 return SDValue();
13854
13855 // Check that all the users could perform the shl themselves.
13856 for (auto *U : N->uses()) {
13857 switch(U->getOpcode()) {
13858 default:
13859 return SDValue();
13860 case ISD::SUB:
13861 case ISD::ADD:
13862 case ISD::AND:
13863 case ISD::OR:
13864 case ISD::XOR:
13865 case ISD::SETCC:
13866 case ARMISD::CMP:
13867 // Check that the user isn't already using a constant because there
13868 // aren't any instructions that support an immediate operand and a
13869 // shifted operand.
13870 if (isa<ConstantSDNode>(U->getOperand(0)) ||
13871 isa<ConstantSDNode>(U->getOperand(1)))
13872 return SDValue();
13873
13874 // Check that it's not already using a shift.
13875 if (U->getOperand(0).getOpcode() == ISD::SHL ||
13876 U->getOperand(1).getOpcode() == ISD::SHL)
13877 return SDValue();
13878 break;
13879 }
13880 }
13881
13882 if (N->getOpcode() != ISD::ADD && N->getOpcode() != ISD::OR &&
13883 N->getOpcode() != ISD::XOR && N->getOpcode() != ISD::AND)
13884 return SDValue();
13885
13886 if (N->getOperand(0).getOpcode() != ISD::SHL)
13887 return SDValue();
13888
13889 SDValue SHL = N->getOperand(0);
13890
13891 auto *C1ShlC2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
13892 auto *C2 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
13893 if (!C1ShlC2 || !C2)
13894 return SDValue();
13895
13896 APInt C2Int = C2->getAPIntValue();
13897 APInt C1Int = C1ShlC2->getAPIntValue();
13898 unsigned C2Width = C2Int.getBitWidth();
13899 if (C2Int.uge(C2Width))
13900 return SDValue();
13901 uint64_t C2Value = C2Int.getZExtValue();
13902
13903 // Check that performing a lshr will not lose any information.
13904 APInt Mask = APInt::getHighBitsSet(C2Width, C2Width - C2Value);
13905 if ((C1Int & Mask) != C1Int)
13906 return SDValue();
13907
13908 // Shift the first constant.
13909 C1Int.lshrInPlace(C2Int);
13910
13911 // The immediates are encoded as an 8-bit value that can be rotated.
13912 auto LargeImm = [](const APInt &Imm) {
13913 unsigned Zeros = Imm.countl_zero() + Imm.countr_zero();
13914 return Imm.getBitWidth() - Zeros > 8;
13915 };
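  // (Editor's illustrative note, not part of the original file: LargeImm only
  // looks at how many bit positions the set bits of Imm span. For Imm = 510 =
  // 0x1fe the span is 32 - 23 leading zeros - 1 trailing zero = 8, so it is
  // not flagged; Imm = 511 = 0x1ff spans 9 bits and is rejected as large.)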
13916
13917 if (LargeImm(C1Int) || LargeImm(C2Int))
13918 return SDValue();
13919
13920 SelectionDAG &DAG = DCI.DAG;
13921 SDLoc dl(N);
13922 SDValue X = SHL.getOperand(0);
13923 SDValue BinOp = DAG.getNode(N->getOpcode(), dl, MVT::i32, X,
13924 DAG.getConstant(C1Int, dl, MVT::i32));
13925 // Shift left to compensate for the lshr of C1Int.
13926 SDValue Res = DAG.getNode(ISD::SHL, dl, MVT::i32, BinOp, SHL.getOperand(1));
13927
13928  LLVM_DEBUG(dbgs() << "Simplify shl use:\n"; SHL.getOperand(0).dump();
13929             SHL.dump(); N->dump());
13930  LLVM_DEBUG(dbgs() << "Into:\n"; X.dump(); BinOp.dump(); Res.dump());
13931 return Res;
13932}
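// Editor's illustrative sketch, not part of ARMISelLowering.cpp: the unfold
// above recovers (shl (add x, c1), c2) from (add (shl x, c2), c1 << c2) so
// that a user such as ADD can fold the shift into a shifted-register operand.
// The underlying identity for the example in the comments (c1 = 255, c2 = 1,
// i.e. b + ((a << 1) + 510)), on 32-bit unsigned values:
static_assert(((0x12345678u + 255u) << 1) == ((0x12345678u << 1) + 510u),
              "(shl (add x, 255), 1) == (add (shl x, 1), 510)");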
13933
13934
13935/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
13936///
13937static SDValue PerformADDCombine(SDNode *N,
13938 TargetLowering::DAGCombinerInfo &DCI,
13939 const ARMSubtarget *Subtarget) {
13940 SDValue N0 = N->getOperand(0);
13941 SDValue N1 = N->getOperand(1);
13942
13943 // Only works one way, because it needs an immediate operand.
13944 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
13945 return Result;
13946
13947 if (SDValue Result = PerformADDVecReduce(N, DCI.DAG, Subtarget))
13948 return Result;
13949
13950 // First try with the default operand order.
13951 if (SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget))
13952 return Result;
13953
13954 // If that didn't work, try again with the operands commuted.
13955 return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget);
13956}
13957
13958// Combine (sub 0, (csinc X, Y, CC)) -> (csinv -X, Y, CC)
13959// providing -X is as cheap as X (currently, just a constant).
13960static SDValue PerformSubCSINCCombine(SDNode *N, SelectionDAG &DAG) {
13961 if (N->getValueType(0) != MVT::i32 || !isNullConstant(N->getOperand(0)))
13962 return SDValue();
13963 SDValue CSINC = N->getOperand(1);
13964 if (CSINC.getOpcode() != ARMISD::CSINC || !CSINC.hasOneUse())
13965 return SDValue();
13966
13967 ConstantSDNode *X = dyn_cast<ConstantSDNode>(CSINC.getOperand(0));
13968 if (!X)
13969 return SDValue();
13970
13971 return DAG.getNode(ARMISD::CSINV, SDLoc(N), MVT::i32,
13972 DAG.getNode(ISD::SUB, SDLoc(N), MVT::i32, N->getOperand(0),
13973 CSINC.getOperand(0)),
13974 CSINC.getOperand(1), CSINC.getOperand(2),
13975 CSINC.getOperand(3));
13976}
13977
13978/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
13979///
13980static SDValue PerformSUBCombine(SDNode *N,
13981 TargetLowering::DAGCombinerInfo &DCI,
13982 const ARMSubtarget *Subtarget) {
13983 SDValue N0 = N->getOperand(0);
13984 SDValue N1 = N->getOperand(1);
13985
13986  // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub x, c))
13987 if (N1.getNode()->hasOneUse())
13988 if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI))
13989 return Result;
13990
13991 if (SDValue R = PerformSubCSINCCombine(N, DCI.DAG))
13992 return R;
13993
13994 if (!Subtarget->hasMVEIntegerOps() || !N->getValueType(0).isVector())
13995 return SDValue();
13996
13997 // Fold (sub (ARMvmovImm 0), (ARMvdup x)) -> (ARMvdup (sub 0, x))
13998 // so that we can readily pattern match more mve instructions which can use
13999 // a scalar operand.
14000 SDValue VDup = N->getOperand(1);
14001 if (VDup->getOpcode() != ARMISD::VDUP)
14002 return SDValue();
14003
14004 SDValue VMov = N->getOperand(0);
14005 if (VMov->getOpcode() == ISD::BITCAST)
14006 VMov = VMov->getOperand(0);
14007
14008 if (VMov->getOpcode() != ARMISD::VMOVIMM || !isZeroVector(VMov))
14009 return SDValue();
14010
14011 SDLoc dl(N);
14012 SDValue Negate = DCI.DAG.getNode(ISD::SUB, dl, MVT::i32,
14013 DCI.DAG.getConstant(0, dl, MVT::i32),
14014 VDup->getOperand(0));
14015 return DCI.DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0), Negate);
14016}
14017
14018/// PerformVMULCombine
14019/// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the
14020/// special multiplier accumulator forwarding.
14021/// vmul d3, d0, d2
14022/// vmla d3, d1, d2
14023/// is faster than
14024/// vadd d3, d0, d1
14025/// vmul d3, d3, d2
14026// However, for (A + B) * (A + B),
14027// vadd d2, d0, d1
14028// vmul d3, d0, d2
14029// vmla d3, d1, d2
14030// is slower than
14031// vadd d2, d0, d1
14032// vmul d3, d2, d2
14033static SDValue PerformVMULCombine(SDNode *N,
14034 TargetLowering::DAGCombinerInfo &DCI,
14035 const ARMSubtarget *Subtarget) {
14036 if (!Subtarget->hasVMLxForwarding())
14037 return SDValue();
14038
14039 SelectionDAG &DAG = DCI.DAG;
14040 SDValue N0 = N->getOperand(0);
14041 SDValue N1 = N->getOperand(1);
14042 unsigned Opcode = N0.getOpcode();
14043 if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
14044 Opcode != ISD::FADD && Opcode != ISD::FSUB) {
14045 Opcode = N1.getOpcode();
14046 if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
14047 Opcode != ISD::FADD && Opcode != ISD::FSUB)
14048 return SDValue();
14049 std::swap(N0, N1);
14050 }
14051
14052 if (N0 == N1)
14053 return SDValue();
14054
14055 EVT VT = N->getValueType(0);
14056 SDLoc DL(N);
14057 SDValue N00 = N0->getOperand(0);
14058 SDValue N01 = N0->getOperand(1);
14059 return DAG.getNode(Opcode, DL, VT,
14060 DAG.getNode(ISD::MUL, DL, VT, N00, N1),
14061 DAG.getNode(ISD::MUL, DL, VT, N01, N1));
14062}
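// Editor's illustrative sketch, not part of ARMISelLowering.cpp: the rewrite
// above distributes the multiply, (A + B) * C -> (A * C) + (B * C), which is
// exact for integer vectors since modular arithmetic distributes. A scalar
// spot check on 32-bit unsigned values:
static_assert((0x111u + 0x222u) * 0x33u == 0x111u * 0x33u + 0x222u * 0x33u,
              "(A + B) * C == (A * C) + (B * C)");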
14063
14064static SDValue PerformMVEVMULLCombine(SDNode *N, SelectionDAG &DAG,
14065 const ARMSubtarget *Subtarget) {
14066 EVT VT = N->getValueType(0);
14067 if (VT != MVT::v2i64)
14068 return SDValue();
14069
14070 SDValue N0 = N->getOperand(0);
14071 SDValue N1 = N->getOperand(1);
14072
14073 auto IsSignExt = [&](SDValue Op) {
14074 if (Op->getOpcode() != ISD::SIGN_EXTEND_INREG)
14075 return SDValue();
14076 EVT VT = cast<VTSDNode>(Op->getOperand(1))->getVT();
14077 if (VT.getScalarSizeInBits() == 32)
14078 return Op->getOperand(0);
14079 return SDValue();
14080 };
14081 auto IsZeroExt = [&](SDValue Op) {
14082 // Zero extends are a little more awkward. At the point we are matching
14083 // this, we are looking for an AND with a (-1, 0, -1, 0) buildvector mask.
14084    // That might be before or after a bitcast depending on how the and is
14085 // placed. Because this has to look through bitcasts, it is currently only
14086 // supported on LE.
14087 if (!Subtarget->isLittle())
14088 return SDValue();
14089
14090 SDValue And = Op;
14091 if (And->getOpcode() == ISD::BITCAST)
14092 And = And->getOperand(0);
14093 if (And->getOpcode() != ISD::AND)
14094 return SDValue();
14095 SDValue Mask = And->getOperand(1);
14096 if (Mask->getOpcode() == ISD::BITCAST)
14097 Mask = Mask->getOperand(0);
14098
14099 if (Mask->getOpcode() != ISD::BUILD_VECTOR ||
14100 Mask.getValueType() != MVT::v4i32)
14101 return SDValue();
14102 if (isAllOnesConstant(Mask->getOperand(0)) &&
14103 isNullConstant(Mask->getOperand(1)) &&
14104 isAllOnesConstant(Mask->getOperand(2)) &&
14105 isNullConstant(Mask->getOperand(3)))
14106 return And->getOperand(0);
14107 return SDValue();
14108 };
14109
14110 SDLoc dl(N);
14111 if (SDValue Op0 = IsSignExt(N0)) {
14112 if (SDValue Op1 = IsSignExt(N1)) {
14113 SDValue New0a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op0);
14114 SDValue New1a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op1);
14115 return DAG.getNode(ARMISD::VMULLs, dl, VT, New0a, New1a);
14116 }
14117 }
14118 if (SDValue Op0 = IsZeroExt(N0)) {
14119 if (SDValue Op1 = IsZeroExt(N1)) {
14120 SDValue New0a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op0);
14121 SDValue New1a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op1);
14122 return DAG.getNode(ARMISD::VMULLu, dl, VT, New0a, New1a);
14123 }
14124 }
14125
14126 return SDValue();
14127}
14128
14129static SDValue PerformMULCombine(SDNode *N,
14130 TargetLowering::DAGCombinerInfo &DCI,
14131 const ARMSubtarget *Subtarget) {
14132 SelectionDAG &DAG = DCI.DAG;
14133
14134 EVT VT = N->getValueType(0);
14135 if (Subtarget->hasMVEIntegerOps() && VT == MVT::v2i64)
14136 return PerformMVEVMULLCombine(N, DAG, Subtarget);
14137
14138 if (Subtarget->isThumb1Only())
14139 return SDValue();
14140
14141 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
14142 return SDValue();
14143
14144 if (VT.is64BitVector() || VT.is128BitVector())
14145 return PerformVMULCombine(N, DCI, Subtarget);
14146 if (VT != MVT::i32)
14147 return SDValue();
14148
14149 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14150 if (!C)
14151 return SDValue();
14152
14153 int64_t MulAmt = C->getSExtValue();
14154 unsigned ShiftAmt = llvm::countr_zero<uint64_t>(MulAmt);
14155
14156 ShiftAmt = ShiftAmt & (32 - 1);
14157 SDValue V = N->getOperand(0);
14158 SDLoc DL(N);
14159
14160 SDValue Res;
14161 MulAmt >>= ShiftAmt;
14162
14163 if (MulAmt >= 0) {
14164 if (llvm::has_single_bit<uint32_t>(MulAmt - 1)) {
14165 // (mul x, 2^N + 1) => (add (shl x, N), x)
14166 Res = DAG.getNode(ISD::ADD, DL, VT,
14167 V,
14168 DAG.getNode(ISD::SHL, DL, VT,
14169 V,
14170 DAG.getConstant(Log2_32(MulAmt - 1), DL,
14171 MVT::i32)));
14172 } else if (llvm::has_single_bit<uint32_t>(MulAmt + 1)) {
14173 // (mul x, 2^N - 1) => (sub (shl x, N), x)
14174 Res = DAG.getNode(ISD::SUB, DL, VT,
14175 DAG.getNode(ISD::SHL, DL, VT,
14176 V,
14177 DAG.getConstant(Log2_32(MulAmt + 1), DL,
14178 MVT::i32)),
14179 V);
14180 } else
14181 return SDValue();
14182 } else {
14183 uint64_t MulAmtAbs = -MulAmt;
14184 if (llvm::has_single_bit<uint32_t>(MulAmtAbs + 1)) {
14185 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
14186 Res = DAG.getNode(ISD::SUB, DL, VT,
14187 V,
14188 DAG.getNode(ISD::SHL, DL, VT,
14189 V,
14190 DAG.getConstant(Log2_32(MulAmtAbs + 1), DL,
14191 MVT::i32)));
14192 } else if (llvm::has_single_bit<uint32_t>(MulAmtAbs - 1)) {
14193 // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
14194 Res = DAG.getNode(ISD::ADD, DL, VT,
14195 V,
14196 DAG.getNode(ISD::SHL, DL, VT,
14197 V,
14198 DAG.getConstant(Log2_32(MulAmtAbs - 1), DL,
14199 MVT::i32)));
14200 Res = DAG.getNode(ISD::SUB, DL, VT,
14201 DAG.getConstant(0, DL, MVT::i32), Res);
14202 } else
14203 return SDValue();
14204 }
14205
14206 if (ShiftAmt != 0)
14207 Res = DAG.getNode(ISD::SHL, DL, VT,
14208 Res, DAG.getConstant(ShiftAmt, DL, MVT::i32));
14209
14210 // Do not add new nodes to DAG combiner worklist.
14211 DCI.CombineTo(N, Res, false);
14212 return SDValue();
14213}
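// Editor's illustrative sketch, not part of ARMISelLowering.cpp: the
// multiply-by-constant decompositions used above, spot-checked on a sample
// value; the negated cases rely on 32-bit unsigned wraparound.
static_assert(0x1234u * 9u == (0x1234u << 3) + 0x1234u,
              "(mul x, 2^N + 1) => (add (shl x, N), x)");
static_assert(0x1234u * 7u == (0x1234u << 3) - 0x1234u,
              "(mul x, 2^N - 1) => (sub (shl x, N), x)");
static_assert(0x1234u * 0xFFFFFFF9u == 0x1234u - (0x1234u << 3),
              "(mul x, -(2^N - 1)) => (sub x, (shl x, N))");
static_assert(0x1234u * 0xFFFFFFF7u == 0u - ((0x1234u << 3) + 0x1234u),
              "(mul x, -(2^N + 1)) => -(add (shl x, N), x)");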
14214
14215static SDValue CombineANDShift(SDNode *N,
14216 TargetLowering::DAGCombinerInfo &DCI,
14217 const ARMSubtarget *Subtarget) {
14218 // Allow DAGCombine to pattern-match before we touch the canonical form.
14219 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
14220 return SDValue();
14221
14222 if (N->getValueType(0) != MVT::i32)
14223 return SDValue();
14224
14225 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14226 if (!N1C)
14227 return SDValue();
14228
14229 uint32_t C1 = (uint32_t)N1C->getZExtValue();
14230 // Don't transform uxtb/uxth.
14231 if (C1 == 255 || C1 == 65535)
14232 return SDValue();
14233
14234 SDNode *N0 = N->getOperand(0).getNode();
14235 if (!N0->hasOneUse())
14236 return SDValue();
14237
14238 if (N0->getOpcode() != ISD::SHL && N0->getOpcode() != ISD::SRL)
14239 return SDValue();
14240
14241 bool LeftShift = N0->getOpcode() == ISD::SHL;
14242
14243 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
14244 if (!N01C)
14245 return SDValue();
14246
14247 uint32_t C2 = (uint32_t)N01C->getZExtValue();
14248 if (!C2 || C2 >= 32)
14249 return SDValue();
14250
14251 // Clear irrelevant bits in the mask.
14252 if (LeftShift)
14253 C1 &= (-1U << C2);
14254 else
14255 C1 &= (-1U >> C2);
14256
14257 SelectionDAG &DAG = DCI.DAG;
14258 SDLoc DL(N);
14259
14260 // We have a pattern of the form "(and (shl x, c2) c1)" or
14261 // "(and (srl x, c2) c1)", where c1 is a shifted mask. Try to
14262 // transform to a pair of shifts, to save materializing c1.
14263
14264 // First pattern: right shift, then mask off leading bits.
14265 // FIXME: Use demanded bits?
14266 if (!LeftShift && isMask_32(C1)) {
14267 uint32_t C3 = llvm::countl_zero(C1);
14268 if (C2 < C3) {
14269 SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
14270 DAG.getConstant(C3 - C2, DL, MVT::i32));
14271 return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,
14272 DAG.getConstant(C3, DL, MVT::i32));
14273 }
14274 }
14275
14276 // First pattern, reversed: left shift, then mask off trailing bits.
14277 if (LeftShift && isMask_32(~C1)) {
14278 uint32_t C3 = llvm::countr_zero(C1);
14279 if (C2 < C3) {
14280 SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
14281 DAG.getConstant(C3 - C2, DL, MVT::i32));
14282 return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
14283 DAG.getConstant(C3, DL, MVT::i32));
14284 }
14285 }
14286
14287 // Second pattern: left shift, then mask off leading bits.
14288 // FIXME: Use demanded bits?
14289 if (LeftShift && isShiftedMask_32(C1)) {
14290 uint32_t Trailing = llvm::countr_zero(C1);
14291 uint32_t C3 = llvm::countl_zero(C1);
14292 if (Trailing == C2 && C2 + C3 < 32) {
14293 SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
14294 DAG.getConstant(C2 + C3, DL, MVT::i32));
14295 return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,
14296 DAG.getConstant(C3, DL, MVT::i32));
14297 }
14298 }
14299
14300 // Second pattern, reversed: right shift, then mask off trailing bits.
14301 // FIXME: Handle other patterns of known/demanded bits.
14302 if (!LeftShift && isShiftedMask_32(C1)) {
14303 uint32_t Leading = llvm::countl_zero(C1);
14304 uint32_t C3 = llvm::countr_zero(C1);
14305 if (Leading == C2 && C2 + C3 < 32) {
14306 SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
14307 DAG.getConstant(C2 + C3, DL, MVT::i32));
14308 return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
14309 DAG.getConstant(C3, DL, MVT::i32));
14310 }
14311 }
14312
14313 // FIXME: Transform "(and (shl x, c2) c1)" ->
14314 // "(shl (and x, c1>>c2), c2)" if "c1 >> c2" is a cheaper immediate than
14315 // c1.
14316 return SDValue();
14317}
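// Editor's illustrative sketch, not part of ARMISelLowering.cpp: the first
// pattern above replaces "(and (srl x, c2), c1)", where c1 is a low mask,
// with a shift pair so that c1 never has to be materialized. For c2 = 2 and
// c1 = 0xff the equivalent pair is (srl (shl x, 22), 24), on 32-bit unsigned
// values:
static_assert(((0xABCDEF12u >> 2) & 0xFFu) == ((0xABCDEF12u << 22) >> 24),
              "(and (srl x, 2), 0xff) == (srl (shl x, 22), 24)");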
14318
14319static SDValue PerformANDCombine(SDNode *N,
14320 TargetLowering::DAGCombinerInfo &DCI,
14321 const ARMSubtarget *Subtarget) {
14322 // Attempt to use immediate-form VBIC
14323 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
14324 SDLoc dl(N);
14325 EVT VT = N->getValueType(0);
14326 SelectionDAG &DAG = DCI.DAG;
14327
14328 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT) || VT == MVT::v2i1 ||
14329 VT == MVT::v4i1 || VT == MVT::v8i1 || VT == MVT::v16i1)
14330 return SDValue();
14331
14332 APInt SplatBits, SplatUndef;
14333 unsigned SplatBitSize;
14334 bool HasAnyUndefs;
14335 if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&
14336 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
14337 if (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32 ||
14338 SplatBitSize == 64) {
14339 EVT VbicVT;
14340 SDValue Val = isVMOVModifiedImm((~SplatBits).getZExtValue(),
14341 SplatUndef.getZExtValue(), SplatBitSize,
14342 DAG, dl, VbicVT, VT, OtherModImm);
14343 if (Val.getNode()) {
14344 SDValue Input =
14345 DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0));
14346 SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val);
14347 return DAG.getNode(ISD::BITCAST, dl, VT, Vbic);
14348 }
14349 }
14350 }
14351
14352 if (!Subtarget->isThumb1Only()) {
14353    // fold (and (select cc, -1, c), x) -> (select cc, x, (and x, c))
14354 if (SDValue Result = combineSelectAndUseCommutative(N, true, DCI))
14355 return Result;
14356
14357 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
14358 return Result;
14359 }
14360
14361 if (Subtarget->isThumb1Only())
14362 if (SDValue Result = CombineANDShift(N, DCI, Subtarget))
14363 return Result;
14364
14365 return SDValue();
14366}
14367
14368// Try combining OR nodes to SMULWB, SMULWT.
14369static SDValue PerformORCombineToSMULWBT(SDNode *OR,
14370 TargetLowering::DAGCombinerInfo &DCI,
14371 const ARMSubtarget *Subtarget) {
14372 if (!Subtarget->hasV6Ops() ||
14373 (Subtarget->isThumb() &&
14374 (!Subtarget->hasThumb2() || !Subtarget->hasDSP())))
14375 return SDValue();
14376
14377 SDValue SRL = OR->getOperand(0);
14378 SDValue SHL = OR->getOperand(1);
14379
14380 if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) {
14381 SRL = OR->getOperand(1);
14382 SHL = OR->getOperand(0);
14383 }
14384 if (!isSRL16(SRL) || !isSHL16(SHL))
14385 return SDValue();
14386
14387 // The first operands to the shifts need to be the two results from the
14388 // same smul_lohi node.
14389 if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) ||
14390 SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI)
14391 return SDValue();
14392
14393 SDNode *SMULLOHI = SRL.getOperand(0).getNode();
14394 if (SRL.getOperand(0) != SDValue(SMULLOHI, 0) ||
14395 SHL.getOperand(0) != SDValue(SMULLOHI, 1))
14396 return SDValue();
14397
14398 // Now we have:
14399 // (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16)))
14400  // For SMUL[B|T] smul_lohi will take a 32-bit and a 16-bit argument.
14401  // For SMULWB the 16-bit value will be sign extended somehow.
14402 // For SMULWT only the SRA is required.
14403 // Check both sides of SMUL_LOHI
14404 SDValue OpS16 = SMULLOHI->getOperand(0);
14405 SDValue OpS32 = SMULLOHI->getOperand(1);
14406
14407 SelectionDAG &DAG = DCI.DAG;
14408 if (!isS16(OpS16, DAG) && !isSRA16(OpS16)) {
14409 OpS16 = OpS32;
14410 OpS32 = SMULLOHI->getOperand(0);
14411 }
14412
14413 SDLoc dl(OR);
14414 unsigned Opcode = 0;
14415 if (isS16(OpS16, DAG))
14416 Opcode = ARMISD::SMULWB;
14417 else if (isSRA16(OpS16)) {
14418 Opcode = ARMISD::SMULWT;
14419 OpS16 = OpS16->getOperand(0);
14420 }
14421 else
14422 return SDValue();
14423
14424 SDValue Res = DAG.getNode(Opcode, dl, MVT::i32, OpS32, OpS16);
14425 DAG.ReplaceAllUsesOfValueWith(SDValue(OR, 0), Res);
14426 return SDValue(OR, 0);
14427}
14428
14429static SDValue PerformORCombineToBFI(SDNode *N,
14430 TargetLowering::DAGCombinerInfo &DCI,
14431 const ARMSubtarget *Subtarget) {
14432 // BFI is only available on V6T2+
14433 if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
14434 return SDValue();
14435
14436 EVT VT = N->getValueType(0);
14437 SDValue N0 = N->getOperand(0);
14438 SDValue N1 = N->getOperand(1);
14439 SelectionDAG &DAG = DCI.DAG;
14440 SDLoc DL(N);
14441 // 1) or (and A, mask), val => ARMbfi A, val, mask
14442 // iff (val & mask) == val
14443 //
14444 // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
14445 // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
14446 // && mask == ~mask2
14447 // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
14448 // && ~mask == mask2
14449 // (i.e., copy a bitfield value into another bitfield of the same width)
14450
14451 if (VT != MVT::i32)
14452 return SDValue();
14453
14454 SDValue N00 = N0.getOperand(0);
14455
14456 // The value and the mask need to be constants so we can verify this is
14457 // actually a bitfield set. If the mask is 0xffff, we can do better
14458 // via a movt instruction, so don't use BFI in that case.
14459 SDValue MaskOp = N0.getOperand(1);
14460 ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp);
14461 if (!MaskC)
14462 return SDValue();
14463 unsigned Mask = MaskC->getZExtValue();
14464 if (Mask == 0xffff)
14465 return SDValue();
14466 SDValue Res;
14467 // Case (1): or (and A, mask), val => ARMbfi A, val, mask
14468 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
14469 if (N1C) {
14470 unsigned Val = N1C->getZExtValue();
14471 if ((Val & ~Mask) != Val)
14472 return SDValue();
14473
14474 if (ARM::isBitFieldInvertedMask(Mask)) {
14475 Val >>= llvm::countr_zero(~Mask);
14476
14477 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,
14478 DAG.getConstant(Val, DL, MVT::i32),
14479 DAG.getConstant(Mask, DL, MVT::i32));
14480
14481 DCI.CombineTo(N, Res, false);
14482      // Return value from the original node to inform the combiner that N is
14483 // now dead.
14484 return SDValue(N, 0);
14485 }
14486 } else if (N1.getOpcode() == ISD::AND) {
14487 // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
14488 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
14489 if (!N11C)
14490 return SDValue();
14491 unsigned Mask2 = N11C->getZExtValue();
14492
14493    // Mask and ~Mask2 (or reverse) must be equivalent for the BFI pattern
14494    // to match as-is.
14495 if (ARM::isBitFieldInvertedMask(Mask) &&
14496 (Mask == ~Mask2)) {
14497 // The pack halfword instruction works better for masks that fit it,
14498 // so use that when it's available.
14499 if (Subtarget->hasDSP() &&
14500 (Mask == 0xffff || Mask == 0xffff0000))
14501 return SDValue();
14502 // 2a
14503 unsigned amt = llvm::countr_zero(Mask2);
14504 Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
14505 DAG.getConstant(amt, DL, MVT::i32));
14506 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
14507 DAG.getConstant(Mask, DL, MVT::i32));
14508 DCI.CombineTo(N, Res, false);
14509      // Return value from the original node to inform the combiner that N is
14510 // now dead.
14511 return SDValue(N, 0);
14512 } else if (ARM::isBitFieldInvertedMask(~Mask) &&
14513 (~Mask == Mask2)) {
14514 // The pack halfword instruction works better for masks that fit it,
14515 // so use that when it's available.
14516 if (Subtarget->hasDSP() &&
14517 (Mask2 == 0xffff || Mask2 == 0xffff0000))
14518 return SDValue();
14519 // 2b
14520 unsigned lsb = llvm::countr_zero(Mask);
14521 Res = DAG.getNode(ISD::SRL, DL, VT, N00,
14522 DAG.getConstant(lsb, DL, MVT::i32));
14523 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
14524 DAG.getConstant(Mask2, DL, MVT::i32));
14525 DCI.CombineTo(N, Res, false);
14526      // Return value from the original node to inform the combiner that N is
14527 // now dead.
14528 return SDValue(N, 0);
14529 }
14530 }
14531
14532 if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) &&
14533 N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) &&
14534 ARM::isBitFieldInvertedMask(~Mask)) {
14535 // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask
14536 // where lsb(mask) == #shamt and masked bits of B are known zero.
14537 SDValue ShAmt = N00.getOperand(1);
14538 unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue();
14539 unsigned LSB = llvm::countr_zero(Mask);
14540 if (ShAmtC != LSB)
14541 return SDValue();
14542
14543 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0),
14544 DAG.getConstant(~Mask, DL, MVT::i32));
14545
14546 DCI.CombineTo(N, Res, false);
14547    // Return value from the original node to inform the combiner that N is
14548 // now dead.
14549 return SDValue(N, 0);
14550 }
14551
14552 return SDValue();
14553}
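// Editor's illustrative sketch, not part of ARMISelLowering.cpp: the OR/AND
// pair matched in case (1) is exactly a bit-field insert. With mask =
// 0xff00ffff (so ~mask = 0x00ff0000 is a contiguous field) and val =
// 0x00420000, which lies entirely within ~mask, the result is A with that
// field overwritten by 0x42:
static_assert(((0xDEADBEEFu & 0xFF00FFFFu) | 0x00420000u) == 0xDE42BEEFu,
              "or(and(A, mask), val) overwrites the ~mask field of A with val");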
14554
14555static bool isValidMVECond(unsigned CC, bool IsFloat) {
14556 switch (CC) {
14557 case ARMCC::EQ:
14558 case ARMCC::NE:
14559 case ARMCC::LE:
14560 case ARMCC::GT:
14561 case ARMCC::GE:
14562 case ARMCC::LT:
14563 return true;
14564 case ARMCC::HS:
14565 case ARMCC::HI:
14566 return !IsFloat;
14567 default:
14568 return false;
14569 };
14570}
14571
14572static ARMCC::CondCodes getVCMPCondCode(SDValue N) {
14573 if (N->getOpcode() == ARMISD::VCMP)
14574 return (ARMCC::CondCodes)N->getConstantOperandVal(2);
14575 else if (N->getOpcode() == ARMISD::VCMPZ)
14576 return (ARMCC::CondCodes)N->getConstantOperandVal(1);
14577 else
14578    llvm_unreachable("Not a VCMP/VCMPZ!");
14579}
14580
14581static bool CanInvertMVEVCMP(SDValue N) {
14582 ARMCC::CondCodes CC = ARMCC::getOppositeCondition(getVCMPCondCode(N));
14583 return isValidMVECond(CC, N->getOperand(0).getValueType().isFloatingPoint());
14584}
14585
14586static SDValue PerformORCombine_i1(SDNode *N, SelectionDAG &DAG,
14587 const ARMSubtarget *Subtarget) {
14588 // Try to invert "or A, B" -> "and ~A, ~B", as the "and" is easier to chain
14589 // together with predicates
14590 EVT VT = N->getValueType(0);
14591 SDLoc DL(N);
14592 SDValue N0 = N->getOperand(0);
14593 SDValue N1 = N->getOperand(1);
14594
14595 auto IsFreelyInvertable = [&](SDValue V) {
14596 if (V->getOpcode() == ARMISD::VCMP || V->getOpcode() == ARMISD::VCMPZ)
14597 return CanInvertMVEVCMP(V);
14598 return false;
14599 };
14600
14601  // At least one operand must be freely invertible.
14602 if (!(IsFreelyInvertable(N0) || IsFreelyInvertable(N1)))
14603 return SDValue();
14604
14605 SDValue NewN0 = DAG.getLogicalNOT(DL, N0, VT);
14606 SDValue NewN1 = DAG.getLogicalNOT(DL, N1, VT);
14607 SDValue And = DAG.getNode(ISD::AND, DL, VT, NewN0, NewN1);
14608 return DAG.getLogicalNOT(DL, And, VT);
14609}
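// Editor's illustrative sketch, not part of ARMISelLowering.cpp: the i1 OR
// combine above is De Morgan's law, or(A, B) == not(and(not(A), not(B))),
// which lets the easier-to-chain AND be used when both operands are freely
// invertible VCMPs. Spot-checked on 32-bit bit patterns:
static_assert(~(~0xF0F0A5A5u & ~0x0F0F5A5Au) == (0xF0F0A5A5u | 0x0F0F5A5Au),
              "or(A, B) == not(and(not(A), not(B)))");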
14610
14611/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
14612static SDValue PerformORCombine(SDNode *N,
14613 TargetLowering::DAGCombinerInfo &DCI,
14614 const ARMSubtarget *Subtarget) {
14615 // Attempt to use immediate-form VORR
14616 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
14617 SDLoc dl(N);
14618 EVT VT = N->getValueType(0);
14619 SelectionDAG &DAG = DCI.DAG;
14620
14621 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
14622 return SDValue();
14623
14624 if (Subtarget->hasMVEIntegerOps() && (VT == MVT::v2i1 || VT == MVT::v4i1 ||
14625 VT == MVT::v8i1 || VT == MVT::v16i1))
14626 return PerformORCombine_i1(N, DAG, Subtarget);
14627
14628 APInt SplatBits, SplatUndef;
14629 unsigned SplatBitSize;
14630 bool HasAnyUndefs;
14631 if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&
14632 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
14633 if (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32 ||
14634 SplatBitSize == 64) {
14635 EVT VorrVT;
14636 SDValue Val =
14637 isVMOVModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
14638 SplatBitSize, DAG, dl, VorrVT, VT, OtherModImm);
14639 if (Val.getNode()) {
14640 SDValue Input =
14641 DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));
14642 SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val);
14643 return DAG.getNode(ISD::BITCAST, dl, VT, Vorr);
14644 }
14645 }
14646 }
14647
14648 if (!Subtarget->isThumb1Only()) {
14649    // fold (or (select cc, 0, c), x) -> (select cc, x, (or x, c))
14650 if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
14651 return Result;
14652 if (SDValue Result = PerformORCombineToSMULWBT(N, DCI, Subtarget))
14653 return Result;
14654 }
14655
14656 SDValue N0 = N->getOperand(0);
14657 SDValue N1 = N->getOperand(1);
14658
14659 // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
14660 if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&
14661 DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
14662
14663 // The code below optimizes (or (and X, Y), Z).
14664 // The AND operand needs to have a single user to make these optimizations
14665 // profitable.
14666 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
14667 return SDValue();
14668
14669 APInt SplatUndef;
14670 unsigned SplatBitSize;
14671 bool HasAnyUndefs;
14672
14673 APInt SplatBits0, SplatBits1;
14674 BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
14675 BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
14676 // Ensure that the second operand of both ands are constants
14677 if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
14678 HasAnyUndefs) && !HasAnyUndefs) {
14679 if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
14680 HasAnyUndefs) && !HasAnyUndefs) {
14681 // Ensure that the bit width of the constants are the same and that
14682 // the splat arguments are logical inverses as per the pattern we
14683 // are trying to simplify.
14684 if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() &&
14685 SplatBits0 == ~SplatBits1) {
14686 // Canonicalize the vector type to make instruction selection
14687 // simpler.
14688 EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
14689 SDValue Result = DAG.getNode(ARMISD::VBSP, dl, CanonicalVT,
14690 N0->getOperand(1),
14691 N0->getOperand(0),
14692 N1->getOperand(0));
14693 return DAG.getNode(ISD::BITCAST, dl, VT, Result);
14694 }
14695 }
14696 }
14697 }
14698
14699 // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
14700 // reasonable.
14701 if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
14702 if (SDValue Res = PerformORCombineToBFI(N, DCI, Subtarget))
14703 return Res;
14704 }
14705
14706 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
14707 return Result;
14708
14709 return SDValue();
14710}
14711
14712static SDValue PerformXORCombine(SDNode *N,
14713 TargetLowering::DAGCombinerInfo &DCI,
14714 const ARMSubtarget *Subtarget) {
14715 EVT VT = N->getValueType(0);
14716 SelectionDAG &DAG = DCI.DAG;
14717
14718 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
14719 return SDValue();
14720
14721 if (!Subtarget->isThumb1Only()) {
14722    // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor x, c))
14723 if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
14724 return Result;
14725
14726 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
14727 return Result;
14728 }
14729
14730 if (Subtarget->hasMVEIntegerOps()) {
14731 // fold (xor(vcmp/z, 1)) into a vcmp with the opposite condition.
14732 SDValue N0 = N->getOperand(0);
14733 SDValue N1 = N->getOperand(1);
14734 const TargetLowering *TLI = Subtarget->getTargetLowering();
14735 if (TLI->isConstTrueVal(N1) &&
14736 (N0->getOpcode() == ARMISD::VCMP || N0->getOpcode() == ARMISD::VCMPZ)) {
14737 if (CanInvertMVEVCMP(N0)) {
14738 SDLoc DL(N0);
14739 ARMCC::CondCodes CC = ARMCC::getOppositeCondition(getVCMPCondCode(N0));
14740
14741 SmallVector<SDValue, 4> Ops;
14742 Ops.push_back(N0->getOperand(0));
14743 if (N0->getOpcode() == ARMISD::VCMP)
14744 Ops.push_back(N0->getOperand(1));
14745 Ops.push_back(DAG.getConstant(CC, DL, MVT::i32));
14746 return DAG.getNode(N0->getOpcode(), DL, N0->getValueType(0), Ops);
14747 }
14748 }
14749 }
14750
14751 return SDValue();
14752}
14753
14754// ParseBFI - given a BFI instruction in N, extract the "from" value (Rn) and return it,
14755// and fill in FromMask and ToMask with (consecutive) bits in "from" to be extracted and
14756// their position in "to" (Rd).
14757static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask) {
14758  assert(N->getOpcode() == ARMISD::BFI);
14759
14760 SDValue From = N->getOperand(1);
14761 ToMask = ~cast<ConstantSDNode>(N->getOperand(2))->getAPIntValue();
14762 FromMask = APInt::getLowBitsSet(ToMask.getBitWidth(), ToMask.popcount());
14763
14764 // If the Base came from a SHR #C, we can deduce that it is really testing bit
14765 // #C in the base of the SHR.
14766 if (From->getOpcode() == ISD::SRL &&
14767 isa<ConstantSDNode>(From->getOperand(1))) {
14768 APInt Shift = cast<ConstantSDNode>(From->getOperand(1))->getAPIntValue();
14769    assert(Shift.getLimitedValue() < 32 && "Shift too large!");
14770 FromMask <<= Shift.getLimitedValue(31);
14771 From = From->getOperand(0);
14772 }
14773
14774 return From;
14775}
14776
14777// If A and B contain one contiguous set of bits, does A | B == A . B?
14778//
14779  // Neither A nor B may be zero.
14780static bool BitsProperlyConcatenate(const APInt &A, const APInt &B) {
14781 unsigned LastActiveBitInA = A.countr_zero();
14782 unsigned FirstActiveBitInB = B.getBitWidth() - B.countl_zero() - 1;
14783 return LastActiveBitInA - 1 == FirstActiveBitInB;
14784}
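// Editor's illustrative sketch, not part of ARMISelLowering.cpp: the check
// above asks whether the contiguous run of bits in B sits immediately below
// the contiguous run in A, so that A | B is itself one contiguous run. For
// A = 0x38 (0b111000) and B = 0x06 (0b000110): countr_zero(A) = 3, the
// highest set bit of B is bit 2, 3 - 1 == 2, and the two masks concatenate:
static_assert((0x38u | 0x06u) == 0x3Eu,
              "adjacent contiguous masks OR into a single contiguous run");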
14785
14786static SDValue FindBFIToCombineWith(SDNode *N) {
14787 // We have a BFI in N. Find a BFI it can combine with, if one exists.
14788 APInt ToMask, FromMask;
14789 SDValue From = ParseBFI(N, ToMask, FromMask);
14790 SDValue To = N->getOperand(0);
14791
14792 SDValue V = To;
14793 if (V.getOpcode() != ARMISD::BFI)
14794 return SDValue();
14795
14796 APInt NewToMask, NewFromMask;
14797 SDValue NewFrom = ParseBFI(V.getNode(), NewToMask, NewFromMask);
14798 if (NewFrom != From)
14799 return SDValue();
14800
14801 // Do the written bits conflict with any we've seen so far?
14802 if ((NewToMask & ToMask).getBoolValue())
14803 // Conflicting bits.
14804 return SDValue();
14805
14806 // Are the new bits contiguous when combined with the old bits?
14807 if (BitsProperlyConcatenate(ToMask, NewToMask) &&
14808 BitsProperlyConcatenate(FromMask, NewFromMask))
14809 return V;
14810 if (BitsProperlyConcatenate(NewToMask, ToMask) &&
14811 BitsProperlyConcatenate(NewFromMask, FromMask))
14812 return V;
14813
14814 return SDValue();
14815}
14816
14817static SDValue PerformBFICombine(SDNode *N, SelectionDAG &DAG) {
14818 SDValue N0 = N->getOperand(0);
14819 SDValue N1 = N->getOperand(1);
14820
14821 if (N1.getOpcode() == ISD::AND) {
14822 // (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
14823 // the bits being cleared by the AND are not demanded by the BFI.
14824 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
14825 if (!N11C)
14826 return SDValue();
14827 unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
14828 unsigned LSB = llvm::countr_zero(~InvMask);
14829 unsigned Width = llvm::bit_width<unsigned>(~InvMask) - LSB;
14830    assert(Width <
14831           static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
14832           "undefined behavior");
14833 unsigned Mask = (1u << Width) - 1;
14834 unsigned Mask2 = N11C->getZExtValue();
14835 if ((Mask & (~Mask2)) == 0)
14836 return DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0),
14837 N->getOperand(0), N1.getOperand(0), N->getOperand(2));
14838 return SDValue();
14839 }
14840
14841 // Look for another BFI to combine with.
14842 if (SDValue CombineBFI = FindBFIToCombineWith(N)) {
14843 // We've found a BFI.
14844 APInt ToMask1, FromMask1;
14845 SDValue From1 = ParseBFI(N, ToMask1, FromMask1);
14846
14847 APInt ToMask2, FromMask2;
14848 SDValue From2 = ParseBFI(CombineBFI.getNode(), ToMask2, FromMask2);
14849    assert(From1 == From2);
14850 (void)From2;
14851
14852 // Create a new BFI, combining the two together.
14853 APInt NewFromMask = FromMask1 | FromMask2;
14854 APInt NewToMask = ToMask1 | ToMask2;
14855
14856 EVT VT = N->getValueType(0);
14857 SDLoc dl(N);
14858
14859 if (NewFromMask[0] == 0)
14860 From1 = DAG.getNode(ISD::SRL, dl, VT, From1,
14861 DAG.getConstant(NewFromMask.countr_zero(), dl, VT));
14862 return DAG.getNode(ARMISD::BFI, dl, VT, CombineBFI.getOperand(0), From1,
14863 DAG.getConstant(~NewToMask, dl, VT));
14864 }
14865
14866 // Reassociate BFI(BFI (A, B, M1), C, M2) to BFI(BFI (A, C, M2), B, M1) so
14867 // that lower bit insertions are performed first, providing that M1 and M2
14868  // do not overlap. This can allow multiple BFI instructions to be combined
14869 // together by the other folds above.
14870 if (N->getOperand(0).getOpcode() == ARMISD::BFI) {
14871 APInt ToMask1 = ~N->getConstantOperandAPInt(2);
14872 APInt ToMask2 = ~N0.getConstantOperandAPInt(2);
14873
14874 if (!N0.hasOneUse() || (ToMask1 & ToMask2) != 0 ||
14875 ToMask1.countl_zero() < ToMask2.countl_zero())
14876 return SDValue();
14877
14878 EVT VT = N->getValueType(0);
14879 SDLoc dl(N);
14880 SDValue BFI1 = DAG.getNode(ARMISD::BFI, dl, VT, N0.getOperand(0),
14881 N->getOperand(1), N->getOperand(2));
14882 return DAG.getNode(ARMISD::BFI, dl, VT, BFI1, N0.getOperand(1),
14883 N0.getOperand(2));
14884 }
14885
14886 return SDValue();
14887}
14888
14889// Check that N is CMPZ(CSINC(0, 0, CC, X)),
14890// or CMPZ(CMOV(1, 0, CC, $cpsr, X))
14891// return X if valid.
14892static SDValue IsCMPZCSINC(SDNode *Cmp, ARMCC::CondCodes &CC) {
14893 if (Cmp->getOpcode() != ARMISD::CMPZ || !isNullConstant(Cmp->getOperand(1)))
14894 return SDValue();
14895 SDValue CSInc = Cmp->getOperand(0);
14896
14897 // Ignore any `And 1` nodes that may not yet have been removed. We are
14898 // looking for a value that produces 1/0, so these have no effect on the
14899 // code.
14900 while (CSInc.getOpcode() == ISD::AND &&
14901 isa<ConstantSDNode>(CSInc.getOperand(1)) &&
14902 CSInc.getConstantOperandVal(1) == 1 && CSInc->hasOneUse())
14903 CSInc = CSInc.getOperand(0);
14904
14905 if (CSInc.getOpcode() == ARMISD::CSINC &&
14906 isNullConstant(CSInc.getOperand(0)) &&
14907 isNullConstant(CSInc.getOperand(1)) && CSInc->hasOneUse()) {
14908 CC = (ARMCC::CondCodes)CSInc.getConstantOperandVal(2);
14909 return CSInc.getOperand(3);
14910 }
14911 if (CSInc.getOpcode() == ARMISD::CMOV && isOneConstant(CSInc.getOperand(0)) &&
14912 isNullConstant(CSInc.getOperand(1)) && CSInc->hasOneUse()) {
14913 CC = (ARMCC::CondCodes)CSInc.getConstantOperandVal(2);
14914 return CSInc.getOperand(4);
14915 }
14916 if (CSInc.getOpcode() == ARMISD::CMOV && isOneConstant(CSInc.getOperand(1)) &&
14917 isNullConstant(CSInc.getOperand(0)) && CSInc->hasOneUse()) {
14918 CC = ARMCC::getOppositeCondition(
14919 (ARMCC::CondCodes)CSInc.getConstantOperandVal(2));
14920 return CSInc.getOperand(4);
14921 }
14922 return SDValue();
14923}
14924
14925static SDValue PerformCMPZCombine(SDNode *N, SelectionDAG &DAG) {
14926 // Given CMPZ(CSINC(C, 0, 0, EQ), 0), we can just use C directly. As in
14927 // t92: glue = ARMISD::CMPZ t74, 0
14928 // t93: i32 = ARMISD::CSINC 0, 0, 1, t92
14929 // t96: glue = ARMISD::CMPZ t93, 0
14930 // t114: i32 = ARMISD::CSINV 0, 0, 0, t96
14931 ARMCC::CondCodes Cond;
14932 if (SDValue C = IsCMPZCSINC(N, Cond))
14933 if (Cond == ARMCC::EQ)
14934 return C;
14935 return SDValue();
14936}
14937
14938static SDValue PerformCSETCombine(SDNode *N, SelectionDAG &DAG) {
14939  // Fold away an unnecessary CMPZ/CSINC
14940 // CSXYZ A, B, C1 (CMPZ (CSINC 0, 0, C2, D), 0) ->
14941 // if C1==EQ -> CSXYZ A, B, C2, D
14942 // if C1==NE -> CSXYZ A, B, NOT(C2), D
14943 ARMCC::CondCodes Cond;
14944 if (SDValue C = IsCMPZCSINC(N->getOperand(3).getNode(), Cond)) {
14945 if (N->getConstantOperandVal(2) == ARMCC::EQ)
14946 return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
14947 N->getOperand(1),
14948 DAG.getConstant(Cond, SDLoc(N), MVT::i32), C);
14949 if (N->getConstantOperandVal(2) == ARMCC::NE)
14950 return DAG.getNode(
14951 N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
14952 N->getOperand(1),
14953 DAG.getConstant(ARMCC::getOppositeCondition(Cond), SDLoc(N), MVT::i32), C);
14954 }
14955 return SDValue();
14956}
14957
14958/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
14959/// ARMISD::VMOVRRD.
14960static SDValue PerformVMOVRRDCombine(SDNode *N,
14961 TargetLowering::DAGCombinerInfo &DCI,
14962 const ARMSubtarget *Subtarget) {
14963 // vmovrrd(vmovdrr x, y) -> x,y
14964 SDValue InDouble = N->getOperand(0);
14965 if (InDouble.getOpcode() == ARMISD::VMOVDRR && Subtarget->hasFP64())
14966 return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
14967
14968 // vmovrrd(load f64) -> (load i32), (load i32)
14969 SDNode *InNode = InDouble.getNode();
14970 if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() &&
14971 InNode->getValueType(0) == MVT::f64 &&
14972 InNode->getOperand(1).getOpcode() == ISD::FrameIndex &&
14973 !cast<LoadSDNode>(InNode)->isVolatile()) {
14974 // TODO: Should this be done for non-FrameIndex operands?
14975 LoadSDNode *LD = cast<LoadSDNode>(InNode);
14976
14977 SelectionDAG &DAG = DCI.DAG;
14978 SDLoc DL(LD);
14979 SDValue BasePtr = LD->getBasePtr();
14980 SDValue NewLD1 =
14981 DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, LD->getPointerInfo(),
14982 LD->getAlign(), LD->getMemOperand()->getFlags());
14983
14984 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
14985 DAG.getConstant(4, DL, MVT::i32));
14986
14987 SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, LD->getChain(), OffsetPtr,
14988 LD->getPointerInfo().getWithOffset(4),
14989 commonAlignment(LD->getAlign(), 4),
14990 LD->getMemOperand()->getFlags());
14991
14992 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
14993 if (DCI.DAG.getDataLayout().isBigEndian())
14994 std::swap (NewLD1, NewLD2);
14995 SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);
14996 return Result;
14997 }
14998
14999 // VMOVRRD(extract(..(build_vector(a, b, c, d)))) -> a,b or c,d
15000 // VMOVRRD(extract(insert_vector(insert_vector(.., a, l1), b, l2))) -> a,b
15001 if (InDouble.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15002 isa<ConstantSDNode>(InDouble.getOperand(1))) {
15003 SDValue BV = InDouble.getOperand(0);
15004 // Look up through any nop bitcasts and vector_reg_casts. bitcasts may
15005 // change lane order under big endian.
15006 bool BVSwap = BV.getOpcode() == ISD::BITCAST;
15007 while (
15008 (BV.getOpcode() == ISD::BITCAST ||
15009 BV.getOpcode() == ARMISD::VECTOR_REG_CAST) &&
15010 (BV.getValueType() == MVT::v2f64 || BV.getValueType() == MVT::v2i64)) {
15011 BVSwap = BV.getOpcode() == ISD::BITCAST;
15012 BV = BV.getOperand(0);
15013 }
15014 if (BV.getValueType() != MVT::v4i32)
15015 return SDValue();
15016
15017 // Handle buildvectors, pulling out the correct lane depending on
15018 // endianness.
15019 unsigned Offset = InDouble.getConstantOperandVal(1) == 1 ? 2 : 0;
15020 if (BV.getOpcode() == ISD::BUILD_VECTOR) {
15021 SDValue Op0 = BV.getOperand(Offset);
15022 SDValue Op1 = BV.getOperand(Offset + 1);
15023 if (!Subtarget->isLittle() && BVSwap)
15024 std::swap(Op0, Op1);
15025
15026 return DCI.DAG.getMergeValues({Op0, Op1}, SDLoc(N));
15027 }
15028
15029 // A chain of insert_vectors, grabbing the correct value of the chain of
15030 // inserts.
15031 SDValue Op0, Op1;
15032 while (BV.getOpcode() == ISD::INSERT_VECTOR_ELT) {
15033 if (isa<ConstantSDNode>(BV.getOperand(2))) {
15034 if (BV.getConstantOperandVal(2) == Offset)
15035 Op0 = BV.getOperand(1);
15036 if (BV.getConstantOperandVal(2) == Offset + 1)
15037 Op1 = BV.getOperand(1);
15038 }
15039 BV = BV.getOperand(0);
15040 }
15041 if (!Subtarget->isLittle() && BVSwap)
15042 std::swap(Op0, Op1);
15043 if (Op0 && Op1)
15044 return DCI.DAG.getMergeValues({Op0, Op1}, SDLoc(N));
15045 }
15046
15047 return SDValue();
15048}
15049
15050/// PerformVMOVDRRCombine - Target-specific dag combine xforms for
15051/// ARMISD::VMOVDRR. This is also used for BUILD_VECTORs with 2 operands.
15052static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) {
15053 // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X)
15054 SDValue Op0 = N->getOperand(0);
15055 SDValue Op1 = N->getOperand(1);
15056 if (Op0.getOpcode() == ISD::BITCAST)
15057 Op0 = Op0.getOperand(0);
15058 if (Op1.getOpcode() == ISD::BITCAST)
15059 Op1 = Op1.getOperand(0);
15060 if (Op0.getOpcode() == ARMISD::VMOVRRD &&
15061 Op0.getNode() == Op1.getNode() &&
15062 Op0.getResNo() == 0 && Op1.getResNo() == 1)
15063 return DAG.getNode(ISD::BITCAST, SDLoc(N),
15064 N->getValueType(0), Op0.getOperand(0));
15065 return SDValue();
15066}
15067
15068static SDValue PerformVMOVhrCombine(SDNode *N,
15069 TargetLowering::DAGCombinerInfo &DCI) {
15070 SDValue Op0 = N->getOperand(0);
15071
15072 // VMOVhr (VMOVrh (X)) -> X
15073 if (Op0->getOpcode() == ARMISD::VMOVrh)
15074 return Op0->getOperand(0);
15075
15076 // FullFP16: half values are passed in S-registers, and we don't
15077 // need any of the bitcast and moves:
15078 //
15079 // t2: f32,ch1,gl1? = CopyFromReg ch, Register:f32 %0, gl?
15080 // t5: i32 = bitcast t2
15081 // t18: f16 = ARMISD::VMOVhr t5
15082 // =>
15083 // tN: f16,ch2,gl2? = CopyFromReg ch, Register::f32 %0, gl?
15084 if (Op0->getOpcode() == ISD::BITCAST) {
15085 SDValue Copy = Op0->getOperand(0);
15086 if (Copy.getValueType() == MVT::f32 &&
15087 Copy->getOpcode() == ISD::CopyFromReg) {
15088 bool HasGlue = Copy->getNumOperands() == 3;
15089 SDValue Ops[] = {Copy->getOperand(0), Copy->getOperand(1),
15090 HasGlue ? Copy->getOperand(2) : SDValue()};
15091 EVT OutTys[] = {N->getValueType(0), MVT::Other, MVT::Glue};
15092 SDValue NewCopy =
15093 DCI.DAG.getNode(ISD::CopyFromReg, SDLoc(N),
15094 DCI.DAG.getVTList(ArrayRef(OutTys, HasGlue ? 3 : 2)),
15095 ArrayRef(Ops, HasGlue ? 3 : 2));
15096
15097 // Update Users, Chains, and Potential Glue.
15098 DCI.DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), NewCopy.getValue(0));
15099 DCI.DAG.ReplaceAllUsesOfValueWith(Copy.getValue(1), NewCopy.getValue(1));
15100 if (HasGlue)
15101 DCI.DAG.ReplaceAllUsesOfValueWith(Copy.getValue(2),
15102 NewCopy.getValue(2));
15103
15104 return NewCopy;
15105 }
15106 }
15107
15108 // fold (VMOVhr (load x)) -> (load (f16*)x)
15109 if (LoadSDNode *LN0 = dyn_cast<LoadSDNode>(Op0)) {
15110 if (LN0->hasOneUse() && LN0->isUnindexed() &&
15111 LN0->getMemoryVT() == MVT::i16) {
15112 SDValue Load =
15113 DCI.DAG.getLoad(N->getValueType(0), SDLoc(N), LN0->getChain(),
15114 LN0->getBasePtr(), LN0->getMemOperand());
15115 DCI.DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Load.getValue(0));
15116 DCI.DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));
15117 return Load;
15118 }
15119 }
15120
15121 // Only the bottom 16 bits of the source register are used.
15122 APInt DemandedMask = APInt::getLowBitsSet(32, 16);
15123 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
15124 if (TLI.SimplifyDemandedBits(Op0, DemandedMask, DCI))
15125 return SDValue(N, 0);
15126
15127 return SDValue();
15128}
15129
15130static SDValue PerformVMOVrhCombine(SDNode *N, SelectionDAG &DAG) {
15131 SDValue N0 = N->getOperand(0);
15132 EVT VT = N->getValueType(0);
15133
15134 // fold (VMOVrh (fpconst x)) -> const x
15135 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N0)) {
15136 APFloat V = C->getValueAPF();
15137 return DAG.getConstant(V.bitcastToAPInt().getZExtValue(), SDLoc(N), VT);
15138 }
15139
15140 // fold (VMOVrh (load x)) -> (zextload (i16*)x)
15141 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse()) {
15142 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15143
15144 SDValue Load =
15145 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, LN0->getChain(),
15146 LN0->getBasePtr(), MVT::i16, LN0->getMemOperand());
15147 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Load.getValue(0));
15148 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
15149 return Load;
15150 }
15151
15152 // Fold VMOVrh(extract(x, n)) -> vgetlaneu(x, n)
15153 if (N0->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15154 isa<ConstantSDNode>(N0->getOperand(1)))
15155 return DAG.getNode(ARMISD::VGETLANEu, SDLoc(N), VT, N0->getOperand(0),
15156 N0->getOperand(1));
15157
15158 return SDValue();
15159}
15160
15161/// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node
15162/// are normal, non-volatile loads. If so, it is profitable to bitcast an
15163/// i64 vector to have f64 elements, since the value can then be loaded
15164/// directly into a VFP register.
15165static bool hasNormalLoadOperand(SDNode *N) {
15166 unsigned NumElts = N->getValueType(0).getVectorNumElements();
15167 for (unsigned i = 0; i < NumElts; ++i) {
15168 SDNode *Elt = N->getOperand(i).getNode();
15169 if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile())
15170 return true;
15171 }
15172 return false;
15173}
15174
15175/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
15176/// ISD::BUILD_VECTOR.
15177static SDValue PerformBUILD_VECTORCombine(SDNode *N,
15178 TargetLowering::DAGCombinerInfo &DCI,
15179 const ARMSubtarget *Subtarget) {
15180 // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
15181 // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value
15182 // into a pair of GPRs, which is fine when the value is used as a scalar,
15183 // but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
15184 SelectionDAG &DAG = DCI.DAG;
15185 if (N->getNumOperands() == 2)
15186 if (SDValue RV = PerformVMOVDRRCombine(N, DAG))
15187 return RV;
15188
15189 // Load i64 elements as f64 values so that type legalization does not split
15190 // them up into i32 values.
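  // For example, a v2i64 build_vector of two load results is rebuilt below as
  // a v2f64 build_vector of (f64 bitcast) operands and bitcast back to v2i64;
  // the DAGCombiner can then fold each load+bitcast into an f64 load.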
15191 EVT VT = N->getValueType(0);
15192 if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N))
15193 return SDValue();
15194 SDLoc dl(N);
15195 SmallVector<SDValue, 8> Ops;
15196 unsigned NumElts = VT.getVectorNumElements();
15197 for (unsigned i = 0; i < NumElts; ++i) {
15198 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i));
15199 Ops.push_back(V);
15200 // Make the DAGCombiner fold the bitcast.
15201 DCI.AddToWorklist(V.getNode());
15202 }
15203 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);
15204 SDValue BV = DAG.getBuildVector(FloatVT, dl, Ops);
15205 return DAG.getNode(ISD::BITCAST, dl, VT, BV);
15206}
15207
15208/// Target-specific dag combine xforms for ARMISD::BUILD_VECTOR.
15209static SDValue
15210PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
15211 // ARMISD::BUILD_VECTOR is introduced when legalizing ISD::BUILD_VECTOR.
15212 // At that time, we may have inserted bitcasts from integer to float.
15213 // If these bitcasts have survived DAGCombine, change the lowering of this
15215  // BUILD_VECTOR into something more vector friendly, i.e., that does not
15215  // force the use of floating point types.
15216
15217 // Make sure we can change the type of the vector.
15218 // This is possible iff:
15219  // 1. The vector is only used in a bitcast to an integer type. I.e.,
15220 // 1.1. Vector is used only once.
15221 // 1.2. Use is a bit convert to an integer type.
15222 // 2. The size of its operands are 32-bits (64-bits are not legal).
15223 EVT VT = N->getValueType(0);
15224 EVT EltVT = VT.getVectorElementType();
15225
15226 // Check 1.1. and 2.
15227 if (EltVT.getSizeInBits() != 32 || !N->hasOneUse())
15228 return SDValue();
15229
15230 // By construction, the input type must be float.
15231  assert(EltVT == MVT::f32 && "Unexpected type!");
15232
15233 // Check 1.2.
15234 SDNode *Use = *N->use_begin();
15235 if (Use->getOpcode() != ISD::BITCAST ||
15236 Use->getValueType(0).isFloatingPoint())
15237 return SDValue();
15238
15239 // Check profitability.
15240  // The model is: if more than half of the relevant operands are bitcast from
15241 // i32, turn the build_vector into a sequence of insert_vector_elt.
15242 // Relevant operands are everything that is not statically
15243 // (i.e., at compile time) bitcasted.
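  // For example, with a 4 x f32 build_vector in which three operands are
  // (bitcast i32 -> f32) values and the fourth is a plain f32,
  // NumOfRelevantElts is 4 and NumOfBitCastedElts is 3; since 3 > 4/2, the
  // rewrite below is considered profitable.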
15244 unsigned NumOfBitCastedElts = 0;
15245 unsigned NumElts = VT.getVectorNumElements();
15246 unsigned NumOfRelevantElts = NumElts;
15247 for (unsigned Idx = 0; Idx < NumElts; ++Idx) {
15248 SDValue Elt = N->getOperand(Idx);
15249 if (Elt->getOpcode() == ISD::BITCAST) {
15250 // Assume only bit cast to i32 will go away.
15251 if (Elt->getOperand(0).getValueType() == MVT::i32)
15252 ++NumOfBitCastedElts;
15253 } else if (Elt.isUndef() || isa<ConstantSDNode>(Elt))
15254      // Constants are statically cast, thus do not count them as
15255 // relevant operands.
15256 --NumOfRelevantElts;
15257 }
15258
15259 // Check if more than half of the elements require a non-free bitcast.
15260 if (NumOfBitCastedElts <= NumOfRelevantElts / 2)
15261 return SDValue();
15262
15263 SelectionDAG &DAG = DCI.DAG;
15264 // Create the new vector type.
15265 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
15266 // Check if the type is legal.
15267 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15268 if (!TLI.isTypeLegal(VecVT))
15269 return SDValue();
15270
15271 // Combine:
15272 // ARMISD::BUILD_VECTOR E1, E2, ..., EN.
15273 // => BITCAST INSERT_VECTOR_ELT
15274 // (INSERT_VECTOR_ELT (...), (BITCAST EN-1), N-1),
15275 // (BITCAST EN), N.
15276 SDValue Vec = DAG.getUNDEF(VecVT);
15277 SDLoc dl(N);
15278 for (unsigned Idx = 0 ; Idx < NumElts; ++Idx) {
15279 SDValue V = N->getOperand(Idx);
15280 if (V.isUndef())
15281 continue;
15282 if (V.getOpcode() == ISD::BITCAST &&
15283 V->getOperand(0).getValueType() == MVT::i32)
15284 // Fold obvious case.
15285 V = V.getOperand(0);
15286 else {
15287 V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V);
15288 // Make the DAGCombiner fold the bitcasts.
15289 DCI.AddToWorklist(V.getNode());
15290 }
15291 SDValue LaneIdx = DAG.getConstant(Idx, dl, MVT::i32);
15292 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Vec, V, LaneIdx);
15293 }
15294 Vec = DAG.getNode(ISD::BITCAST, dl, VT, Vec);
15295 // Make the DAGCombiner fold the bitcasts.
15296 DCI.AddToWorklist(Vec.getNode());
15297 return Vec;
15298}
15299
15300static SDValue
15301PerformPREDICATE_CASTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
15302 EVT VT = N->getValueType(0);
15303 SDValue Op = N->getOperand(0);
15304 SDLoc dl(N);
15305
15306 // PREDICATE_CAST(PREDICATE_CAST(x)) == PREDICATE_CAST(x)
15307 if (Op->getOpcode() == ARMISD::PREDICATE_CAST) {
15308 // If the valuetypes are the same, we can remove the cast entirely.
15309 if (Op->getOperand(0).getValueType() == VT)
15310 return Op->getOperand(0);
15311 return DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0));
15312 }
15313
15314 // Turn pred_cast(xor x, -1) into xor(pred_cast x, -1), in order to produce
15315 // more VPNOT which might get folded as else predicates.
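  // Note: only the low 16 bits of the i32 form a meaningful MVE predicate,
  // which is why the all-ones value is materialized as 65535 (0xffff) below.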
15316 if (Op.getValueType() == MVT::i32 && isBitwiseNot(Op)) {
15317 SDValue X =
15318 DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0));
15319 SDValue C = DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT,
15320 DCI.DAG.getConstant(65535, dl, MVT::i32));
15321 return DCI.DAG.getNode(ISD::XOR, dl, VT, X, C);
15322 }
15323
15324 // Only the bottom 16 bits of the source register are used.
15325 if (Op.getValueType() == MVT::i32) {
15326 APInt DemandedMask = APInt::getLowBitsSet(32, 16);
15327 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
15328 if (TLI.SimplifyDemandedBits(Op, DemandedMask, DCI))
15329 return SDValue(N, 0);
15330 }
15331 return SDValue();
15332}
15333
15334static SDValue PerformVECTOR_REG_CASTCombine(SDNode *N, SelectionDAG &DAG,
15335 const ARMSubtarget *ST) {
15336 EVT VT = N->getValueType(0);
15337 SDValue Op = N->getOperand(0);
15338 SDLoc dl(N);
15339
15340 // Under Little endian, a VECTOR_REG_CAST is equivalent to a BITCAST
15341 if (ST->isLittle())
15342 return DAG.getNode(ISD::BITCAST, dl, VT, Op);
15343
15344 // VECTOR_REG_CAST undef -> undef
15345 if (Op.isUndef())
15346 return DAG.getUNDEF(VT);
15347
15348 // VECTOR_REG_CAST(VECTOR_REG_CAST(x)) == VECTOR_REG_CAST(x)
15349 if (Op->getOpcode() == ARMISD::VECTOR_REG_CAST) {
15350 // If the valuetypes are the same, we can remove the cast entirely.
15351 if (Op->getOperand(0).getValueType() == VT)
15352 return Op->getOperand(0);
15353 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Op->getOperand(0));
15354 }
15355
15356 return SDValue();
15357}
15358
15359static SDValue PerformVCMPCombine(SDNode *N, SelectionDAG &DAG,
15360 const ARMSubtarget *Subtarget) {
15361 if (!Subtarget->hasMVEIntegerOps())
15362 return SDValue();
15363
15364 EVT VT = N->getValueType(0);
15365 SDValue Op0 = N->getOperand(0);
15366 SDValue Op1 = N->getOperand(1);
15367 ARMCC::CondCodes Cond =
15368 (ARMCC::CondCodes)cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
15369 SDLoc dl(N);
15370
15371 // vcmp X, 0, cc -> vcmpz X, cc
15372 if (isZeroVector(Op1))
15373 return DAG.getNode(ARMISD::VCMPZ, dl, VT, Op0, N->getOperand(2));
15374
15375 unsigned SwappedCond = getSwappedCondition(Cond);
15376 if (isValidMVECond(SwappedCond, VT.isFloatingPoint())) {
15377 // vcmp 0, X, cc -> vcmpz X, reversed(cc)
15378 if (isZeroVector(Op0))
15379 return DAG.getNode(ARMISD::VCMPZ, dl, VT, Op1,
15380 DAG.getConstant(SwappedCond, dl, MVT::i32));
15381 // vcmp vdup(Y), X, cc -> vcmp X, vdup(Y), reversed(cc)
15382 if (Op0->getOpcode() == ARMISD::VDUP && Op1->getOpcode() != ARMISD::VDUP)
15383 return DAG.getNode(ARMISD::VCMP, dl, VT, Op1, Op0,
15384 DAG.getConstant(SwappedCond, dl, MVT::i32));
15385 }
15386
15387 return SDValue();
15388}
15389
15390/// PerformInsertEltCombine - Target-specific dag combine xforms for
15391/// ISD::INSERT_VECTOR_ELT.
15392static SDValue PerformInsertEltCombine(SDNode *N,
15393 TargetLowering::DAGCombinerInfo &DCI) {
15394 // Bitcast an i64 load inserted into a vector to f64.
15395 // Otherwise, the i64 value will be legalized to a pair of i32 values.
15396 EVT VT = N->getValueType(0);
15397 SDNode *Elt = N->getOperand(1).getNode();
15398 if (VT.getVectorElementType() != MVT::i64 ||
15399 !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile())
15400 return SDValue();
15401
15402 SelectionDAG &DAG = DCI.DAG;
15403 SDLoc dl(N);
15404 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
15405 VT.getVectorNumElements());
15406 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0));
15407 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1));
15408 // Make the DAGCombiner fold the bitcasts.
15409 DCI.AddToWorklist(Vec.getNode());
15410 DCI.AddToWorklist(V.getNode());
15411 SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT,
15412 Vec, V, N->getOperand(2));
15413 return DAG.getNode(ISD::BITCAST, dl, VT, InsElt);
15414}
15415
15416// Convert a pair of extracts from the same base vector to a VMOVRRD. Either
15417// directly or bitcast to an integer if the original is a float vector.
15418// extract(x, n); extract(x, n+1) -> VMOVRRD(extract v2f64 x, n/2)
15419// bitcast(extract(x, n)); bitcast(extract(x, n+1)) -> VMOVRRD(extract x, n/2)
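// For example, (extract_elt v4i32 x, 2) paired with (extract_elt v4i32 x, 3)
// becomes VMOVRRD (extract_elt (VECTOR_REG_CAST v2f64 x), 1), producing both
// i32 results from a single node.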
15420static SDValue
15421PerformExtractEltToVMOVRRD(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
15422 EVT VT = N->getValueType(0);
15423 SDLoc dl(N);
15424
15425 if (!DCI.isAfterLegalizeDAG() || VT != MVT::i32 ||
15426 !DCI.DAG.getTargetLoweringInfo().isTypeLegal(MVT::f64))
15427 return SDValue();
15428
15429 SDValue Ext = SDValue(N, 0);
15430 if (Ext.getOpcode() == ISD::BITCAST &&
15431 Ext.getOperand(0).getValueType() == MVT::f32)
15432 Ext = Ext.getOperand(0);
15433 if (Ext.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
15434 !isa<ConstantSDNode>(Ext.getOperand(1)) ||
15435 Ext.getConstantOperandVal(1) % 2 != 0)
15436 return SDValue();
15437 if (Ext->use_size() == 1 &&
15438 (Ext->use_begin()->getOpcode() == ISD::SINT_TO_FP ||
15439 Ext->use_begin()->getOpcode() == ISD::UINT_TO_FP))
15440 return SDValue();
15441
15442 SDValue Op0 = Ext.getOperand(0);
15443 EVT VecVT = Op0.getValueType();
15444 unsigned ResNo = Op0.getResNo();
15445 unsigned Lane = Ext.getConstantOperandVal(1);
15446 if (VecVT.getVectorNumElements() != 4)
15447 return SDValue();
15448
15449 // Find another extract, of Lane + 1
15450 auto OtherIt = find_if(Op0->uses(), [&](SDNode *V) {
15451 return V->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15452 isa<ConstantSDNode>(V->getOperand(1)) &&
15453 V->getConstantOperandVal(1) == Lane + 1 &&
15454 V->getOperand(0).getResNo() == ResNo;
15455 });
15456 if (OtherIt == Op0->uses().end())
15457 return SDValue();
15458
15459 // For float extracts, we need to be converting to a i32 for both vector
15460 // lanes.
15461 SDValue OtherExt(*OtherIt, 0);
15462 if (OtherExt.getValueType() != MVT::i32) {
15463 if (OtherExt->use_size() != 1 ||
15464 OtherExt->use_begin()->getOpcode() != ISD::BITCAST ||
15465 OtherExt->use_begin()->getValueType(0) != MVT::i32)
15466 return SDValue();
15467 OtherExt = SDValue(*OtherExt->use_begin(), 0);
15468 }
15469
15470 // Convert the type to a f64 and extract with a VMOVRRD.
15471 SDValue F64 = DCI.DAG.getNode(
15472 ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
15473 DCI.DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v2f64, Op0),
15474 DCI.DAG.getConstant(Ext.getConstantOperandVal(1) / 2, dl, MVT::i32));
15475 SDValue VMOVRRD =
15476 DCI.DAG.getNode(ARMISD::VMOVRRD, dl, {MVT::i32, MVT::i32}, F64);
15477
15478 DCI.CombineTo(OtherExt.getNode(), SDValue(VMOVRRD.getNode(), 1));
15479 return VMOVRRD;
15480}
15481
15482static SDValue PerformExtractEltCombine(SDNode *N,
15483 TargetLowering::DAGCombinerInfo &DCI,
15484 const ARMSubtarget *ST) {
15485 SDValue Op0 = N->getOperand(0);
15486 EVT VT = N->getValueType(0);
15487 SDLoc dl(N);
15488
15489 // extract (vdup x) -> x
15490 if (Op0->getOpcode() == ARMISD::VDUP) {
15491 SDValue X = Op0->getOperand(0);
15492 if (VT == MVT::f16 && X.getValueType() == MVT::i32)
15493 return DCI.DAG.getNode(ARMISD::VMOVhr, dl, VT, X);
15494 if (VT == MVT::i32 && X.getValueType() == MVT::f16)
15495 return DCI.DAG.getNode(ARMISD::VMOVrh, dl, VT, X);
15496 if (VT == MVT::f32 && X.getValueType() == MVT::i32)
15497 return DCI.DAG.getNode(ISD::BITCAST, dl, VT, X);
15498
15499 while (X.getValueType() != VT && X->getOpcode() == ISD::BITCAST)
15500 X = X->getOperand(0);
15501 if (X.getValueType() == VT)
15502 return X;
15503 }
15504
15505 // extract ARM_BUILD_VECTOR -> x
15506 if (Op0->getOpcode() == ARMISD::BUILD_VECTOR &&
15507 isa<ConstantSDNode>(N->getOperand(1)) &&
15508 N->getConstantOperandVal(1) < Op0.getNumOperands()) {
15509 return Op0.getOperand(N->getConstantOperandVal(1));
15510 }
15511
15512 // extract(bitcast(BUILD_VECTOR(VMOVDRR(a, b), ..))) -> a or b
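  // E.g. on little-endian, lane 1 lies in the first f64 element; if that
  // element is VMOVDRR(a, b), operand 1 % 2 == 1 (i.e. b) is returned.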
15513 if (Op0.getValueType() == MVT::v4i32 &&
15514 isa<ConstantSDNode>(N->getOperand(1)) &&
15515 Op0.getOpcode() == ISD::BITCAST &&
15516 Op0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
15517 Op0.getOperand(0).getValueType() == MVT::v2f64) {
15518 SDValue BV = Op0.getOperand(0);
15519 unsigned Offset = N->getConstantOperandVal(1);
15520 SDValue MOV = BV.getOperand(Offset < 2 ? 0 : 1);
15521 if (MOV.getOpcode() == ARMISD::VMOVDRR)
15522 return MOV.getOperand(ST->isLittle() ? Offset % 2 : 1 - Offset % 2);
15523 }
15524
15525 // extract x, n; extract x, n+1 -> VMOVRRD x
15526 if (SDValue R = PerformExtractEltToVMOVRRD(N, DCI))
15527 return R;
15528
15529 // extract (MVETrunc(x)) -> extract x
15530 if (Op0->getOpcode() == ARMISD::MVETRUNC) {
15531 unsigned Idx = N->getConstantOperandVal(1);
15532 unsigned Vec =
15533 Idx / Op0->getOperand(0).getValueType().getVectorNumElements();
15534 unsigned SubIdx =
15535 Idx % Op0->getOperand(0).getValueType().getVectorNumElements();
15536 return DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Op0.getOperand(Vec),
15537 DCI.DAG.getConstant(SubIdx, dl, MVT::i32));
15538 }
15539
15540 return SDValue();
15541}
15542
15543static SDValue PerformSignExtendInregCombine(SDNode *N, SelectionDAG &DAG) {
15544 SDValue Op = N->getOperand(0);
15545 EVT VT = N->getValueType(0);
15546
15547 // sext_inreg(VGETLANEu) -> VGETLANEs
15548 if (Op.getOpcode() == ARMISD::VGETLANEu &&
15549 cast<VTSDNode>(N->getOperand(1))->getVT() ==
15550 Op.getOperand(0).getValueType().getScalarType())
15551 return DAG.getNode(ARMISD::VGETLANEs, SDLoc(N), VT, Op.getOperand(0),
15552 Op.getOperand(1));
15553
15554 return SDValue();
15555}
15556
15557static SDValue
15558PerformInsertSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
15559 SDValue Vec = N->getOperand(0);
15560 SDValue SubVec = N->getOperand(1);
15561 uint64_t IdxVal = N->getConstantOperandVal(2);
15562 EVT VecVT = Vec.getValueType();
15563 EVT SubVT = SubVec.getValueType();
15564
15565 // Only do this for legal fixed vector types.
15566 if (!VecVT.isFixedLengthVector() ||
15567 !DCI.DAG.getTargetLoweringInfo().isTypeLegal(VecVT) ||
15568 !DCI.DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
15569 return SDValue();
15570
15571 // Ignore widening patterns.
15572 if (IdxVal == 0 && Vec.isUndef())
15573 return SDValue();
15574
15575 // Subvector must be half the width and an "aligned" insertion.
15576 unsigned NumSubElts = SubVT.getVectorNumElements();
15577 if ((SubVT.getSizeInBits() * 2) != VecVT.getSizeInBits() ||
15578 (IdxVal != 0 && IdxVal != NumSubElts))
15579 return SDValue();
15580
15581 // Fold insert_subvector -> concat_vectors
15582 // insert_subvector(Vec,Sub,lo) -> concat_vectors(Sub,extract(Vec,hi))
15583 // insert_subvector(Vec,Sub,hi) -> concat_vectors(extract(Vec,lo),Sub)
15584 SDLoc DL(N);
15585 SDValue Lo, Hi;
15586 if (IdxVal == 0) {
15587 Lo = SubVec;
15588 Hi = DCI.DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
15589 DCI.DAG.getVectorIdxConstant(NumSubElts, DL));
15590 } else {
15591 Lo = DCI.DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
15592 DCI.DAG.getVectorIdxConstant(0, DL));
15593 Hi = SubVec;
15594 }
15595 return DCI.DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Lo, Hi);
15596}
15597
15598// shuffle(MVETrunc(x, y)) -> VMOVN(x, y)
15599static SDValue PerformShuffleVMOVNCombine(ShuffleVectorSDNode *N,
15600 SelectionDAG &DAG) {
15601 SDValue Trunc = N->getOperand(0);
15602 EVT VT = Trunc.getValueType();
15603 if (Trunc.getOpcode() != ARMISD::MVETRUNC || !N->getOperand(1).isUndef())
15604 return SDValue();
15605
15606 SDLoc DL(Trunc);
15607 if (isVMOVNTruncMask(N->getMask(), VT, false))
15608 return DAG.getNode(
15609 ARMISD::VMOVN, DL, VT,
15610 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(0)),
15611 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(1)),
15612 DAG.getConstant(1, DL, MVT::i32));
15613 else if (isVMOVNTruncMask(N->getMask(), VT, true))
15614 return DAG.getNode(
15615 ARMISD::VMOVN, DL, VT,
15616 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(1)),
15617 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(0)),
15618 DAG.getConstant(1, DL, MVT::i32));
15619 return SDValue();
15620}
15621
15622/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
15623/// ISD::VECTOR_SHUFFLE.
15624static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
15625 if (SDValue R = PerformShuffleVMOVNCombine(cast<ShuffleVectorSDNode>(N), DAG))
15626 return R;
15627
15628 // The LLVM shufflevector instruction does not require the shuffle mask
15629 // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does
15630 // have that requirement. When translating to ISD::VECTOR_SHUFFLE, if the
15631 // operands do not match the mask length, they are extended by concatenating
15632 // them with undef vectors. That is probably the right thing for other
15633 // targets, but for NEON it is better to concatenate two double-register
15634 // size vector operands into a single quad-register size vector. Do that
15635 // transformation here:
15636 // shuffle(concat(v1, undef), concat(v2, undef)) ->
15637 // shuffle(concat(v1, v2), undef)
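  // For example, with v8i16 operands (NumElts = 8, HalfElts = 4), a mask
  // element of 2 stays 2, while a mask element of 9 (lane 1 of the second
  // concat) is remapped to 4 + 9 - 8 = 5 in the combined quad-register vector.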
15638 SDValue Op0 = N->getOperand(0);
15639 SDValue Op1 = N->getOperand(1);
15640 if (Op0.getOpcode() != ISD::CONCAT_VECTORS ||
15641 Op1.getOpcode() != ISD::CONCAT_VECTORS ||
15642 Op0.getNumOperands() != 2 ||
15643 Op1.getNumOperands() != 2)
15644 return SDValue();
15645 SDValue Concat0Op1 = Op0.getOperand(1);
15646 SDValue Concat1Op1 = Op1.getOperand(1);
15647 if (!Concat0Op1.isUndef() || !Concat1Op1.isUndef())
15648 return SDValue();
15649 // Skip the transformation if any of the types are illegal.
15650 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15651 EVT VT = N->getValueType(0);
15652 if (!TLI.isTypeLegal(VT) ||
15653 !TLI.isTypeLegal(Concat0Op1.getValueType()) ||
15654 !TLI.isTypeLegal(Concat1Op1.getValueType()))
15655 return SDValue();
15656
15657 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
15658 Op0.getOperand(0), Op1.getOperand(0));
15659 // Translate the shuffle mask.
15660 SmallVector<int, 16> NewMask;
15661 unsigned NumElts = VT.getVectorNumElements();
15662 unsigned HalfElts = NumElts/2;
15663 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
15664 for (unsigned n = 0; n < NumElts; ++n) {
15665 int MaskElt = SVN->getMaskElt(n);
15666 int NewElt = -1;
15667 if (MaskElt < (int)HalfElts)
15668 NewElt = MaskElt;
15669 else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))
15670 NewElt = HalfElts + MaskElt - NumElts;
15671 NewMask.push_back(NewElt);
15672 }
15673 return DAG.getVectorShuffle(VT, SDLoc(N), NewConcat,
15674 DAG.getUNDEF(VT), NewMask);
15675}
15676
15677/// Load/store instruction that can be merged with a base address
15678/// update
15679struct BaseUpdateTarget {
15680 SDNode *N;
15681 bool isIntrinsic;
15682 bool isStore;
15683 unsigned AddrOpIdx;
15684};
15685
15686struct BaseUpdateUser {
15687 /// Instruction that updates a pointer
15688 SDNode *N;
15689 /// Pointer increment operand
15690 SDValue Inc;
15691 /// Pointer increment value if it is a constant, or 0 otherwise
15692 unsigned ConstInc;
15693};
15694
15695static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target,
15696 struct BaseUpdateUser &User,
15697 bool SimpleConstIncOnly,
15698 TargetLowering::DAGCombinerInfo &DCI) {
15699 SelectionDAG &DAG = DCI.DAG;
15700 SDNode *N = Target.N;
15701 MemSDNode *MemN = cast<MemSDNode>(N);
15702 SDLoc dl(N);
15703
15704 // Find the new opcode for the updating load/store.
15705 bool isLoadOp = true;
15706 bool isLaneOp = false;
15707 // Workaround for vst1x and vld1x intrinsics which do not have alignment
15708 // as an operand.
15709 bool hasAlignment = true;
15710 unsigned NewOpc = 0;
15711 unsigned NumVecs = 0;
15712 if (Target.isIntrinsic) {
15713 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
15714 switch (IntNo) {
15715 default:
15716      llvm_unreachable("unexpected intrinsic for Neon base update");
15717 case Intrinsic::arm_neon_vld1:
15718 NewOpc = ARMISD::VLD1_UPD;
15719 NumVecs = 1;
15720 break;
15721 case Intrinsic::arm_neon_vld2:
15722 NewOpc = ARMISD::VLD2_UPD;
15723 NumVecs = 2;
15724 break;
15725 case Intrinsic::arm_neon_vld3:
15726 NewOpc = ARMISD::VLD3_UPD;
15727 NumVecs = 3;
15728 break;
15729 case Intrinsic::arm_neon_vld4:
15730 NewOpc = ARMISD::VLD4_UPD;
15731 NumVecs = 4;
15732 break;
15733 case Intrinsic::arm_neon_vld1x2:
15734 NewOpc = ARMISD::VLD1x2_UPD;
15735 NumVecs = 2;
15736 hasAlignment = false;
15737 break;
15738 case Intrinsic::arm_neon_vld1x3:
15739 NewOpc = ARMISD::VLD1x3_UPD;
15740 NumVecs = 3;
15741 hasAlignment = false;
15742 break;
15743 case Intrinsic::arm_neon_vld1x4:
15744 NewOpc = ARMISD::VLD1x4_UPD;
15745 NumVecs = 4;
15746 hasAlignment = false;
15747 break;
15748 case Intrinsic::arm_neon_vld2dup:
15749 NewOpc = ARMISD::VLD2DUP_UPD;
15750 NumVecs = 2;
15751 break;
15752 case Intrinsic::arm_neon_vld3dup:
15753 NewOpc = ARMISD::VLD3DUP_UPD;
15754 NumVecs = 3;
15755 break;
15756 case Intrinsic::arm_neon_vld4dup:
15757 NewOpc = ARMISD::VLD4DUP_UPD;
15758 NumVecs = 4;
15759 break;
15760 case Intrinsic::arm_neon_vld2lane:
15761 NewOpc = ARMISD::VLD2LN_UPD;
15762 NumVecs = 2;
15763 isLaneOp = true;
15764 break;
15765 case Intrinsic::arm_neon_vld3lane:
15766 NewOpc = ARMISD::VLD3LN_UPD;
15767 NumVecs = 3;
15768 isLaneOp = true;
15769 break;
15770 case Intrinsic::arm_neon_vld4lane:
15771 NewOpc = ARMISD::VLD4LN_UPD;
15772 NumVecs = 4;
15773 isLaneOp = true;
15774 break;
15775 case Intrinsic::arm_neon_vst1:
15776 NewOpc = ARMISD::VST1_UPD;
15777 NumVecs = 1;
15778 isLoadOp = false;
15779 break;
15780 case Intrinsic::arm_neon_vst2:
15781 NewOpc = ARMISD::VST2_UPD;
15782 NumVecs = 2;
15783 isLoadOp = false;
15784 break;
15785 case Intrinsic::arm_neon_vst3:
15786 NewOpc = ARMISD::VST3_UPD;
15787 NumVecs = 3;
15788 isLoadOp = false;
15789 break;
15790 case Intrinsic::arm_neon_vst4:
15791 NewOpc = ARMISD::VST4_UPD;
15792 NumVecs = 4;
15793 isLoadOp = false;
15794 break;
15795 case Intrinsic::arm_neon_vst2lane:
15796 NewOpc = ARMISD::VST2LN_UPD;
15797 NumVecs = 2;
15798 isLoadOp = false;
15799 isLaneOp = true;
15800 break;
15801 case Intrinsic::arm_neon_vst3lane:
15802 NewOpc = ARMISD::VST3LN_UPD;
15803 NumVecs = 3;
15804 isLoadOp = false;
15805 isLaneOp = true;
15806 break;
15807 case Intrinsic::arm_neon_vst4lane:
15808 NewOpc = ARMISD::VST4LN_UPD;
15809 NumVecs = 4;
15810 isLoadOp = false;
15811 isLaneOp = true;
15812 break;
15813 case Intrinsic::arm_neon_vst1x2:
15814 NewOpc = ARMISD::VST1x2_UPD;
15815 NumVecs = 2;
15816 isLoadOp = false;
15817 hasAlignment = false;
15818 break;
15819 case Intrinsic::arm_neon_vst1x3:
15820 NewOpc = ARMISD::VST1x3_UPD;
15821 NumVecs = 3;
15822 isLoadOp = false;
15823 hasAlignment = false;
15824 break;
15825 case Intrinsic::arm_neon_vst1x4:
15826 NewOpc = ARMISD::VST1x4_UPD;
15827 NumVecs = 4;
15828 isLoadOp = false;
15829 hasAlignment = false;
15830 break;
15831 }
15832 } else {
15833 isLaneOp = true;
15834 switch (N->getOpcode()) {
15835 default:
15836      llvm_unreachable("unexpected opcode for Neon base update");
15837 case ARMISD::VLD1DUP:
15838 NewOpc = ARMISD::VLD1DUP_UPD;
15839 NumVecs = 1;
15840 break;
15841 case ARMISD::VLD2DUP:
15842 NewOpc = ARMISD::VLD2DUP_UPD;
15843 NumVecs = 2;
15844 break;
15845 case ARMISD::VLD3DUP:
15846 NewOpc = ARMISD::VLD3DUP_UPD;
15847 NumVecs = 3;
15848 break;
15849 case ARMISD::VLD4DUP:
15850 NewOpc = ARMISD::VLD4DUP_UPD;
15851 NumVecs = 4;
15852 break;
15853 case ISD::LOAD:
15854 NewOpc = ARMISD::VLD1_UPD;
15855 NumVecs = 1;
15856 isLaneOp = false;
15857 break;
15858 case ISD::STORE:
15859 NewOpc = ARMISD::VST1_UPD;
15860 NumVecs = 1;
15861 isLaneOp = false;
15862 isLoadOp = false;
15863 break;
15864 }
15865 }
15866
15867 // Find the size of memory referenced by the load/store.
15868 EVT VecTy;
15869 if (isLoadOp) {
15870 VecTy = N->getValueType(0);
15871 } else if (Target.isIntrinsic) {
15872 VecTy = N->getOperand(Target.AddrOpIdx + 1).getValueType();
15873 } else {
15874    assert(Target.isStore &&
15875           "Node has to be a load, a store, or an intrinsic!");
15876 VecTy = N->getOperand(1).getValueType();
15877 }
15878
15879 bool isVLDDUPOp =
15880 NewOpc == ARMISD::VLD1DUP_UPD || NewOpc == ARMISD::VLD2DUP_UPD ||
15881 NewOpc == ARMISD::VLD3DUP_UPD || NewOpc == ARMISD::VLD4DUP_UPD;
15882
15883 unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
15884 if (isLaneOp || isVLDDUPOp)
15885 NumBytes /= VecTy.getVectorNumElements();
15886
15887 if (NumBytes >= 3 * 16 && User.ConstInc != NumBytes) {
15888 // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
15889 // separate instructions that make it harder to use a non-constant update.
15890 return false;
15891 }
15892
15893 if (SimpleConstIncOnly && User.ConstInc != NumBytes)
15894 return false;
15895
15896 // OK, we found an ADD we can fold into the base update.
15897 // Now, create a _UPD node, taking care of not breaking alignment.
15898
15899 EVT AlignedVecTy = VecTy;
15900 Align Alignment = MemN->getAlign();
15901
15902 // If this is a less-than-standard-aligned load/store, change the type to
15903 // match the standard alignment.
15904 // The alignment is overlooked when selecting _UPD variants; and it's
15905 // easier to introduce bitcasts here than fix that.
15906 // There are 3 ways to get to this base-update combine:
15907 // - intrinsics: they are assumed to be properly aligned (to the standard
15908 // alignment of the memory type), so we don't need to do anything.
15909 // - ARMISD::VLDx nodes: they are only generated from the aforementioned
15910 // intrinsics, so, likewise, there's nothing to do.
15911 // - generic load/store instructions: the alignment is specified as an
15912 // explicit operand, rather than implicitly as the standard alignment
15913  //    of the memory type (like the intrinsics). We need to change the
15914 // memory type to match the explicit alignment. That way, we don't
15915 // generate non-standard-aligned ARMISD::VLDx nodes.
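  // For instance, a v2i64 load with only 2-byte alignment is retyped here as
  // v8i16 (EltTy = i16, 16 bytes / 2 = 8 elements) so that the resulting
  // VLD1_UPD node stays standard-aligned.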
15916 if (isa<LSBaseSDNode>(N)) {
15917 if (Alignment.value() < VecTy.getScalarSizeInBits() / 8) {
15918 MVT EltTy = MVT::getIntegerVT(Alignment.value() * 8);
15919      assert(NumVecs == 1 && "Unexpected multi-element generic load/store.");
15920      assert(!isLaneOp && "Unexpected generic load/store lane.");
15921 unsigned NumElts = NumBytes / (EltTy.getSizeInBits() / 8);
15922 AlignedVecTy = MVT::getVectorVT(EltTy, NumElts);
15923 }
15924 // Don't set an explicit alignment on regular load/stores that we want
15925 // to transform to VLD/VST 1_UPD nodes.
15926 // This matches the behavior of regular load/stores, which only get an
15927 // explicit alignment if the MMO alignment is larger than the standard
15928 // alignment of the memory type.
15929 // Intrinsics, however, always get an explicit alignment, set to the
15930 // alignment of the MMO.
15931 Alignment = Align(1);
15932 }
15933
15934 // Create the new updating load/store node.
15935 // First, create an SDVTList for the new updating node's results.
15936 EVT Tys[6];
15937 unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
15938 unsigned n;
15939 for (n = 0; n < NumResultVecs; ++n)
15940 Tys[n] = AlignedVecTy;
15941 Tys[n++] = MVT::i32;
15942 Tys[n] = MVT::Other;
15943 SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumResultVecs + 2));
15944
15945 // Then, gather the new node's operands.
15946 SmallVector<SDValue, 8> Ops;
15947 Ops.push_back(N->getOperand(0)); // incoming chain
15948 Ops.push_back(N->getOperand(Target.AddrOpIdx));
15949 Ops.push_back(User.Inc);
15950
15951 if (StoreSDNode *StN = dyn_cast<StoreSDNode>(N)) {
15952 // Try to match the intrinsic's signature
15953 Ops.push_back(StN->getValue());
15954 } else {
15955 // Loads (and of course intrinsics) match the intrinsics' signature,
15956 // so just add all but the alignment operand.
15957 unsigned LastOperand =
15958 hasAlignment ? N->getNumOperands() - 1 : N->getNumOperands();
15959 for (unsigned i = Target.AddrOpIdx + 1; i < LastOperand; ++i)
15960 Ops.push_back(N->getOperand(i));
15961 }
15962
15963 // For all node types, the alignment operand is always the last one.
15964 Ops.push_back(DAG.getConstant(Alignment.value(), dl, MVT::i32));
15965
15966 // If this is a non-standard-aligned STORE, the penultimate operand is the
15967 // stored value. Bitcast it to the aligned type.
15968 if (AlignedVecTy != VecTy && N->getOpcode() == ISD::STORE) {
15969 SDValue &StVal = Ops[Ops.size() - 2];
15970 StVal = DAG.getNode(ISD::BITCAST, dl, AlignedVecTy, StVal);
15971 }
15972
15973 EVT LoadVT = isLaneOp ? VecTy.getVectorElementType() : AlignedVecTy;
15974 SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, LoadVT,
15975 MemN->getMemOperand());
15976
15977 // Update the uses.
15978 SmallVector<SDValue, 5> NewResults;
15979 for (unsigned i = 0; i < NumResultVecs; ++i)
15980 NewResults.push_back(SDValue(UpdN.getNode(), i));
15981
15982  // If this is a non-standard-aligned LOAD, the first result is the loaded
15983 // value. Bitcast it to the expected result type.
15984 if (AlignedVecTy != VecTy && N->getOpcode() == ISD::LOAD) {
15985 SDValue &LdVal = NewResults[0];
15986 LdVal = DAG.getNode(ISD::BITCAST, dl, VecTy, LdVal);
15987 }
15988
15989 NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1)); // chain
15990 DCI.CombineTo(N, NewResults);
15991 DCI.CombineTo(User.N, SDValue(UpdN.getNode(), NumResultVecs));
15992
15993 return true;
15994}
15995
15996// If (opcode ptr inc) is an ADD-like instruction, return the
15997// increment value. Otherwise return 0.
15998static unsigned getPointerConstIncrement(unsigned Opcode, SDValue Ptr,
15999 SDValue Inc, const SelectionDAG &DAG) {
16000 ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode());
16001 if (!CInc)
16002 return 0;
16003
16004 switch (Opcode) {
16005 case ARMISD::VLD1_UPD:
16006 case ISD::ADD:
16007 return CInc->getZExtValue();
16008 case ISD::OR: {
16009 if (DAG.haveNoCommonBitsSet(Ptr, Inc)) {
16010 // (OR ptr inc) is the same as (ADD ptr inc)
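      // e.g. a pointer known to be 16-byte aligned has its low 4 bits clear,
      // so OR-ing in a constant increment below 16 adds exactly that value.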
16011 return CInc->getZExtValue();
16012 }
16013 return 0;
16014 }
16015 default:
16016 return 0;
16017 }
16018}
16019
16020static bool findPointerConstIncrement(SDNode *N, SDValue *Ptr, SDValue *CInc) {
16021 switch (N->getOpcode()) {
16022 case ISD::ADD:
16023 case ISD::OR: {
16024 if (isa<ConstantSDNode>(N->getOperand(1))) {
16025 *Ptr = N->getOperand(0);
16026 *CInc = N->getOperand(1);
16027 return true;
16028 }
16029 return false;
16030 }
16031 case ARMISD::VLD1_UPD: {
16032 if (isa<ConstantSDNode>(N->getOperand(2))) {
16033 *Ptr = N->getOperand(1);
16034 *CInc = N->getOperand(2);
16035 return true;
16036 }
16037 return false;
16038 }
16039 default:
16040 return false;
16041 }
16042}
16043
16044static bool isValidBaseUpdate(SDNode *N, SDNode *User) {
16045 // Check that the add is independent of the load/store.
16046 // Otherwise, folding it would create a cycle. Search through Addr
16047  // as well, since the User may not be a direct user of Addr and may
16048  // only share a base pointer.
16049 SmallPtrSet<const SDNode *, 32> Visited;
16050 SmallVector<const SDNode *, 16> Worklist;
16051 Worklist.push_back(N);
16052 Worklist.push_back(User);
16053 if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
16054 SDNode::hasPredecessorHelper(User, Visited, Worklist))
16055 return false;
16056 return true;
16057}
16058
16059/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
16060/// NEON load/store intrinsics, and generic vector load/stores, to merge
16061/// base address updates.
16062/// For generic load/stores, the memory type is assumed to be a vector.
16063/// The caller is assumed to have checked legality.
16064static SDValue CombineBaseUpdate(SDNode *N,
16065 TargetLowering::DAGCombinerInfo &DCI) {
16066 const bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
16067 N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
16068 const bool isStore = N->getOpcode() == ISD::STORE;
16069 const unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1);
16070 BaseUpdateTarget Target = {N, isIntrinsic, isStore, AddrOpIdx};
16071
16072 SDValue Addr = N->getOperand(AddrOpIdx);
16073
16074 SmallVector<BaseUpdateUser, 8> BaseUpdates;
16075
16076 // Search for a use of the address operand that is an increment.
16077 for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
16078 UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
16079 SDNode *User = *UI;
16080 if (UI.getUse().getResNo() != Addr.getResNo() ||
16081 User->getNumOperands() != 2)
16082 continue;
16083
16084 SDValue Inc = User->getOperand(UI.getOperandNo() == 1 ? 0 : 1);
16085 unsigned ConstInc =
16086 getPointerConstIncrement(User->getOpcode(), Addr, Inc, DCI.DAG);
16087
16088 if (ConstInc || User->getOpcode() == ISD::ADD)
16089 BaseUpdates.push_back({User, Inc, ConstInc});
16090 }
16091
16092 // If the address is a constant pointer increment itself, find
16093 // another constant increment that has the same base operand
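  // For example, if Addr is (add base, 16) and another user computes
  // (add base, 32), the difference 32 - 16 = 16 is recorded as a candidate
  // increment relative to Addr.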
16094 SDValue Base;
16095 SDValue CInc;
16096 if (findPointerConstIncrement(Addr.getNode(), &Base, &CInc)) {
16097 unsigned Offset =
16098 getPointerConstIncrement(Addr->getOpcode(), Base, CInc, DCI.DAG);
16099 for (SDNode::use_iterator UI = Base->use_begin(), UE = Base->use_end();
16100 UI != UE; ++UI) {
16101
16102 SDNode *User = *UI;
16103 if (UI.getUse().getResNo() != Base.getResNo() || User == Addr.getNode() ||
16104 User->getNumOperands() != 2)
16105 continue;
16106
16107 SDValue UserInc = User->getOperand(UI.getOperandNo() == 0 ? 1 : 0);
16108 unsigned UserOffset =
16109 getPointerConstIncrement(User->getOpcode(), Base, UserInc, DCI.DAG);
16110
16111 if (!UserOffset || UserOffset <= Offset)
16112 continue;
16113
16114 unsigned NewConstInc = UserOffset - Offset;
16115 SDValue NewInc = DCI.DAG.getConstant(NewConstInc, SDLoc(N), MVT::i32);
16116 BaseUpdates.push_back({User, NewInc, NewConstInc});
16117 }
16118 }
16119
16120 // Try to fold the load/store with an update that matches memory
16121 // access size. This should work well for sequential loads.
16122 //
16123 // Filter out invalid updates as well.
16124 unsigned NumValidUpd = BaseUpdates.size();
16125 for (unsigned I = 0; I < NumValidUpd;) {
16126 BaseUpdateUser &User = BaseUpdates[I];
16127 if (!isValidBaseUpdate(N, User.N)) {
16128 --NumValidUpd;
16129 std::swap(BaseUpdates[I], BaseUpdates[NumValidUpd]);
16130 continue;
16131 }
16132
16133 if (TryCombineBaseUpdate(Target, User, /*SimpleConstIncOnly=*/true, DCI))
16134 return SDValue();
16135 ++I;
16136 }
16137 BaseUpdates.resize(NumValidUpd);
16138
16139 // Try to fold with other users. Non-constant updates are considered
16140 // first, and constant updates are sorted to not break a sequence of
16141 // strided accesses (if there is any).
16142 std::stable_sort(BaseUpdates.begin(), BaseUpdates.end(),
16143 [](const BaseUpdateUser &LHS, const BaseUpdateUser &RHS) {
16144 return LHS.ConstInc < RHS.ConstInc;
16145 });
16146 for (BaseUpdateUser &User : BaseUpdates) {
16147 if (TryCombineBaseUpdate(Target, User, /*SimpleConstIncOnly=*/false, DCI))
16148 return SDValue();
16149 }
16150 return SDValue();
16151}
16152
16153static SDValue PerformVLDCombine(SDNode *N,
16154 TargetLowering::DAGCombinerInfo &DCI) {
16155 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
16156 return SDValue();
16157
16158 return CombineBaseUpdate(N, DCI);
16159}
16160
16161static SDValue PerformMVEVLDCombine(SDNode *N,
16162 TargetLowering::DAGCombinerInfo &DCI) {
16163 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
16164 return SDValue();
16165
16166 SelectionDAG &DAG = DCI.DAG;
16167 SDValue Addr = N->getOperand(2);
16168 MemSDNode *MemN = cast<MemSDNode>(N);
16169 SDLoc dl(N);
16170
16171  // For the stores, where there are multiple intrinsics, we only actually want
16172  // to post-inc the last of them.
16173 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
16174 if (IntNo == Intrinsic::arm_mve_vst2q &&
16175 cast<ConstantSDNode>(N->getOperand(5))->getZExtValue() != 1)
16176 return SDValue();
16177 if (IntNo == Intrinsic::arm_mve_vst4q &&
16178 cast<ConstantSDNode>(N->getOperand(7))->getZExtValue() != 3)
16179 return SDValue();
16180
16181 // Search for a use of the address operand that is an increment.
16182 for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
16183 UE = Addr.getNode()->use_end();
16184 UI != UE; ++UI) {
16185 SDNode *User = *UI;
16186 if (User->getOpcode() != ISD::ADD ||
16187 UI.getUse().getResNo() != Addr.getResNo())
16188 continue;
16189
16190 // Check that the add is independent of the load/store. Otherwise, folding
16191 // it would create a cycle. We can avoid searching through Addr as it's a
16192 // predecessor to both.
16193 SmallPtrSet<const SDNode *, 32> Visited;
16194 SmallVector<const SDNode *, 16> Worklist;
16195 Visited.insert(Addr.getNode());
16196 Worklist.push_back(N);
16197 Worklist.push_back(User);
16198 if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
16199 SDNode::hasPredecessorHelper(User, Visited, Worklist))
16200 continue;
16201
16202 // Find the new opcode for the updating load/store.
16203 bool isLoadOp = true;
16204 unsigned NewOpc = 0;
16205 unsigned NumVecs = 0;
16206 switch (IntNo) {
16207 default:
16208      llvm_unreachable("unexpected intrinsic for MVE VLDn combine");
16209 case Intrinsic::arm_mve_vld2q:
16210 NewOpc = ARMISD::VLD2_UPD;
16211 NumVecs = 2;
16212 break;
16213 case Intrinsic::arm_mve_vld4q:
16214 NewOpc = ARMISD::VLD4_UPD;
16215 NumVecs = 4;
16216 break;
16217 case Intrinsic::arm_mve_vst2q:
16218 NewOpc = ARMISD::VST2_UPD;
16219 NumVecs = 2;
16220 isLoadOp = false;
16221 break;
16222 case Intrinsic::arm_mve_vst4q:
16223 NewOpc = ARMISD::VST4_UPD;
16224 NumVecs = 4;
16225 isLoadOp = false;
16226 break;
16227 }
16228
16229 // Find the size of memory referenced by the load/store.
16230 EVT VecTy;
16231 if (isLoadOp) {
16232 VecTy = N->getValueType(0);
16233 } else {
16234 VecTy = N->getOperand(3).getValueType();
16235 }
16236
16237 unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
16238
16239 // If the increment is a constant, it must match the memory ref size.
16240 SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
16241 ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode());
16242 if (!CInc || CInc->getZExtValue() != NumBytes)
16243 continue;
16244
16245 // Create the new updating load/store node.
16246 // First, create an SDVTList for the new updating node's results.
16247 EVT Tys[6];
16248 unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
16249 unsigned n;
16250 for (n = 0; n < NumResultVecs; ++n)
16251 Tys[n] = VecTy;
16252 Tys[n++] = MVT::i32;
16253 Tys[n] = MVT::Other;
16254 SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumResultVecs + 2));
16255
16256 // Then, gather the new node's operands.
16257 SmallVector<SDValue, 8> Ops;
16258 Ops.push_back(N->getOperand(0)); // incoming chain
16259 Ops.push_back(N->getOperand(2)); // ptr
16260 Ops.push_back(Inc);
16261
16262 for (unsigned i = 3; i < N->getNumOperands(); ++i)
16263 Ops.push_back(N->getOperand(i));
16264
16265 SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, VecTy,
16266 MemN->getMemOperand());
16267
16268 // Update the uses.
16269 SmallVector<SDValue, 5> NewResults;
16270 for (unsigned i = 0; i < NumResultVecs; ++i)
16271 NewResults.push_back(SDValue(UpdN.getNode(), i));
16272
16273 NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1)); // chain
16274 DCI.CombineTo(N, NewResults);
16275 DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
16276
16277 break;
16278 }
16279
16280 return SDValue();
16281}
16282
16283/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
16284/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
16285/// are also VDUPLANEs. If so, combine them to a vldN-dup operation and
16286/// return true.
16287static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
16288 SelectionDAG &DAG = DCI.DAG;
16289 EVT VT = N->getValueType(0);
16290 // vldN-dup instructions only support 64-bit vectors for N > 1.
16291 if (!VT.is64BitVector())
16292 return false;
16293
16294 // Check if the VDUPLANE operand is a vldN-dup intrinsic.
16295 SDNode *VLD = N->getOperand(0).getNode();
16296 if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
16297 return false;
16298 unsigned NumVecs = 0;
16299 unsigned NewOpc = 0;
16300 unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue();
16301 if (IntNo == Intrinsic::arm_neon_vld2lane) {
16302 NumVecs = 2;
16303 NewOpc = ARMISD::VLD2DUP;
16304 } else if (IntNo == Intrinsic::arm_neon_vld3lane) {
16305 NumVecs = 3;
16306 NewOpc = ARMISD::VLD3DUP;
16307 } else if (IntNo == Intrinsic::arm_neon_vld4lane) {
16308 NumVecs = 4;
16309 NewOpc = ARMISD::VLD4DUP;
16310 } else {
16311 return false;
16312 }
16313
16314 // First check that all the vldN-lane uses are VDUPLANEs and that the lane
16315 // numbers match the load.
16316 unsigned VLDLaneNo =
16317 cast<ConstantSDNode>(VLD->getOperand(NumVecs+3))->getZExtValue();
16318 for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
16319 UI != UE; ++UI) {
16320 // Ignore uses of the chain result.
16321 if (UI.getUse().getResNo() == NumVecs)
16322 continue;
16323 SDNode *User = *UI;
16324 if (User->getOpcode() != ARMISD::VDUPLANE ||
16325 VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue())
16326 return false;
16327 }
16328
16329 // Create the vldN-dup node.
16330 EVT Tys[5];
16331 unsigned n;
16332 for (n = 0; n < NumVecs; ++n)
16333 Tys[n] = VT;
16334 Tys[n] = MVT::Other;
16335 SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumVecs + 1));
16336 SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
16337 MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
16338 SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys,
16339 Ops, VLDMemInt->getMemoryVT(),
16340 VLDMemInt->getMemOperand());
16341
16342 // Update the uses.
16343 for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
16344 UI != UE; ++UI) {
16345 unsigned ResNo = UI.getUse().getResNo();
16346 // Ignore uses of the chain result.
16347 if (ResNo == NumVecs)
16348 continue;
16349 SDNode *User = *UI;
16350 DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo));
16351 }
16352
16353 // Now the vldN-lane intrinsic is dead except for its chain result.
16354 // Update uses of the chain.
16355 std::vector<SDValue> VLDDupResults;
16356 for (unsigned n = 0; n < NumVecs; ++n)
16357 VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
16358 VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
16359 DCI.CombineTo(VLD, VLDDupResults);
16360
16361 return true;
16362}
16363
16364/// PerformVDUPLANECombine - Target-specific dag combine xforms for
16365/// ARMISD::VDUPLANE.
16366static SDValue PerformVDUPLANECombine(SDNode *N,
16367 TargetLowering::DAGCombinerInfo &DCI,
16368 const ARMSubtarget *Subtarget) {
16369 SDValue Op = N->getOperand(0);
16370 EVT VT = N->getValueType(0);
16371
16372 // On MVE, we just convert the VDUPLANE to a VDUP with an extract.
16373 if (Subtarget->hasMVEIntegerOps()) {
16374 EVT ExtractVT = VT.getVectorElementType();
16375 // We need to ensure we are creating a legal type.
16376 if (!DCI.DAG.getTargetLoweringInfo().isTypeLegal(ExtractVT))
16377 ExtractVT = MVT::i32;
16378 SDValue Extract = DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), ExtractVT,
16379 N->getOperand(0), N->getOperand(1));
16380 return DCI.DAG.getNode(ARMISD::VDUP, SDLoc(N), VT, Extract);
16381 }
16382
16383 // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses
16384 // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation.
16385 if (CombineVLDDUP(N, DCI))
16386 return SDValue(N, 0);
16387
16388 // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
16389 // redundant. Ignore bit_converts for now; element sizes are checked below.
16390 while (Op.getOpcode() == ISD::BITCAST)
16391 Op = Op.getOperand(0);
16392 if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
16393 return SDValue();
16394
16395 // Make sure the VMOV element size is not bigger than the VDUPLANE elements.
16396 unsigned EltSize = Op.getScalarValueSizeInBits();
16397 // The canonical VMOV for a zero vector uses a 32-bit element size.
16398 unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
16399 unsigned EltBits;
16400 if (ARM_AM::decodeVMOVModImm(Imm, EltBits) == 0)
16401 EltSize = 8;
16402 if (EltSize > VT.getScalarSizeInBits())
16403 return SDValue();
16404
16405 return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
16406}
16407
16408/// PerformVDUPCombine - Target-specific dag combine xforms for ARMISD::VDUP.
16409static SDValue PerformVDUPCombine(SDNode *N, SelectionDAG &DAG,
16410 const ARMSubtarget *Subtarget) {
16411 SDValue Op = N->getOperand(0);
16412 SDLoc dl(N);
16413
16414 if (Subtarget->hasMVEIntegerOps()) {
16415 // Convert VDUP f32 -> VDUP BITCAST i32 under MVE, as we know the value will
16416 // need to come from a GPR.
16417 if (Op.getValueType() == MVT::f32)
16418 return DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0),
16419 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op));
16420 else if (Op.getValueType() == MVT::f16)
16421 return DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0),
16422 DAG.getNode(ARMISD::VMOVrh, dl, MVT::i32, Op));
16423 }
16424
16425 if (!Subtarget->hasNEON())
16426 return SDValue();
16427
16428 // Match VDUP(LOAD) -> VLD1DUP.
16429 // We match this pattern here rather than waiting for isel because the
16430 // transform is only legal for unindexed loads.
16431 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode());
16432 if (LD && Op.hasOneUse() && LD->isUnindexed() &&
16433 LD->getMemoryVT() == N->getValueType(0).getVectorElementType()) {
16434 SDValue Ops[] = {LD->getOperand(0), LD->getOperand(1),
16435 DAG.getConstant(LD->getAlign().value(), SDLoc(N), MVT::i32)};
16436 SDVTList SDTys = DAG.getVTList(N->getValueType(0), MVT::Other);
16437 SDValue VLDDup =
16438 DAG.getMemIntrinsicNode(ARMISD::VLD1DUP, SDLoc(N), SDTys, Ops,
16439 LD->getMemoryVT(), LD->getMemOperand());
16440 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), VLDDup.getValue(1));
16441 return VLDDup;
16442 }
16443
16444 return SDValue();
16445}
16446
16447static SDValue PerformLOADCombine(SDNode *N,
16448 TargetLowering::DAGCombinerInfo &DCI,
16449 const ARMSubtarget *Subtarget) {
16450 EVT VT = N->getValueType(0);
16451
16452 // If this is a legal vector load, try to combine it into a VLD1_UPD.
16453 if (Subtarget->hasNEON() && ISD::isNormalLoad(N) && VT.isVector() &&
16454 DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
16455 return CombineBaseUpdate(N, DCI);
16456
16457 return SDValue();
16458}
16459
16460// Optimize trunc store (of multiple scalars) to shuffle and store. First,
16461// pack all of the elements in one place. Next, store to memory in fewer
16462// chunks.
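// For example, a truncating store of v4i32 to v4i16 bitcasts the value to
// v8i16, shuffles the four narrow elements to the front (mask <0,2,4,6> on
// little-endian), and then stores the packed data with the widest legal
// integer stores.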
16463static SDValue PerformTruncatingStoreCombine(StoreSDNode *St,
16464 SelectionDAG &DAG) {
16465 SDValue StVal = St->getValue();
16466 EVT VT = StVal.getValueType();
16467 if (!St->isTruncatingStore() || !VT.isVector())
16468 return SDValue();
16469 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16470 EVT StVT = St->getMemoryVT();
16471 unsigned NumElems = VT.getVectorNumElements();
16472  assert(StVT != VT && "Cannot truncate to the same type");
16473 unsigned FromEltSz = VT.getScalarSizeInBits();
16474 unsigned ToEltSz = StVT.getScalarSizeInBits();
16475
16476 // From, To sizes and ElemCount must be pow of two
16477 if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz))
16478 return SDValue();
16479
16480 // We are going to use the original vector elt for storing.
16481 // Accumulated smaller vector elements must be a multiple of the store size.
16482 if (0 != (NumElems * FromEltSz) % ToEltSz)
16483 return SDValue();
16484
16485 unsigned SizeRatio = FromEltSz / ToEltSz;
16486  assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits());
16487
16488 // Create a type on which we perform the shuffle.
16489 EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(),
16490 NumElems * SizeRatio);
16491  assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
16492
16493 SDLoc DL(St);
16494 SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
16495 SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
16496 for (unsigned i = 0; i < NumElems; ++i)
16497 ShuffleVec[i] = DAG.getDataLayout().isBigEndian() ? (i + 1) * SizeRatio - 1
16498 : i * SizeRatio;
16499
16500 // Can't shuffle using an illegal type.
16501 if (!TLI.isTypeLegal(WideVecVT))
16502 return SDValue();
16503
16504 SDValue Shuff = DAG.getVectorShuffle(
16505 WideVecVT, DL, WideVec, DAG.getUNDEF(WideVec.getValueType()), ShuffleVec);
16506 // At this point all of the data is stored at the bottom of the
16507 // register. We now need to save it to mem.
16508
16509 // Find the largest store unit
16510 MVT StoreType = MVT::i8;
16511 for (MVT Tp : MVT::integer_valuetypes()) {
16512 if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)
16513 StoreType = Tp;
16514 }
16515 // Didn't find a legal store type.
16516 if (!TLI.isTypeLegal(StoreType))
16517 return SDValue();
16518
16519 // Bitcast the original vector into a vector of store-size units
16520 EVT StoreVecVT =
16521 EVT::getVectorVT(*DAG.getContext(), StoreType,
16522 VT.getSizeInBits() / EVT(StoreType).getSizeInBits());
16523  assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
16524 SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);
16525 SmallVector<SDValue, 8> Chains;
16526 SDValue Increment = DAG.getConstant(StoreType.getSizeInBits() / 8, DL,
16527 TLI.getPointerTy(DAG.getDataLayout()));
16528 SDValue BasePtr = St->getBasePtr();
16529
16530 // Perform one or more big stores into memory.
16531 unsigned E = (ToEltSz * NumElems) / StoreType.getSizeInBits();
16532 for (unsigned I = 0; I < E; I++) {
16533 SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, StoreType,
16534 ShuffWide, DAG.getIntPtrConstant(I, DL));
16535 SDValue Ch =
16536 DAG.getStore(St->getChain(), DL, SubVec, BasePtr, St->getPointerInfo(),
16537 St->getAlign(), St->getMemOperand()->getFlags());
16538 BasePtr =
16539 DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, Increment);
16540 Chains.push_back(Ch);
16541 }
16542 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
16543}
16544
16545// Try taking a single vector store from an fpround (which would otherwise turn
16546// into an expensive buildvector) and splitting it into a series of narrowing
16547// stores.
16548static SDValue PerformSplittingToNarrowingStores(StoreSDNode *St,
16549 SelectionDAG &DAG) {
16550 if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
16551 return SDValue();
16552 SDValue Trunc = St->getValue();
16553 if (Trunc->getOpcode() != ISD::FP_ROUND)
16554 return SDValue();
16555 EVT FromVT = Trunc->getOperand(0).getValueType();
16556 EVT ToVT = Trunc.getValueType();
16557 if (!ToVT.isVector())
16558 return SDValue();
16559  assert(FromVT.getVectorNumElements() == ToVT.getVectorNumElements());
16560 EVT ToEltVT = ToVT.getVectorElementType();
16561 EVT FromEltVT = FromVT.getVectorElementType();
16562
16563 if (FromEltVT != MVT::f32 || ToEltVT != MVT::f16)
16564 return SDValue();
16565
16566 unsigned NumElements = 4;
16567 if (FromVT.getVectorNumElements() % NumElements != 0)
16568 return SDValue();
16569
16570  // Test if the Trunc will be convertible to a VMOVN with a shuffle, and if so
16571 // use the VMOVN over splitting the store. We are looking for patterns of:
16572 // !rev: 0 N 1 N+1 2 N+2 ...
16573 // rev: N 0 N+1 1 N+2 2 ...
16574 // The shuffle may either be a single source (in which case N = NumElts/2) or
16575 // two inputs extended with concat to the same size (in which case N =
16576 // NumElts).
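  // For example, with ToVT = v8f16 and two concat-extended inputs, the !rev
  // pattern corresponds to the shuffle mask <0, 8, 1, 9, 2, 10, 3, 11>.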
16577 auto isVMOVNShuffle = [&](ShuffleVectorSDNode *SVN, bool Rev) {
16578 ArrayRef<int> M = SVN->getMask();
16579 unsigned NumElts = ToVT.getVectorNumElements();
16580 if (SVN->getOperand(1).isUndef())
16581 NumElts /= 2;
16582
16583 unsigned Off0 = Rev ? NumElts : 0;
16584 unsigned Off1 = Rev ? 0 : NumElts;
16585
16586 for (unsigned I = 0; I < NumElts; I += 2) {
16587 if (M[I] >= 0 && M[I] != (int)(Off0 + I / 2))
16588 return false;
16589 if (M[I + 1] >= 0 && M[I + 1] != (int)(Off1 + I / 2))
16590 return false;
16591 }
16592
16593 return true;
16594 };
16595
16596 if (auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(Trunc.getOperand(0)))
16597 if (isVMOVNShuffle(Shuffle, false) || isVMOVNShuffle(Shuffle, true))
16598 return SDValue();
16599
16600 LLVMContext &C = *DAG.getContext();
16601 SDLoc DL(St);
16602 // Details about the old store
16603 SDValue Ch = St->getChain();
16604 SDValue BasePtr = St->getBasePtr();
16605 Align Alignment = St->getOriginalAlign();
16606 MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
16607 AAMDNodes AAInfo = St->getAAInfo();
16608
16609 // We split the store into slices of NumElements. fp16 trunc stores are vcvt
16610 // and then stored as truncating integer stores.
16611 EVT NewFromVT = EVT::getVectorVT(C, FromEltVT, NumElements);
16612 EVT NewToVT = EVT::getVectorVT(
16613 C, EVT::getIntegerVT(C, ToEltVT.getSizeInBits()), NumElements);
16614
16615 SmallVector<SDValue, 4> Stores;
16616 for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
16617 unsigned NewOffset = i * NumElements * ToEltVT.getSizeInBits() / 8;
16618 SDValue NewPtr =
16619 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::Fixed(NewOffset));
16620
16621 SDValue Extract =
16622 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewFromVT, Trunc.getOperand(0),
16623 DAG.getConstant(i * NumElements, DL, MVT::i32));
16624
16625 SDValue FPTrunc =
16626 DAG.getNode(ARMISD::VCVTN, DL, MVT::v8f16, DAG.getUNDEF(MVT::v8f16),
16627 Extract, DAG.getConstant(0, DL, MVT::i32));
16628 Extract = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, MVT::v4i32, FPTrunc);
16629
16630 SDValue Store = DAG.getTruncStore(
16631 Ch, DL, Extract, NewPtr, St->getPointerInfo().getWithOffset(NewOffset),
16632 NewToVT, Alignment, MMOFlags, AAInfo);
16633 Stores.push_back(Store);
16634 }
16635 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
16636}
16637
16638// Try taking a single vector store from an MVETRUNC (which would otherwise turn
16639// into an expensive buildvector) and splitting it into a series of narrowing
16640// stores.
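// A sketch of the transform below: an MVETRUNC of two v4i32 operands being
// stored as v8i16 becomes two truncating stores, each writing one v4i32
// operand as v4i16, at byte offsets 0 and 8 respectively.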
16641static SDValue PerformSplittingMVETruncToNarrowingStores(StoreSDNode *St,
16642 SelectionDAG &DAG) {
16643 if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
16644 return SDValue();
16645 SDValue Trunc = St->getValue();
16646 if (Trunc->getOpcode() != ARMISD::MVETRUNC)
16647 return SDValue();
16648 EVT FromVT = Trunc->getOperand(0).getValueType();
16649 EVT ToVT = Trunc.getValueType();
16650
16651 LLVMContext &C = *DAG.getContext();
16652 SDLoc DL(St);
16653 // Details about the old store
16654 SDValue Ch = St->getChain();
16655 SDValue BasePtr = St->getBasePtr();
16656 Align Alignment = St->getOriginalAlign();
16657 MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
16658 AAMDNodes AAInfo = St->getAAInfo();
16659
16660 EVT NewToVT = EVT::getVectorVT(C, ToVT.getVectorElementType(),
16661 FromVT.getVectorNumElements());
16662
16663 SmallVector<SDValue, 4> Stores;
16664 for (unsigned i = 0; i < Trunc.getNumOperands(); i++) {
16665 unsigned NewOffset =
16666 i * FromVT.getVectorNumElements() * ToVT.getScalarSizeInBits() / 8;
16667 SDValue NewPtr =
16668 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::Fixed(NewOffset));
16669
16670 SDValue Extract = Trunc.getOperand(i);
16671 SDValue Store = DAG.getTruncStore(
16672 Ch, DL, Extract, NewPtr, St->getPointerInfo().getWithOffset(NewOffset),
16673 NewToVT, Alignment, MMOFlags, AAInfo);
16674 Stores.push_back(Store);
16675 }
16676 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
16677}
16678
16679// Given a floating point store from an extracted vector, with an integer
16680// VGETLANE that already exists, store the existing VGETLANEu directly. This can
16681// help reduce fp register pressure, doesn't require the fp extract and allows
16682// use of more integer post-inc stores not available with vstr.
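// A minimal sketch of the idea: if an f16 extracted from lane L of a vector
// is being stored, and an i32 VGETLANEu of the same vector and lane already
// exists in the DAG, the f16 store is replaced by a 16-bit truncating integer
// store of that VGETLANEu result.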
16683static SDValue PerformExtractFpToIntStores(StoreSDNode *St, SelectionDAG &DAG) {
16684 if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
16685 return SDValue();
16686 SDValue Extract = St->getValue();
16687 EVT VT = Extract.getValueType();
16688 // For now only uses f16. This may be useful for f32 too, but that will
16689 // be bitcast(extract), not the VGETLANEu we currently check here.
16690 if (VT != MVT::f16 || Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
16691 return SDValue();
16692
16693 SDNode *GetLane =
16694 DAG.getNodeIfExists(ARMISD::VGETLANEu, DAG.getVTList(MVT::i32),
16695 {Extract.getOperand(0), Extract.getOperand(1)});
16696 if (!GetLane)
16697 return SDValue();
16698
16699 LLVMContext &C = *DAG.getContext();
16700 SDLoc DL(St);
16701 // Create a new integer store to replace the existing floating point version.
16702 SDValue Ch = St->getChain();
16703 SDValue BasePtr = St->getBasePtr();
16704 Align Alignment = St->getOriginalAlign();
16705 MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
16706 AAMDNodes AAInfo = St->getAAInfo();
16707 EVT NewToVT = EVT::getIntegerVT(C, VT.getSizeInBits());
16708 SDValue Store = DAG.getTruncStore(Ch, DL, SDValue(GetLane, 0), BasePtr,
16709 St->getPointerInfo(), NewToVT, Alignment,
16710 MMOFlags, AAInfo);
16711
16712 return Store;
16713}
16714
16715/// PerformSTORECombine - Target-specific dag combine xforms for
16716/// ISD::STORE.
16717static SDValue PerformSTORECombine(SDNode *N,
16718 TargetLowering::DAGCombinerInfo &DCI,
16719 const ARMSubtarget *Subtarget) {
16720 StoreSDNode *St = cast<StoreSDNode>(N);
16721 if (St->isVolatile())
16722 return SDValue();
16723 SDValue StVal = St->getValue();
16724 EVT VT = StVal.getValueType();
16725
16726 if (Subtarget->hasNEON())
16727 if (SDValue Store = PerformTruncatingStoreCombine(St, DCI.DAG))
16728 return Store;
16729
16730 if (Subtarget->hasMVEIntegerOps()) {
16731 if (SDValue NewToken = PerformSplittingToNarrowingStores(St, DCI.DAG))
16732 return NewToken;
16733 if (SDValue NewChain = PerformExtractFpToIntStores(St, DCI.DAG))
16734 return NewChain;
16735 if (SDValue NewToken =
16736 PerformSplittingMVETruncToNarrowingStores(St, DCI.DAG))
16737 return NewToken;
16738 }
16739
16740 if (!ISD::isNormalStore(St))
16741 return SDValue();
16742
16743 // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and
16744 // ARM stores of arguments in the same cache line.
16745 if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
16746 StVal.getNode()->hasOneUse()) {
16747 SelectionDAG &DAG = DCI.DAG;
16748 bool isBigEndian = DAG.getDataLayout().isBigEndian();
16749 SDLoc DL(St);
16750 SDValue BasePtr = St->getBasePtr();
16751 SDValue NewST1 = DAG.getStore(
16752 St->getChain(), DL, StVal.getNode()->getOperand(isBigEndian ? 1 : 0),
16753 BasePtr, St->getPointerInfo(), St->getOriginalAlign(),
16754 St->getMemOperand()->getFlags());
16755
16756 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
16757 DAG.getConstant(4, DL, MVT::i32));
16758 return DAG.getStore(NewST1.getValue(0), DL,
16759 StVal.getNode()->getOperand(isBigEndian ? 0 : 1),
16760 OffsetPtr, St->getPointerInfo().getWithOffset(4),
16761 St->getOriginalAlign(),
16762 St->getMemOperand()->getFlags());
16763 }
16764
16765 if (StVal.getValueType() == MVT::i64 &&
16766 StVal.getNode()->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
16767
16768 // Bitcast an i64 store extracted from a vector to f64.
16769 // Otherwise, the i64 value will be legalized to a pair of i32 values.
16770 SelectionDAG &DAG = DCI.DAG;
16771 SDLoc dl(StVal);
16772 SDValue IntVec = StVal.getOperand(0);
16773 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
16774 IntVec.getValueType().getVectorNumElements());
16775 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
16776 SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
16777 Vec, StVal.getOperand(1));
16778 dl = SDLoc(N);
16779 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
16780 // Make the DAGCombiner fold the bitcasts.
16781 DCI.AddToWorklist(Vec.getNode());
16782 DCI.AddToWorklist(ExtElt.getNode());
16783 DCI.AddToWorklist(V.getNode());
16784 return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
16785 St->getPointerInfo(), St->getAlign(),
16786 St->getMemOperand()->getFlags(), St->getAAInfo());
16787 }
16788
16789 // If this is a legal vector store, try to combine it into a VST1_UPD.
16790 if (Subtarget->hasNEON() && ISD::isNormalStore(N) && VT.isVector() &&
16791 DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
16792 return CombineBaseUpdate(N, DCI);
16793
16794 return SDValue();
16795}
16796
16797/// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD)
16798/// can replace combinations of VMUL and VCVT (floating-point to integer)
16799/// when the VMUL has a constant operand that is a power of 2.
16800///
16801/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
16802/// vmul.f32 d16, d17, d16
16803/// vcvt.s32.f32 d16, d16
16804/// becomes:
16805/// vcvt.s32.f32 d16, d16, #3
16806static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG,
16807 const ARMSubtarget *Subtarget) {
16808 if (!Subtarget->hasNEON())
16809 return SDValue();
16810
16811 SDValue Op = N->getOperand(0);
16812 if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
16813 Op.getOpcode() != ISD::FMUL)
16814 return SDValue();
16815
16816 SDValue ConstVec = Op->getOperand(1);
16817 if (!isa<BuildVectorSDNode>(ConstVec))
16818 return SDValue();
16819
16820 MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
16821 uint32_t FloatBits = FloatTy.getSizeInBits();
16822 MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
16823 uint32_t IntBits = IntTy.getSizeInBits();
16824 unsigned NumLanes = Op.getValueType().getVectorNumElements();
16825 if (FloatBits != 32 || IntBits > 32 || (NumLanes != 4 && NumLanes != 2)) {
16826 // These instructions only exist for converting from f32 to i32. We can handle
16827 // smaller integers by generating an extra truncate, but larger ones would
16828 // be lossy. We also can't handle anything other than 2 or 4 lanes, since
16829 // these instructions only support v2i32/v4i32 types.
16830 return SDValue();
16831 }
16832
16833 BitVector UndefElements;
16834 BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
16835 int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
16836 if (C == -1 || C == 0 || C > 32)
16837 return SDValue();
16838
16839 SDLoc dl(N);
16840 bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
16841 unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
16842 Intrinsic::arm_neon_vcvtfp2fxu;
16843 SDValue FixConv = DAG.getNode(
16844 ISD::INTRINSIC_WO_CHAIN, dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
16845 DAG.getConstant(IntrinsicOpcode, dl, MVT::i32), Op->getOperand(0),
16846 DAG.getConstant(C, dl, MVT::i32));
16847
16848 if (IntBits < FloatBits)
16849 FixConv = DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), FixConv);
16850
16851 return FixConv;
16852}
16853
16854static SDValue PerformFAddVSelectCombine(SDNode *N, SelectionDAG &DAG,
16855 const ARMSubtarget *Subtarget) {
16856 if (!Subtarget->hasMVEFloatOps())
16857 return SDValue();
16858
16859 // Turn (fadd x, (vselect c, y, -0.0)) into (vselect c, (fadd x, y), x)
16860 // The second form can be more easily turned into a predicated vadd, and
16861 // possibly combined into a fma to become a predicated vfma.
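// The -0.0 operand matters: for lanes where c is false the vselect yields
// -0.0, and fadd(x, -0.0) == x, so both forms produce the same lane values.
// With the nsz flag, +0.0 is an acceptable identity as well, as checked in
// isIdentitySplat below.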
16862 SDValue Op0 = N->getOperand(0);
16863 SDValue Op1 = N->getOperand(1);
16864 EVT VT = N->getValueType(0);
16865 SDLoc DL(N);
16866
16867 // The identity element for an fadd is -0.0 (or +0.0 when the nsz flag is
16868 // set), which is what these VMOVs represent.
16869 auto isIdentitySplat = [&](SDValue Op, bool NSZ) {
16870 if (Op.getOpcode() != ISD::BITCAST ||
16871 Op.getOperand(0).getOpcode() != ARMISD::VMOVIMM)
16872 return false;
16873 uint64_t ImmVal = Op.getOperand(0).getConstantOperandVal(0);
16874 if (VT == MVT::v4f32 && (ImmVal == 1664 || (ImmVal == 0 && NSZ)))
16875 return true;
16876 if (VT == MVT::v8f16 && (ImmVal == 2688 || (ImmVal == 0 && NSZ)))
16877 return true;
16878 return false;
16879 };
16880
16881 if (Op0.getOpcode() == ISD::VSELECT && Op1.getOpcode() != ISD::VSELECT)
16882 std::swap(Op0, Op1);
16883
16884 if (Op1.getOpcode() != ISD::VSELECT)
16885 return SDValue();
16886
16887 SDNodeFlags FaddFlags = N->getFlags();
16888 bool NSZ = FaddFlags.hasNoSignedZeros();
16889 if (!isIdentitySplat(Op1.getOperand(2), NSZ))
16890 return SDValue();
16891
16892 SDValue FAdd =
16893 DAG.getNode(ISD::FADD, DL, VT, Op0, Op1.getOperand(1), FaddFlags);
16894 return DAG.getNode(ISD::VSELECT, DL, VT, Op1.getOperand(0), FAdd, Op0, FaddFlags);
16895}
16896
16897static SDValue PerformFADDVCMLACombine(SDNode *N, SelectionDAG &DAG) {
16898 SDValue LHS = N->getOperand(0);
16899 SDValue RHS = N->getOperand(1);
16900 EVT VT = N->getValueType(0);
16901 SDLoc DL(N);
16902
16903 if (!N->getFlags().hasAllowReassociation())
16904 return SDValue();
16905
16906 // Combine fadd(a, vcmla(b, c, d)) -> vcmla(fadd(a, b), c, d)
16907 auto ReassocComplex = [&](SDValue A, SDValue B) {
16908 if (A.getOpcode() != ISD::INTRINSIC_WO_CHAIN)
16909 return SDValue();
16910 unsigned Opc = A.getConstantOperandVal(0);
16911 if (Opc != Intrinsic::arm_mve_vcmlaq)
16912 return SDValue();
16913 SDValue VCMLA = DAG.getNode(
16914 ISD::INTRINSIC_WO_CHAIN, DL, VT, A.getOperand(0), A.getOperand(1),
16915 DAG.getNode(ISD::FADD, DL, VT, A.getOperand(2), B, N->getFlags()),
16916 A.getOperand(3), A.getOperand(4));
16917 VCMLA->setFlags(A->getFlags());
16918 return VCMLA;
16919 };
16920 if (SDValue R = ReassocComplex(LHS, RHS))
16921 return R;
16922 if (SDValue R = ReassocComplex(RHS, LHS))
16923 return R;
16924
16925 return SDValue();
16926}
16927
16928static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG,
16929 const ARMSubtarget *Subtarget) {
16930 if (SDValue S = PerformFAddVSelectCombine(N, DAG, Subtarget))
16931 return S;
16932 if (SDValue S = PerformFADDVCMLACombine(N, DAG))
16933 return S;
16934 return SDValue();
16935}
16936
16937/// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
16938/// can replace combinations of VCVT (integer to floating-point) and VDIV
16939/// when the VDIV has a constant operand that is a power of 2.
16940///
16941/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
16942/// vcvt.f32.s32 d16, d16
16943/// vdiv.f32 d16, d17, d16
16944/// becomes:
16945/// vcvt.f32.s32 d16, d16, #3
16946static SDValue PerformVDIVCombine(SDNode *N, SelectionDAG &DAG,
16947 const ARMSubtarget *Subtarget) {
16948 if (!Subtarget->hasNEON())
16949 return SDValue();
16950
16951 SDValue Op = N->getOperand(0);
16952 unsigned OpOpcode = Op.getNode()->getOpcode();
16953 if (!N->getValueType(0).isVector() || !N->getValueType(0).isSimple() ||
16954 (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))
16955 return SDValue();
16956
16957 SDValue ConstVec = N->getOperand(1);
16958 if (!isa<BuildVectorSDNode>(ConstVec))
16959 return SDValue();
16960
16961 MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
16962 uint32_t FloatBits = FloatTy.getSizeInBits();
16963 MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
16964 uint32_t IntBits = IntTy.getSizeInBits();
16965 unsigned NumLanes = Op.getValueType().getVectorNumElements();
16966 if (FloatBits != 32 || IntBits > 32 || (NumLanes != 4 && NumLanes != 2)) {
16967 // These instructions only exist converting from i32 to f32. We can handle
16968 // smaller integers by generating an extra extend, but larger ones would
16969 // be lossy. We also can't handle anything other than 2 or 4 lanes, since
16970 // these instructions only support v2i32/v4i32 types.
16971 return SDValue();
16972 }
16973
16974 BitVector UndefElements;
16975 BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
16976 int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
16977 if (C == -1 || C == 0 || C > 32)
16978 return SDValue();
16979
16980 SDLoc dl(N);
16981 bool isSigned = OpOpcode == ISD::SINT_TO_FP;
16982 SDValue ConvInput = Op.getOperand(0);
16983 if (IntBits < FloatBits)
16984 ConvInput = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
16985 dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
16986 ConvInput);
16987
16988 unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp :
16989 Intrinsic::arm_neon_vcvtfxu2fp;
16990 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl,
16991 Op.getValueType(),
16992 DAG.getConstant(IntrinsicOpcode, dl, MVT::i32),
16993 ConvInput, DAG.getConstant(C, dl, MVT::i32));
16994}
16995
16996static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG,
16997 const ARMSubtarget *ST) {
16998 if (!ST->hasMVEIntegerOps())
16999 return SDValue();
17000
17001 assert(N->getOpcode() == ISD::VECREDUCE_ADD);
17002 EVT ResVT = N->getValueType(0);
17003 SDValue N0 = N->getOperand(0);
17004 SDLoc dl(N);
17005
17006 // Try to turn vecreduce_add(add(x, y)) into vecreduce(x) + vecreduce(y)
17007 if (ResVT == MVT::i32 && N0.getOpcode() == ISD::ADD &&
17008 (N0.getValueType() == MVT::v4i32 || N0.getValueType() == MVT::v8i16 ||
17009 N0.getValueType() == MVT::v16i8)) {
17010 SDValue Red0 = DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, N0.getOperand(0));
17011 SDValue Red1 = DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, N0.getOperand(1));
17012 return DAG.getNode(ISD::ADD, dl, ResVT, Red0, Red1);
17013 }
17014
17015 // We are looking for something that will have illegal types if left alone,
17016 // but that we can convert to a single instruction under MVE. For example
17017 // vecreduce_add(sext(A, v8i32)) => VADDV.s16 A
17018 // or
17019 // vecreduce_add(mul(zext(A, v16i32), zext(B, v16i32))) => VMLADAV.u8 A, B
17020
17021 // The legal cases are:
17022 // VADDV u/s 8/16/32
17023 // VMLAV u/s 8/16/32
17024 // VADDLV u/s 32
17025 // VMLALV u/s 16/32
17026
17027 // If the input vector is smaller than legal (v4i8/v4i16 for example) we can
17028 // extend it and use v4i32 instead.
17029 auto ExtTypeMatches = [](SDValue A, ArrayRef<MVT> ExtTypes) {
17030 EVT AVT = A.getValueType();
17031 return any_of(ExtTypes, [&](MVT Ty) {
17032 return AVT.getVectorNumElements() == Ty.getVectorNumElements() &&
17033 AVT.bitsLE(Ty);
17034 });
17035 };
17036 auto ExtendIfNeeded = [&](SDValue A, unsigned ExtendCode) {
17037 EVT AVT = A.getValueType();
17038 if (!AVT.is128BitVector())
17039 A = DAG.getNode(ExtendCode, dl,
17040 AVT.changeVectorElementType(MVT::getIntegerVT(
17041 128 / AVT.getVectorMinNumElements())),
17042 A);
17043 return A;
17044 };
17045 auto IsVADDV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes) {
17046 if (ResVT != RetTy || N0->getOpcode() != ExtendCode)
17047 return SDValue();
17048 SDValue A = N0->getOperand(0);
17049 if (ExtTypeMatches(A, ExtTypes))
17050 return ExtendIfNeeded(A, ExtendCode);
17051 return SDValue();
17052 };
17053 auto IsPredVADDV = [&](MVT RetTy, unsigned ExtendCode,
17054 ArrayRef<MVT> ExtTypes, SDValue &Mask) {
17055 if (ResVT != RetTy || N0->getOpcode() != ISD::VSELECT ||
17056 !ISD::isBuildVectorAllZeros(N0->getOperand(2).getNode()))
17057 return SDValue();
17058 Mask = N0->getOperand(0);
17059 SDValue Ext = N0->getOperand(1);
17060 if (Ext->getOpcode() != ExtendCode)
17061 return SDValue();
17062 SDValue A = Ext->getOperand(0);
17063 if (ExtTypeMatches(A, ExtTypes))
17064 return ExtendIfNeeded(A, ExtendCode);
17065 return SDValue();
17066 };
17067 auto IsVMLAV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes,
17068 SDValue &A, SDValue &B) {
17069 // For a vmla we are trying to match a larger pattern:
17070 // ExtA = sext/zext A
17071 // ExtB = sext/zext B
17072 // Mul = mul ExtA, ExtB
17073 // vecreduce.add Mul
17074 // There might also be an extra extend between the mul and the addreduce, so
17075 // long as the bitwidth is high enough to make them equivalent (for example
17076 // original v8i16 might be mul at v8i32 and the reduce happens at v8i64).
17077 if (ResVT != RetTy)
17078 return false;
17079 SDValue Mul = N0;
17080 if (Mul->getOpcode() == ExtendCode &&
17081 Mul->getOperand(0).getScalarValueSizeInBits() * 2 >=
17082 ResVT.getScalarSizeInBits())
17083 Mul = Mul->getOperand(0);
17084 if (Mul->getOpcode() != ISD::MUL)
17085 return false;
17086 SDValue ExtA = Mul->getOperand(0);
17087 SDValue ExtB = Mul->getOperand(1);
17088 if (ExtA->getOpcode() != ExtendCode || ExtB->getOpcode() != ExtendCode)
17089 return false;
17090 A = ExtA->getOperand(0);
17091 B = ExtB->getOperand(0);
17092 if (ExtTypeMatches(A, ExtTypes) && ExtTypeMatches(B, ExtTypes)) {
17093 A = ExtendIfNeeded(A, ExtendCode);
17094 B = ExtendIfNeeded(B, ExtendCode);
17095 return true;
17096 }
17097 return false;
17098 };
17099 auto IsPredVMLAV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes,
17100 SDValue &A, SDValue &B, SDValue &Mask) {
17101 // Same as the pattern above with a select for the zero predicated lanes
17102 // ExtA = sext/zext A
17103 // ExtB = sext/zext B
17104 // Mul = mul ExtA, ExtB
17105 // N0 = select Mask, Mul, 0
17106 // vecreduce.add N0
17107 if (ResVT != RetTy || N0->getOpcode() != ISD::VSELECT ||
17108 !ISD::isBuildVectorAllZeros(N0->getOperand(2).getNode()))
17109 return false;
17110 Mask = N0->getOperand(0);
17111 SDValue Mul = N0->getOperand(1);
17112 if (Mul->getOpcode() == ExtendCode &&
17113 Mul->getOperand(0).getScalarValueSizeInBits() * 2 >=
17114 ResVT.getScalarSizeInBits())
17115 Mul = Mul->getOperand(0);
17116 if (Mul->getOpcode() != ISD::MUL)
17117 return false;
17118 SDValue ExtA = Mul->getOperand(0);
17119 SDValue ExtB = Mul->getOperand(1);
17120 if (ExtA->getOpcode() != ExtendCode || ExtB->getOpcode() != ExtendCode)
17121 return false;
17122 A = ExtA->getOperand(0);
17123 B = ExtB->getOperand(0);
17124 if (ExtTypeMatches(A, ExtTypes) && ExtTypeMatches(B, ExtTypes)) {
17125 A = ExtendIfNeeded(A, ExtendCode);
17126 B = ExtendIfNeeded(B, ExtendCode);
17127 return true;
17128 }
17129 return false;
17130 };
17131 auto Create64bitNode = [&](unsigned Opcode, ArrayRef<SDValue> Ops) {
17132 // Split illegal MVT::v16i8->i64 vector reductions into two legal v8i16->i64
17133 // reductions. The operands are extended with MVEEXT, but as they are
17134 // reductions the lane orders do not matter. MVEEXT may be combined with
17135 // loads to produce two extending loads, or else they will be expanded to
17136 // VREV/VMOVL.
17137 EVT VT = Ops[0].getValueType();
17138 if (VT == MVT::v16i8) {
17139 assert((Opcode == ARMISD::VMLALVs || Opcode == ARMISD::VMLALVu) &&
17140 "Unexpected illegal long reduction opcode");
17141 bool IsUnsigned = Opcode == ARMISD::VMLALVu;
17142
17143 SDValue Ext0 =
17144 DAG.getNode(IsUnsigned ? ARMISD::MVEZEXT : ARMISD::MVESEXT, dl,
17145 DAG.getVTList(MVT::v8i16, MVT::v8i16), Ops[0]);
17146 SDValue Ext1 =
17147 DAG.getNode(IsUnsigned ? ARMISD::MVEZEXT : ARMISD::MVESEXT, dl,
17148 DAG.getVTList(MVT::v8i16, MVT::v8i16), Ops[1]);
17149
17150 SDValue MLA0 = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
17151 Ext0, Ext1);
17152 SDValue MLA1 =
17153 DAG.getNode(IsUnsigned ? ARMISD::VMLALVAu : ARMISD::VMLALVAs, dl,
17154 DAG.getVTList(MVT::i32, MVT::i32), MLA0, MLA0.getValue(1),
17155 Ext0.getValue(1), Ext1.getValue(1));
17156 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, MLA1, MLA1.getValue(1));
17157 }
17158 SDValue Node = DAG.getNode(Opcode, dl, {MVT::i32, MVT::i32}, Ops);
17159 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Node,
17160 SDValue(Node.getNode(), 1));
17161 };
17162
17163 SDValue A, B;
17164 SDValue Mask;
17165 if (IsVMLAV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B))
17166 return DAG.getNode(ARMISD::VMLAVs, dl, ResVT, A, B);
17167 if (IsVMLAV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B))
17168 return DAG.getNode(ARMISD::VMLAVu, dl, ResVT, A, B);
17169 if (IsVMLAV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v16i8, MVT::v8i16, MVT::v4i32},
17170 A, B))
17171 return Create64bitNode(ARMISD::VMLALVs, {A, B});
17172 if (IsVMLAV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v16i8, MVT::v8i16, MVT::v4i32},
17173 A, B))
17174 return Create64bitNode(ARMISD::VMLALVu, {A, B});
17175 if (IsVMLAV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, A, B))
17176 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17177 DAG.getNode(ARMISD::VMLAVs, dl, MVT::i32, A, B));
17178 if (IsVMLAV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, A, B))
17179 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17180 DAG.getNode(ARMISD::VMLAVu, dl, MVT::i32, A, B));
17181
17182 if (IsPredVMLAV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B,
17183 Mask))
17184 return DAG.getNode(ARMISD::VMLAVps, dl, ResVT, A, B, Mask);
17185 if (IsPredVMLAV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B,
17186 Mask))
17187 return DAG.getNode(ARMISD::VMLAVpu, dl, ResVT, A, B, Mask);
17188 if (IsPredVMLAV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v4i32}, A, B,
17189 Mask))
17190 return Create64bitNode(ARMISD::VMLALVps, {A, B, Mask});
17191 if (IsPredVMLAV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v4i32}, A, B,
17192 Mask))
17193 return Create64bitNode(ARMISD::VMLALVpu, {A, B, Mask});
17194 if (IsPredVMLAV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, A, B, Mask))
17195 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17196 DAG.getNode(ARMISD::VMLAVps, dl, MVT::i32, A, B, Mask));
17197 if (IsPredVMLAV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, A, B, Mask))
17198 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17199 DAG.getNode(ARMISD::VMLAVpu, dl, MVT::i32, A, B, Mask));
17200
17201 if (SDValue A = IsVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}))
17202 return DAG.getNode(ARMISD::VADDVs, dl, ResVT, A);
17203 if (SDValue A = IsVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}))
17204 return DAG.getNode(ARMISD::VADDVu, dl, ResVT, A);
17205 if (SDValue A = IsVADDV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v4i32}))
17206 return Create64bitNode(ARMISD::VADDLVs, {A});
17207 if (SDValue A = IsVADDV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v4i32}))
17208 return Create64bitNode(ARMISD::VADDLVu, {A});
17209 if (SDValue A = IsVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}))
17210 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17211 DAG.getNode(ARMISD::VADDVs, dl, MVT::i32, A));
17212 if (SDValue A = IsVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}))
17213 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17214 DAG.getNode(ARMISD::VADDVu, dl, MVT::i32, A));
17215
17216 if (SDValue A = IsPredVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))
17217 return DAG.getNode(ARMISD::VADDVps, dl, ResVT, A, Mask);
17218 if (SDValue A = IsPredVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))
17219 return DAG.getNode(ARMISD::VADDVpu, dl, ResVT, A, Mask);
17220 if (SDValue A = IsPredVADDV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v4i32}, Mask))
17221 return Create64bitNode(ARMISD::VADDLVps, {A, Mask});
17222 if (SDValue A = IsPredVADDV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v4i32}, Mask))
17223 return Create64bitNode(ARMISD::VADDLVpu, {A, Mask});
17224 if (SDValue A = IsPredVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, Mask))
17225 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17226 DAG.getNode(ARMISD::VADDVps, dl, MVT::i32, A, Mask));
17227 if (SDValue A = IsPredVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, Mask))
17228 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17229 DAG.getNode(ARMISD::VADDVpu, dl, MVT::i32, A, Mask));
17230
17231 // Some complications. We can get a case where the two inputs of the mul are
17232 // the same, in which case the output sext will have been helpfully converted
17233 // to a zext. Turn it back.
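// For instance, vecreduce_add(zext(mul(sext(A), sext(A)))) can be rewritten
// here as vecreduce_add(sext(mul(sext(A), sext(A)))); the product is a square
// and therefore known non-negative, so the two extends are equivalent, and
// the sext form lets the VMLAV/VMLALV matching above fire on a later combine.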
17234 SDValue Op = N0;
17235 if (Op->getOpcode() == ISD::VSELECT)
17236 Op = Op->getOperand(1);
17237 if (Op->getOpcode() == ISD::ZERO_EXTEND &&
17238 Op->getOperand(0)->getOpcode() == ISD::MUL) {
17239 SDValue Mul = Op->getOperand(0);
17240 if (Mul->getOperand(0) == Mul->getOperand(1) &&
17241 Mul->getOperand(0)->getOpcode() == ISD::SIGN_EXTEND) {
17242 SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, N0->getValueType(0), Mul);
17243 if (Op != N0)
17244 Ext = DAG.getNode(ISD::VSELECT, dl, N0->getValueType(0),
17245 N0->getOperand(0), Ext, N0->getOperand(2));
17246 return DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, Ext);
17247 }
17248 }
17249
17250 return SDValue();
17251}
17252
17253// Looks for vaddv(shuffle) or vmlav(shuffle, shuffle), with a shuffle where all
17254// the lanes are used. Due to the reduction being commutative the shuffle can be
17255// removed.
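// For example, vaddv(shuffle(x, undef, <3,2,1,0>)) sums the same four lanes
// as vaddv(x), so the shuffle is dropped below, provided the mask uses every
// lane exactly once.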
17256static SDValue PerformReduceShuffleCombine(SDNode *N, SelectionDAG &DAG) {
17257 unsigned VecOp = N->getOperand(0).getValueType().isVector() ? 0 : 2;
17258 auto *Shuf = dyn_cast<ShuffleVectorSDNode>(N->getOperand(VecOp));
17259 if (!Shuf || !Shuf->getOperand(1).isUndef())
17260 return SDValue();
17261
17262 // Check all elements are used once in the mask.
17263 ArrayRef<int> Mask = Shuf->getMask();
17264 APInt SetElts(Mask.size(), 0);
17265 for (int E : Mask) {
17266 if (E < 0 || E >= (int)Mask.size())
17267 return SDValue();
17268 SetElts.setBit(E);
17269 }
17270 if (!SetElts.isAllOnes())
17271 return SDValue();
17272
17273 if (N->getNumOperands() != VecOp + 1) {
17274 auto *Shuf2 = dyn_cast<ShuffleVectorSDNode>(N->getOperand(VecOp + 1));
17275 if (!Shuf2 || !Shuf2->getOperand(1).isUndef() || Shuf2->getMask() != Mask)
17276 return SDValue();
17277 }
17278
17279 SmallVector<SDValue> Ops;
17280 for (SDValue Op : N->ops()) {
17281 if (Op.getValueType().isVector())
17282 Ops.push_back(Op.getOperand(0));
17283 else
17284 Ops.push_back(Op);
17285 }
17286 return DAG.getNode(N->getOpcode(), SDLoc(N), N->getVTList(), Ops);
17287}
17288
17289static SDValue PerformVMOVNCombine(SDNode *N,
17290 TargetLowering::DAGCombinerInfo &DCI) {
17291 SDValue Op0 = N->getOperand(0);
17292 SDValue Op1 = N->getOperand(1);
17293 unsigned IsTop = N->getConstantOperandVal(2);
17294
17295 // VMOVNT a undef -> a
17296 // VMOVNB a undef -> a
17297 // VMOVNB undef a -> a
17298 if (Op1->isUndef())
17299 return Op0;
17300 if (Op0->isUndef() && !IsTop)
17301 return Op1;
17302
17303 // VMOVNt(c, VQMOVNb(a, b)) => VQMOVNt(c, b)
17304 // VMOVNb(c, VQMOVNb(a, b)) => VQMOVNb(c, b)
17305 if ((Op1->getOpcode() == ARMISD::VQMOVNs ||
17306 Op1->getOpcode() == ARMISD::VQMOVNu) &&
17307 Op1->getConstantOperandVal(2) == 0)
17308 return DCI.DAG.getNode(Op1->getOpcode(), SDLoc(Op1), N->getValueType(0),
17309 Op0, Op1->getOperand(1), N->getOperand(2));
17310
17311 // Only the bottom lanes from Qm (Op1) and either the top or bottom lanes from
17312 // Qd (Op0) are demanded from a VMOVN, depending on whether we are inserting
17313 // into the top or bottom lanes.
17314 unsigned NumElts = N->getValueType(0).getVectorNumElements();
17315 APInt Op1DemandedElts = APInt::getSplat(NumElts, APInt::getLowBitsSet(2, 1));
17316 APInt Op0DemandedElts =
17317 IsTop ? Op1DemandedElts
17318 : APInt::getSplat(NumElts, APInt::getHighBitsSet(2, 1));
17319
17320 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
17321 if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, DCI))
17322 return SDValue(N, 0);
17323 if (TLI.SimplifyDemandedVectorElts(Op1, Op1DemandedElts, DCI))
17324 return SDValue(N, 0);
17325
17326 return SDValue();
17327}
17328
17329static SDValue PerformVQMOVNCombine(SDNode *N,
17330 TargetLowering::DAGCombinerInfo &DCI) {
17331 SDValue Op0 = N->getOperand(0);
17332 unsigned IsTop = N->getConstantOperandVal(2);
17333
17334 unsigned NumElts = N->getValueType(0).getVectorNumElements();
17335 APInt Op0DemandedElts =
17336 APInt::getSplat(NumElts, IsTop ? APInt::getLowBitsSet(2, 1)
17337 : APInt::getHighBitsSet(2, 1));
17338
17339 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
17340 if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, DCI))
17341 return SDValue(N, 0);
17342 return SDValue();
17343}
17344
17345static SDValue PerformVQDMULHCombine(SDNode *N,
17346 TargetLowering::DAGCombinerInfo &DCI) {
17347 EVT VT = N->getValueType(0);
17348 SDValue LHS = N->getOperand(0);
17349 SDValue RHS = N->getOperand(1);
17350
17351 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
17352 auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
17353 // Turn VQDMULH(shuffle, shuffle) -> shuffle(VQDMULH)
17354 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
17355 LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
17356 (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
17357 SDLoc DL(N);
17358 SDValue NewBinOp = DCI.DAG.getNode(N->getOpcode(), DL, VT,
17359 LHS.getOperand(0), RHS.getOperand(0));
17360 SDValue UndefV = LHS.getOperand(1);
17361 return DCI.DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
17362 }
17363 return SDValue();
17364}
17365
17366static SDValue PerformLongShiftCombine(SDNode *N, SelectionDAG &DAG) {
17367 SDLoc DL(N);
17368 SDValue Op0 = N->getOperand(0);
17369 SDValue Op1 = N->getOperand(1);
17370
17371 // Turn X << -C -> X >> C and vice versa. The negative shifts can come up from
17372 // uses of the intrinsics.
17373 if (auto C = dyn_cast<ConstantSDNode>(N->getOperand(2))) {
17374 int ShiftAmt = C->getSExtValue();
17375 if (ShiftAmt == 0) {
17376 SDValue Merge = DAG.getMergeValues({Op0, Op1}, DL);
17377 DAG.ReplaceAllUsesWith(N, Merge.getNode());
17378 return SDValue();
17379 }
17380
17381 if (ShiftAmt >= -32 && ShiftAmt < 0) {
17382 unsigned NewOpcode =
17383 N->getOpcode() == ARMISD::LSLL ? ARMISD::LSRL : ARMISD::LSLL;
17384 SDValue NewShift = DAG.getNode(NewOpcode, DL, N->getVTList(), Op0, Op1,
17385 DAG.getConstant(-ShiftAmt, DL, MVT::i32));
17386 DAG.ReplaceAllUsesWith(N, NewShift.getNode());
17387 return NewShift;
17388 }
17389 }
17390
17391 return SDValue();
17392}
17393
17394/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
17395SDValue ARMTargetLowering::PerformIntrinsicCombine(SDNode *N,
17396 DAGCombinerInfo &DCI) const {
17397 SelectionDAG &DAG = DCI.DAG;
17398 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
17399 switch (IntNo) {
17400 default:
17401 // Don't do anything for most intrinsics.
17402 break;
17403
17404 // Vector shifts: check for immediate versions and lower them.
17405 // Note: This is done during DAG combining instead of DAG legalizing because
17406 // the build_vectors for 64-bit vector element shift counts are generally
17407 // not legal, and it is hard to see their values after they get legalized to
17408 // loads from a constant pool.
17409 case Intrinsic::arm_neon_vshifts:
17410 case Intrinsic::arm_neon_vshiftu:
17411 case Intrinsic::arm_neon_vrshifts:
17412 case Intrinsic::arm_neon_vrshiftu:
17413 case Intrinsic::arm_neon_vrshiftn:
17414 case Intrinsic::arm_neon_vqshifts:
17415 case Intrinsic::arm_neon_vqshiftu:
17416 case Intrinsic::arm_neon_vqshiftsu:
17417 case Intrinsic::arm_neon_vqshiftns:
17418 case Intrinsic::arm_neon_vqshiftnu:
17419 case Intrinsic::arm_neon_vqshiftnsu:
17420 case Intrinsic::arm_neon_vqrshiftns:
17421 case Intrinsic::arm_neon_vqrshiftnu:
17422 case Intrinsic::arm_neon_vqrshiftnsu: {
17423 EVT VT = N->getOperand(1).getValueType();
17424 int64_t Cnt;
17425 unsigned VShiftOpc = 0;
17426
17427 switch (IntNo) {
17428 case Intrinsic::arm_neon_vshifts:
17429 case Intrinsic::arm_neon_vshiftu:
17430 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
17431 VShiftOpc = ARMISD::VSHLIMM;
17432 break;
17433 }
17434 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
17435 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ? ARMISD::VSHRsIMM
17436 : ARMISD::VSHRuIMM);
17437 break;
17438 }
17439 return SDValue();
17440
17441 case Intrinsic::arm_neon_vrshifts:
17442 case Intrinsic::arm_neon_vrshiftu:
17443 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
17444 break;
17445 return SDValue();
17446
17447 case Intrinsic::arm_neon_vqshifts:
17448 case Intrinsic::arm_neon_vqshiftu:
17449 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
17450 break;
17451 return SDValue();
17452
17453 case Intrinsic::arm_neon_vqshiftsu:
17454 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
17455 break;
17456 llvm_unreachable("invalid shift count for vqshlu intrinsic")::llvm::llvm_unreachable_internal("invalid shift count for vqshlu intrinsic"
, "llvm/lib/Target/ARM/ARMISelLowering.cpp", 17456)
;
17457
17458 case Intrinsic::arm_neon_vrshiftn:
17459 case Intrinsic::arm_neon_vqshiftns:
17460 case Intrinsic::arm_neon_vqshiftnu:
17461 case Intrinsic::arm_neon_vqshiftnsu:
17462 case Intrinsic::arm_neon_vqrshiftns:
17463 case Intrinsic::arm_neon_vqrshiftnu:
17464 case Intrinsic::arm_neon_vqrshiftnsu:
17465 // Narrowing shifts require an immediate right shift.
17466 if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
17467 break;
17468 llvm_unreachable("invalid shift count for narrowing vector shift "::llvm::llvm_unreachable_internal("invalid shift count for narrowing vector shift "
"intrinsic", "llvm/lib/Target/ARM/ARMISelLowering.cpp", 17469
)
17469 "intrinsic")::llvm::llvm_unreachable_internal("invalid shift count for narrowing vector shift "
"intrinsic", "llvm/lib/Target/ARM/ARMISelLowering.cpp", 17469
)
;
17470
17471 default:
17472 llvm_unreachable("unhandled vector shift")::llvm::llvm_unreachable_internal("unhandled vector shift", "llvm/lib/Target/ARM/ARMISelLowering.cpp"
, 17472)
;
17473 }
17474
17475 switch (IntNo) {
17476 case Intrinsic::arm_neon_vshifts:
17477 case Intrinsic::arm_neon_vshiftu:
17478 // Opcode already set above.
17479 break;
17480 case Intrinsic::arm_neon_vrshifts:
17481 VShiftOpc = ARMISD::VRSHRsIMM;
17482 break;
17483 case Intrinsic::arm_neon_vrshiftu:
17484 VShiftOpc = ARMISD::VRSHRuIMM;
17485 break;
17486 case Intrinsic::arm_neon_vrshiftn:
17487 VShiftOpc = ARMISD::VRSHRNIMM;
17488 break;
17489 case Intrinsic::arm_neon_vqshifts:
17490 VShiftOpc = ARMISD::VQSHLsIMM;
17491 break;
17492 case Intrinsic::arm_neon_vqshiftu:
17493 VShiftOpc = ARMISD::VQSHLuIMM;
17494 break;
17495 case Intrinsic::arm_neon_vqshiftsu:
17496 VShiftOpc = ARMISD::VQSHLsuIMM;
17497 break;
17498 case Intrinsic::arm_neon_vqshiftns:
17499 VShiftOpc = ARMISD::VQSHRNsIMM;
17500 break;
17501 case Intrinsic::arm_neon_vqshiftnu:
17502 VShiftOpc = ARMISD::VQSHRNuIMM;
17503 break;
17504 case Intrinsic::arm_neon_vqshiftnsu:
17505 VShiftOpc = ARMISD::VQSHRNsuIMM;
17506 break;
17507 case Intrinsic::arm_neon_vqrshiftns:
17508 VShiftOpc = ARMISD::VQRSHRNsIMM;
17509 break;
17510 case Intrinsic::arm_neon_vqrshiftnu:
17511 VShiftOpc = ARMISD::VQRSHRNuIMM;
17512 break;
17513 case Intrinsic::arm_neon_vqrshiftnsu:
17514 VShiftOpc = ARMISD::VQRSHRNsuIMM;
17515 break;
17516 }
17517
17518 SDLoc dl(N);
17519 return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
17520 N->getOperand(1), DAG.getConstant(Cnt, dl, MVT::i32));
17521 }
17522
17523 case Intrinsic::arm_neon_vshiftins: {
17524 EVT VT = N->getOperand(1).getValueType();
17525 int64_t Cnt;
17526 unsigned VShiftOpc = 0;
17527
17528 if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
17529 VShiftOpc = ARMISD::VSLIIMM;
17530 else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
17531 VShiftOpc = ARMISD::VSRIIMM;
17532 else {
17533 llvm_unreachable("invalid shift count for vsli/vsri intrinsic")::llvm::llvm_unreachable_internal("invalid shift count for vsli/vsri intrinsic"
, "llvm/lib/Target/ARM/ARMISelLowering.cpp", 17533)
;
17534 }
17535
17536 SDLoc dl(N);
17537 return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
17538 N->getOperand(1), N->getOperand(2),
17539 DAG.getConstant(Cnt, dl, MVT::i32));
17540 }
17541
17542 case Intrinsic::arm_neon_vqrshifts:
17543 case Intrinsic::arm_neon_vqrshiftu:
17544 // No immediate versions of these to check for.
17545 break;
17546
17547 case Intrinsic::arm_mve_vqdmlah:
17548 case Intrinsic::arm_mve_vqdmlash:
17549 case Intrinsic::arm_mve_vqrdmlah:
17550 case Intrinsic::arm_mve_vqrdmlash:
17551 case Intrinsic::arm_mve_vmla_n_predicated:
17552 case Intrinsic::arm_mve_vmlas_n_predicated:
17553 case Intrinsic::arm_mve_vqdmlah_predicated:
17554 case Intrinsic::arm_mve_vqdmlash_predicated:
17555 case Intrinsic::arm_mve_vqrdmlah_predicated:
17556 case Intrinsic::arm_mve_vqrdmlash_predicated: {
17557 // These intrinsics all take an i32 scalar operand which is narrowed to the
17558 // size of a single lane of the vector type they return. So we don't need
17559 // any bits of that operand above that point, which allows us to eliminate
17560 // uxth/sxth.
17561 unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
17562 APInt DemandedMask = APInt::getLowBitsSet(32, BitWidth);
17563 if (SimplifyDemandedBits(N->getOperand(3), DemandedMask, DCI))
17564 return SDValue();
17565 break;
17566 }
17567
17568 case Intrinsic::arm_mve_minv:
17569 case Intrinsic::arm_mve_maxv:
17570 case Intrinsic::arm_mve_minav:
17571 case Intrinsic::arm_mve_maxav:
17572 case Intrinsic::arm_mve_minv_predicated:
17573 case Intrinsic::arm_mve_maxv_predicated:
17574 case Intrinsic::arm_mve_minav_predicated:
17575 case Intrinsic::arm_mve_maxav_predicated: {
17576 // These intrinsics all take an i32 scalar operand which is narrowed to the
17577 // size of a single lane of the vector type they take as the other input.
17578 unsigned BitWidth = N->getOperand(2)->getValueType(0).getScalarSizeInBits();
17579 APInt DemandedMask = APInt::getLowBitsSet(32, BitWidth);
17580 if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
17581 return SDValue();
17582 break;
17583 }
17584
17585 case Intrinsic::arm_mve_addv: {
17586 // Turn this intrinsic straight into the appropriate ARMISD::VADDV node,
17587 // which allows PerformADDVecReduce to turn it into VADDLV when possible.
17588 bool Unsigned = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
17589 unsigned Opc = Unsigned ? ARMISD::VADDVu : ARMISD::VADDVs;
17590 return DAG.getNode(Opc, SDLoc(N), N->getVTList(), N->getOperand(1));
17591 }
17592
17593 case Intrinsic::arm_mve_addlv:
17594 case Intrinsic::arm_mve_addlv_predicated: {
17595 // Same for these, but ARMISD::VADDLV has to be followed by a BUILD_PAIR
17596 // which recombines the two outputs into an i64.
17597 bool Unsigned = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
17598 unsigned Opc = IntNo == Intrinsic::arm_mve_addlv ?
17599 (Unsigned ? ARMISD::VADDLVu : ARMISD::VADDLVs) :
17600 (Unsigned ? ARMISD::VADDLVpu : ARMISD::VADDLVps);
17601
17602 SmallVector<SDValue, 4> Ops;
17603 for (unsigned i = 1, e = N->getNumOperands(); i < e; i++)
17604 if (i != 2) // skip the unsigned flag
17605 Ops.push_back(N->getOperand(i));
17606
17607 SDLoc dl(N);
17608 SDValue val = DAG.getNode(Opc, dl, {MVT::i32, MVT::i32}, Ops);
17609 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, val.getValue(0),
17610 val.getValue(1));
17611 }
17612 }
17613
17614 return SDValue();
17615}
17616
17617/// PerformShiftCombine - Checks for immediate versions of vector shifts and
17618/// lowers them. As with the vector shift intrinsics, this is done during DAG
17619/// combining instead of DAG legalizing because the build_vectors for 64-bit
17620/// vector element shift counts are generally not legal, and it is hard to see
17621/// their values after they get legalized to loads from a constant pool.
17622static SDValue PerformShiftCombine(SDNode *N,
17623 TargetLowering::DAGCombinerInfo &DCI,
17624 const ARMSubtarget *ST) {
17625 SelectionDAG &DAG = DCI.DAG;
17626 EVT VT = N->getValueType(0);
17627
17628 if (ST->isThumb1Only() && N->getOpcode() == ISD::SHL && VT == MVT::i32 &&
17629 N->getOperand(0)->getOpcode() == ISD::AND &&
17630 N->getOperand(0)->hasOneUse()) {
17631 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
17632 return SDValue();
17633 // Look for the pattern (shl (and x, AndMask), ShiftAmt). This doesn't
17634 // usually show up because instcombine prefers to canonicalize it to
17635 // (and (shl x, ShiftAmt) (shl AndMask, ShiftAmt)), but the shift can come
17636 // out of GEP lowering in some cases.
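// As a concrete sketch: (shl (and x, 0x3FF), 2) has MaskedBits == 22 and
// ShiftAmt == 2, so it becomes (srl (shl x, 22), 20), trading the AND mask
// for a pair of shifts that Thumb1 can encode directly.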
17637 SDValue N0 = N->getOperand(0);
17638 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
17639 if (!ShiftAmtNode)
17640 return SDValue();
17641 uint32_t ShiftAmt = static_cast<uint32_t>(ShiftAmtNode->getZExtValue());
17642 ConstantSDNode *AndMaskNode = dyn_cast<ConstantSDNode>(N0->getOperand(1));
17643 if (!AndMaskNode)
17644 return SDValue();
17645 uint32_t AndMask = static_cast<uint32_t>(AndMaskNode->getZExtValue());
17646 // Don't transform uxtb/uxth.
17647 if (AndMask == 255 || AndMask == 65535)
17648 return SDValue();
17649 if (isMask_32(AndMask)) {
17650 uint32_t MaskedBits = llvm::countl_zero(AndMask);
17651 if (MaskedBits > ShiftAmt) {
17652 SDLoc DL(N);
17653 SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
17654 DAG.getConstant(MaskedBits, DL, MVT::i32));
17655 return DAG.getNode(
17656 ISD::SRL, DL, MVT::i32, SHL,
17657 DAG.getConstant(MaskedBits - ShiftAmt, DL, MVT::i32));
17658 }
17659 }
17660 }
17661
17662 // Nothing to be done for scalar shifts.
17663 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17664 if (!VT.isVector() || !TLI.isTypeLegal(VT))
17665 return SDValue();
17666 if (ST->hasMVEIntegerOps())
17667 return SDValue();
17668
17669 int64_t Cnt;
17670
17671 switch (N->getOpcode()) {
17672 default: llvm_unreachable("unexpected shift opcode")::llvm::llvm_unreachable_internal("unexpected shift opcode", "llvm/lib/Target/ARM/ARMISelLowering.cpp"
, 17672)
;
17673
17674 case ISD::SHL:
17675 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) {
17676 SDLoc dl(N);
17677 return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
17678 DAG.getConstant(Cnt, dl, MVT::i32));
17679 }
17680 break;
17681
17682 case ISD::SRA:
17683 case ISD::SRL:
17684 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
17685 unsigned VShiftOpc =
17686 (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
17687 SDLoc dl(N);
17688 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
17689 DAG.getConstant(Cnt, dl, MVT::i32));
17690 }
17691 }
17692 return SDValue();
17693}
17694
17695 // Look for a sign/zero/fp extend of a larger-than-legal load. This can be
17696// split into multiple extending loads, which are simpler to deal with than an
17697// arbitrary extend. For fp extends we use an integer extending load and a VCVTL
17698// to convert the type to an f32.
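// As an example of the split performed below: zext(v8i8 load) to v8i32 can
// become two v4i8->v4i32 zero-extending loads, at byte offsets 0 and 4,
// concatenated back into a v8i32.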
17699static SDValue PerformSplittingToWideningLoad(SDNode *N, SelectionDAG &DAG) {
17700 SDValue N0 = N->getOperand(0);
17701 if (N0.getOpcode() != ISD::LOAD)
17702 return SDValue();
17703 LoadSDNode *LD = cast<LoadSDNode>(N0.getNode());
17704 if (!LD->isSimple() || !N0.hasOneUse() || LD->isIndexed() ||
17705 LD->getExtensionType() != ISD::NON_EXTLOAD)
17706 return SDValue();
17707 EVT FromVT = LD->getValueType(0);
17708 EVT ToVT = N->getValueType(0);
17709 if (!ToVT.isVector())
17710 return SDValue();
17711 assert(FromVT.getVectorNumElements() == ToVT.getVectorNumElements());
17712 EVT ToEltVT = ToVT.getVectorElementType();
17713 EVT FromEltVT = FromVT.getVectorElementType();
17714
17715 unsigned NumElements = 0;
17716 if (ToEltVT == MVT::i32 && FromEltVT == MVT::i8)
17717 NumElements = 4;
17718 if (ToEltVT == MVT::f32 && FromEltVT == MVT::f16)
17719 NumElements = 4;
17720 if (NumElements == 0 ||
17721 (FromEltVT != MVT::f16 && FromVT.getVectorNumElements() == NumElements) ||
17722 FromVT.getVectorNumElements() % NumElements != 0 ||
17723 !isPowerOf2_32(NumElements))
17724 return SDValue();
17725
17726 LLVMContext &C = *DAG.getContext();
17727 SDLoc DL(LD);
17728 // Details about the old load
17729 SDValue Ch = LD->getChain();
17730 SDValue BasePtr = LD->getBasePtr();
17731 Align Alignment = LD->getOriginalAlign();
17732 MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
17733 AAMDNodes AAInfo = LD->getAAInfo();
17734
17735 ISD::LoadExtType NewExtType =
17736 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
17737 SDValue Offset = DAG.getUNDEF(BasePtr.getValueType());
17738 EVT NewFromVT = EVT::getVectorVT(
17739 C, EVT::getIntegerVT(C, FromEltVT.getScalarSizeInBits()), NumElements);
17740 EVT NewToVT = EVT::getVectorVT(
17741 C, EVT::getIntegerVT(C, ToEltVT.getScalarSizeInBits()), NumElements);
17742
17743 SmallVector<SDValue, 4> Loads;
17744 SmallVector<SDValue, 4> Chains;
17745 for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
17746 unsigned NewOffset = (i * NewFromVT.getSizeInBits()) / 8;
17747 SDValue NewPtr =
17748 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::Fixed(NewOffset));
17749
17750 SDValue NewLoad =
17751 DAG.getLoad(ISD::UNINDEXED, NewExtType, NewToVT, DL, Ch, NewPtr, Offset,
17752 LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT,
17753 Alignment, MMOFlags, AAInfo);
17754 Loads.push_back(NewLoad);
17755 Chains.push_back(SDValue(NewLoad.getNode(), 1));
17756 }
17757
17758 // f16 loads need to be extended into their floating point types with VCVTL.
17759 if (FromEltVT == MVT::f16) {
17760 SmallVector<SDValue, 4> Extends;
17761
17762 for (unsigned i = 0; i < Loads.size(); i++) {
17763 SDValue LoadBC =
17764 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, MVT::v8f16, Loads[i]);
17765 SDValue FPExt = DAG.getNode(ARMISD::VCVTL, DL, MVT::v4f32, LoadBC,
17766 DAG.getConstant(0, DL, MVT::i32));
17767 Extends.push_back(FPExt);
17768 }
17769
17770 Loads = Extends;
17771 }
17772
17773 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
17774 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewChain);
17775 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ToVT, Loads);
17776}
17777
17778/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
17779/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
17780static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
17781 const ARMSubtarget *ST) {
17782 SDValue N0 = N->getOperand(0);
17783
17784 // Check for sign- and zero-extensions of vector extract operations of 8- and
17785 // 16-bit vector elements. NEON and MVE support these directly. They are
17786 // handled during DAG combining because type legalization will promote them
17787 // to 32-bit types and it is messy to recognize the operations after that.
17788 if ((ST->hasNEON() || ST->hasMVEIntegerOps()) &&
17789 N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
17790 SDValue Vec = N0.getOperand(0);
17791 SDValue Lane = N0.getOperand(1);
17792 EVT VT = N->getValueType(0);
17793 EVT EltVT = N0.getValueType();
17794 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17795
17796 if (VT == MVT::i32 &&
17797 (EltVT == MVT::i8 || EltVT == MVT::i16) &&
17798 TLI.isTypeLegal(Vec.getValueType()) &&
17799 isa<ConstantSDNode>(Lane)) {
17800
17801 unsigned Opc = 0;
17802 switch (N->getOpcode()) {
17803 default: llvm_unreachable("unexpected opcode")::llvm::llvm_unreachable_internal("unexpected opcode", "llvm/lib/Target/ARM/ARMISelLowering.cpp"
, 17803)
;
17804 case ISD::SIGN_EXTEND:
17805 Opc = ARMISD::VGETLANEs;
17806 break;
17807 case ISD::ZERO_EXTEND:
17808 case ISD::ANY_EXTEND:
17809 Opc = ARMISD::VGETLANEu;
17810 break;
17811 }
17812 return DAG.getNode(Opc, SDLoc(N), VT, Vec, Lane);
17813 }
17814 }
17815
17816 if (ST->hasMVEIntegerOps())
17817 if (SDValue NewLoad = PerformSplittingToWideningLoad(N, DAG))
17818 return NewLoad;
17819
17820 return SDValue();
17821}
17822
17823static SDValue PerformFPExtendCombine(SDNode *N, SelectionDAG &DAG,
17824 const ARMSubtarget *ST) {
17825 if (ST->hasMVEFloatOps())
17826 if (SDValue NewLoad = PerformSplittingToWideningLoad(N, DAG))
17827 return NewLoad;
17828
17829 return SDValue();
17830}
17831
17832// Lower smin(smax(x, C1), C2) to ssat or usat, if they have saturating
17833// constant bounds.
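// For example, smin(smax(x, -128), 127) can become an SSAT node (clamping x
// to [-128, 127]) and smin(smax(x, 0), 255) a USAT node (clamping to
// [0, 255]), provided the bounds line up as checked below.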
17834static SDValue PerformMinMaxToSatCombine(SDValue Op, SelectionDAG &DAG,
17835 const ARMSubtarget *Subtarget) {
17836 if ((Subtarget->isThumb() || !Subtarget->hasV6Ops()) &&
17837 !Subtarget->isThumb2())
17838 return SDValue();
17839
17840 EVT VT = Op.getValueType();
17841 SDValue Op0 = Op.getOperand(0);
17842
17843 if (VT != MVT::i32 ||
17844 (Op0.getOpcode() != ISD::SMIN && Op0.getOpcode() != ISD::SMAX) ||
17845 !isa<ConstantSDNode>(Op.getOperand(1)) ||
17846 !isa<ConstantSDNode>(Op0.getOperand(1)))
17847 return SDValue();
17848
17849 SDValue Min = Op;
17850 SDValue Max = Op0;
17851 SDValue Input = Op0.getOperand(0);
17852 if (Min.getOpcode() == ISD::SMAX)
17853 std::swap(Min, Max);
17854
17855 APInt MinC = Min.getConstantOperandAPInt(1);
17856 APInt MaxC = Max.getConstantOperandAPInt(1);
17857
17858 if (Min.getOpcode() != ISD::SMIN || Max.getOpcode() != ISD::SMAX ||
17859 !(MinC + 1).isPowerOf2())
17860 return SDValue();
17861
17862 SDLoc DL(Op);
17863 if (MinC == ~MaxC)
17864 return DAG.getNode(ARMISD::SSAT, DL, VT, Input,
17865 DAG.getConstant(MinC.countr_one(), DL, VT));
17866 if (MaxC == 0)
17867 return DAG.getNode(ARMISD::USAT, DL, VT, Input,
17868 DAG.getConstant(MinC.countr_one(), DL, VT));
17869
17870 return SDValue();
17871}
17872
17873/// PerformMinMaxCombine - Target-specific DAG combining for creating truncating
17874/// saturates.
17875static SDValue PerformMinMaxCombine(SDNode *N, SelectionDAG &DAG,
17876 const ARMSubtarget *ST) {
17877 EVT VT = N->getValueType(0);
17878 SDValue N0 = N->getOperand(0);
17879
17880 if (VT == MVT::i32)
17881 return PerformMinMaxToSatCombine(SDValue(N, 0), DAG, ST);
17882
17883 if (!ST->hasMVEIntegerOps())
17884 return SDValue();
17885
17886 if (SDValue V = PerformVQDMULHCombine(N, DAG))
17887 return V;
17888
17889 if (VT != MVT::v4i32 && VT != MVT::v8i16)
17890 return SDValue();
17891
17892 auto IsSignedSaturate = [&](SDNode *Min, SDNode *Max) {
17893 // Check one is a smin and the other is a smax
17894 if (Min->getOpcode() != ISD::SMIN)
17895 std::swap(Min, Max);
17896 if (Min->getOpcode() != ISD::SMIN || Max->getOpcode() != ISD::SMAX)
17897 return false;
17898
17899 APInt SaturateC;
17900 if (VT == MVT::v4i32)
17901 SaturateC = APInt(32, (1 << 15) - 1, true);
17902 else //if (VT == MVT::v8i16)
17903 SaturateC = APInt(16, (1 << 7) - 1, true);
17904
17905 APInt MinC, MaxC;
17906 if (!ISD::isConstantSplatVector(Min->getOperand(1).getNode(), MinC) ||
17907 MinC != SaturateC)
17908 return false;
17909 if (!ISD::isConstantSplatVector(Max->getOperand(1).getNode(), MaxC) ||
17910 MaxC != ~SaturateC)
17911 return false;
17912 return true;
17913 };
17914
17915 if (IsSignedSaturate(N, N0.getNode())) {
17916 SDLoc DL(N);
17917 MVT ExtVT, HalfVT;
17918 if (VT == MVT::v4i32) {
17919 HalfVT = MVT::v8i16;
17920 ExtVT = MVT::v4i16;
17921 } else { // if (VT == MVT::v8i16)
17922 HalfVT = MVT::v16i8;
17923 ExtVT = MVT::v8i8;
17924 }
17925
17926 // Create a VQMOVNB with undef top lanes, then sign extend it into the top
17927 // half. That extend will hopefully be removed if only the bottom bits are
17928 // demanded (through a truncating store, for example).
17929 SDValue VQMOVN =
17930 DAG.getNode(ARMISD::VQMOVNs, DL, HalfVT, DAG.getUNDEF(HalfVT),
17931 N0->getOperand(0), DAG.getConstant(0, DL, MVT::i32));
17932 SDValue Bitcast = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, VQMOVN);
17933 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Bitcast,
17934 DAG.getValueType(ExtVT));
17935 }
17936
17937 auto IsUnsignedSaturate = [&](SDNode *Min) {
17938 // For unsigned, we just need to check for <= 0xffff
17939 if (Min->getOpcode() != ISD::UMIN)
17940 return false;
17941
17942 APInt SaturateC;
17943 if (VT == MVT::v4i32)
17944 SaturateC = APInt(32, (1 << 16) - 1, true);
17945 else //if (VT == MVT::v8i16)
17946 SaturateC = APInt(16, (1 << 8) - 1, true);
17947
17948 APInt MinC;
17949 if (!ISD::isConstantSplatVector(Min->getOperand(1).getNode(), MinC) ||
17950 MinC != SaturateC)
17951 return false;
17952 return true;
17953 };
17954
17955 if (IsUnsignedSaturate(N)) {
17956 SDLoc DL(N);
17957 MVT HalfVT;
17958 unsigned ExtConst;
17959 if (VT == MVT::v4i32) {
17960 HalfVT = MVT::v8i16;
17961 ExtConst = 0x0000FFFF;
17962 } else { //if (VT == MVT::v8i16)
17963 HalfVT = MVT::v16i8;
17964 ExtConst = 0x00FF;
17965 }
17966
17967 // Create a VQMOVNB with undef top lanes, then ZExt into the top half with
17968 // an AND. That extend will hopefully be removed if only the bottom bits are
17969     // demanded (through a truncating store, for example).
17970 SDValue VQMOVN =
17971 DAG.getNode(ARMISD::VQMOVNu, DL, HalfVT, DAG.getUNDEF(HalfVT), N0,
17972 DAG.getConstant(0, DL, MVT::i32));
17973 SDValue Bitcast = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, VQMOVN);
17974 return DAG.getNode(ISD::AND, DL, VT, Bitcast,
17975 DAG.getConstant(ExtConst, DL, VT));
17976 }
17977
17978 return SDValue();
17979}
17980
17981static const APInt *isPowerOf2Constant(SDValue V) {
17982 ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
17983 if (!C)
17984 return nullptr;
17985 const APInt *CV = &C->getAPIntValue();
17986 return CV->isPowerOf2() ? CV : nullptr;
17987}
17988
17989SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const {
17990 // If we have a CMOV, OR and AND combination such as:
17991 // if (x & CN)
17992 // y |= CM;
17993 //
17994 // And:
17995 // * CN is a single bit;
17996 // * All bits covered by CM are known zero in y
17997 //
17998 // Then we can convert this into a sequence of BFI instructions. This will
17999 // always be a win if CM is a single bit, will always be no worse than the
18000 // TST&OR sequence if CM is two bits, and for thumb will be no worse if CM is
18001 // three bits (due to the extra IT instruction).
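  //
  // Illustrative example (constants chosen purely for exposition): with
  // CN = 0x10 (bit 4) and CM = 0x6 (bits 1 and 2), X is first shifted right
  // by 4 so the tested bit sits in bit 0, and the loop below then emits two
  // BFIs inserting that bit into Y at bit positions 1 and 2.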
18002
18003 SDValue Op0 = CMOV->getOperand(0);
18004 SDValue Op1 = CMOV->getOperand(1);
18005 auto CCNode = cast<ConstantSDNode>(CMOV->getOperand(2));
18006 auto CC = CCNode->getAPIntValue().getLimitedValue();
18007 SDValue CmpZ = CMOV->getOperand(4);
18008
18009 // The compare must be against zero.
18010 if (!isNullConstant(CmpZ->getOperand(1)))
18011 return SDValue();
18012
18013   assert(CmpZ->getOpcode() == ARMISD::CMPZ);
18014 SDValue And = CmpZ->getOperand(0);
18015 if (And->getOpcode() != ISD::AND)
18016 return SDValue();
18017 const APInt *AndC = isPowerOf2Constant(And->getOperand(1));
18018 if (!AndC)
18019 return SDValue();
18020 SDValue X = And->getOperand(0);
18021
18022 if (CC == ARMCC::EQ) {
18023 // We're performing an "equal to zero" compare. Swap the operands so we
18024 // canonicalize on a "not equal to zero" compare.
18025 std::swap(Op0, Op1);
18026 } else {
18027     assert(CC == ARMCC::NE && "How can a CMPZ node not be EQ or NE?");
18028 }
18029
18030 if (Op1->getOpcode() != ISD::OR)
18031 return SDValue();
18032
18033 ConstantSDNode *OrC = dyn_cast<ConstantSDNode>(Op1->getOperand(1));
18034 if (!OrC)
18035 return SDValue();
18036 SDValue Y = Op1->getOperand(0);
18037
18038 if (Op0 != Y)
18039 return SDValue();
18040
18041 // Now, is it profitable to continue?
18042 APInt OrCI = OrC->getAPIntValue();
18043 unsigned Heuristic = Subtarget->isThumb() ? 3 : 2;
18044 if (OrCI.popcount() > Heuristic)
18045 return SDValue();
18046
18047 // Lastly, can we determine that the bits defined by OrCI
18048 // are zero in Y?
18049 KnownBits Known = DAG.computeKnownBits(Y);
18050 if ((OrCI & Known.Zero) != OrCI)
18051 return SDValue();
18052
18053 // OK, we can do the combine.
18054 SDValue V = Y;
18055 SDLoc dl(X);
18056 EVT VT = X.getValueType();
18057 unsigned BitInX = AndC->logBase2();
18058
18059 if (BitInX != 0) {
18060 // We must shift X first.
18061 X = DAG.getNode(ISD::SRL, dl, VT, X,
18062 DAG.getConstant(BitInX, dl, VT));
18063 }
18064
18065 for (unsigned BitInY = 0, NumActiveBits = OrCI.getActiveBits();
18066 BitInY < NumActiveBits; ++BitInY) {
18067 if (OrCI[BitInY] == 0)
18068 continue;
18069 APInt Mask(VT.getSizeInBits(), 0);
18070 Mask.setBit(BitInY);
18071 V = DAG.getNode(ARMISD::BFI, dl, VT, V, X,
18072 // Confusingly, the operand is an *inverted* mask.
18073 DAG.getConstant(~Mask, dl, VT));
18074 }
18075
18076 return V;
18077}
18078
18079// Given N, the value controlling the conditional branch, search for the loop
18080// intrinsic, returning it, along with how the value is used. We need to handle
18081// patterns such as the following:
18082// (brcond (xor (setcc (loop.decrement), 0, ne), 1), exit)
18083// (brcond (setcc (loop.decrement), 0, eq), exit)
18084// (brcond (setcc (loop.decrement), 0, ne), header)
18085static SDValue SearchLoopIntrinsic(SDValue N, ISD::CondCode &CC, int &Imm,
18086 bool &Negate) {
18087 switch (N->getOpcode()) {
18088 default:
18089 break;
18090 case ISD::XOR: {
18091 if (!isa<ConstantSDNode>(N.getOperand(1)))
18092 return SDValue();
18093 if (!cast<ConstantSDNode>(N.getOperand(1))->isOne())
18094 return SDValue();
18095 Negate = !Negate;
18096 return SearchLoopIntrinsic(N.getOperand(0), CC, Imm, Negate);
18097 }
18098 case ISD::SETCC: {
18099 auto *Const = dyn_cast<ConstantSDNode>(N.getOperand(1));
18100 if (!Const)
18101 return SDValue();
18102 if (Const->isZero())
18103 Imm = 0;
18104 else if (Const->isOne())
18105 Imm = 1;
18106 else
18107 return SDValue();
18108 CC = cast<CondCodeSDNode>(N.getOperand(2))->get();
18109 return SearchLoopIntrinsic(N->getOperand(0), CC, Imm, Negate);
18110 }
18111 case ISD::INTRINSIC_W_CHAIN: {
18112 unsigned IntOp = cast<ConstantSDNode>(N.getOperand(1))->getZExtValue();
18113 if (IntOp != Intrinsic::test_start_loop_iterations &&
18114 IntOp != Intrinsic::loop_decrement_reg)
18115 return SDValue();
18116 return N;
18117 }
18118 }
18119 return SDValue();
18120}
18121
18122static SDValue PerformHWLoopCombine(SDNode *N,
18123 TargetLowering::DAGCombinerInfo &DCI,
18124 const ARMSubtarget *ST) {
18125
18126   // The hwloop intrinsics that we're interested in are used for control flow,
18127   // either for entering or exiting the loop:
18128   // - test.start.loop.iterations will test whether its operand is zero. If it
18129   //   is zero, the following branch should not enter the loop.
18130   // - loop.decrement.reg also tests whether its operand is zero. If it is
18131   //   zero, the following branch should not branch back to the beginning of
18132   //   the loop.
18133   // So here, we need to check how the brcond is using the result of each of
18134   // the intrinsics to ensure that we're branching to the right place at the
18135   // right time.
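  //
  // For example (a sketch following the patterns listed above
  // SearchLoopIntrinsic): (brcond (setcc (test.start.loop.iterations n), 0, eq),
  // exit) is found with CC == SETEQ and Imm == 0, i.e. IsTrueIfZero below, so
  // the WLS that is created branches to 'exit' when the iteration count is
  // zero.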
18136
18137 ISD::CondCode CC;
18138 SDValue Cond;
18139 int Imm = 1;
18140 bool Negate = false;
18141 SDValue Chain = N->getOperand(0);
18142 SDValue Dest;
18143
18144 if (N->getOpcode() == ISD::BRCOND) {
18145 CC = ISD::SETEQ;
18146 Cond = N->getOperand(1);
18147 Dest = N->getOperand(2);
18148 } else {
18149     assert(N->getOpcode() == ISD::BR_CC && "Expected BRCOND or BR_CC!");
18150 CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
18151 Cond = N->getOperand(2);
18152 Dest = N->getOperand(4);
18153 if (auto *Const = dyn_cast<ConstantSDNode>(N->getOperand(3))) {
18154 if (!Const->isOne() && !Const->isZero())
18155 return SDValue();
18156 Imm = Const->getZExtValue();
18157 } else
18158 return SDValue();
18159 }
18160
18161 SDValue Int = SearchLoopIntrinsic(Cond, CC, Imm, Negate);
18162 if (!Int)
18163 return SDValue();
18164
18165 if (Negate)
18166 CC = ISD::getSetCCInverse(CC, /* Integer inverse */ MVT::i32);
18167
18168 auto IsTrueIfZero = [](ISD::CondCode CC, int Imm) {
18169 return (CC == ISD::SETEQ && Imm == 0) ||
18170 (CC == ISD::SETNE && Imm == 1) ||
18171 (CC == ISD::SETLT && Imm == 1) ||
18172 (CC == ISD::SETULT && Imm == 1);
18173 };
18174
18175 auto IsFalseIfZero = [](ISD::CondCode CC, int Imm) {
18176 return (CC == ISD::SETEQ && Imm == 1) ||
18177 (CC == ISD::SETNE && Imm == 0) ||
18178 (CC == ISD::SETGT && Imm == 0) ||
18179 (CC == ISD::SETUGT && Imm == 0) ||
18180 (CC == ISD::SETGE && Imm == 1) ||
18181 (CC == ISD::SETUGE && Imm == 1);
18182 };
18183
18184   assert((IsTrueIfZero(CC, Imm) || IsFalseIfZero(CC, Imm)) &&
18185          "unsupported condition");
18186
18187 SDLoc dl(Int);
18188 SelectionDAG &DAG = DCI.DAG;
18189 SDValue Elements = Int.getOperand(2);
18190 unsigned IntOp = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue();
18191   assert((N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BR)
18192          && "expected single br user");
18193 SDNode *Br = *N->use_begin();
18194 SDValue OtherTarget = Br->getOperand(1);
18195
18196 // Update the unconditional branch to branch to the given Dest.
18197 auto UpdateUncondBr = [](SDNode *Br, SDValue Dest, SelectionDAG &DAG) {
18198 SDValue NewBrOps[] = { Br->getOperand(0), Dest };
18199 SDValue NewBr = DAG.getNode(ISD::BR, SDLoc(Br), MVT::Other, NewBrOps);
18200 DAG.ReplaceAllUsesOfValueWith(SDValue(Br, 0), NewBr);
18201 };
18202
18203 if (IntOp == Intrinsic::test_start_loop_iterations) {
18204 SDValue Res;
18205 SDValue Setup = DAG.getNode(ARMISD::WLSSETUP, dl, MVT::i32, Elements);
18206 // We expect this 'instruction' to branch when the counter is zero.
18207 if (IsTrueIfZero(CC, Imm)) {
18208 SDValue Ops[] = {Chain, Setup, Dest};
18209 Res = DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops);
18210 } else {
18211       // The logic is the reverse of what we need for WLS, so find the other
18212       // basic block target: the target of the following br.
18213 UpdateUncondBr(Br, Dest, DAG);
18214
18215 SDValue Ops[] = {Chain, Setup, OtherTarget};
18216 Res = DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops);
18217 }
18218 // Update LR count to the new value
18219 DAG.ReplaceAllUsesOfValueWith(Int.getValue(0), Setup);
18220 // Update chain
18221 DAG.ReplaceAllUsesOfValueWith(Int.getValue(2), Int.getOperand(0));
18222 return Res;
18223 } else {
18224 SDValue Size = DAG.getTargetConstant(
18225 cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl, MVT::i32);
18226 SDValue Args[] = { Int.getOperand(0), Elements, Size, };
18227 SDValue LoopDec = DAG.getNode(ARMISD::LOOP_DEC, dl,
18228 DAG.getVTList(MVT::i32, MVT::Other), Args);
18229 DAG.ReplaceAllUsesWith(Int.getNode(), LoopDec.getNode());
18230
18231 // We expect this instruction to branch when the count is not zero.
18232 SDValue Target = IsFalseIfZero(CC, Imm) ? Dest : OtherTarget;
18233
18234 // Update the unconditional branch to target the loop preheader if we've
18235 // found the condition has been reversed.
18236 if (Target == OtherTarget)
18237 UpdateUncondBr(Br, Dest, DAG);
18238
18239 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
18240 SDValue(LoopDec.getNode(), 1), Chain);
18241
18242 SDValue EndArgs[] = { Chain, SDValue(LoopDec.getNode(), 0), Target };
18243 return DAG.getNode(ARMISD::LE, dl, MVT::Other, EndArgs);
18244 }
18245 return SDValue();
18246}
18247
18248/// PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
18249SDValue
18250ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const {
18251 SDValue Cmp = N->getOperand(4);
18252 if (Cmp.getOpcode() != ARMISD::CMPZ)
18253 // Only looking at NE cases.
18254 return SDValue();
18255
18256 EVT VT = N->getValueType(0);
18257 SDLoc dl(N);
18258 SDValue LHS = Cmp.getOperand(0);
18259 SDValue RHS = Cmp.getOperand(1);
18260 SDValue Chain = N->getOperand(0);
18261 SDValue BB = N->getOperand(1);
18262 SDValue ARMcc = N->getOperand(2);
18263 ARMCC::CondCodes CC =
18264 (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
18265
18266 // (brcond Chain BB ne CPSR (cmpz (and (cmov 0 1 CC CPSR Cmp) 1) 0))
18267 // -> (brcond Chain BB CC CPSR Cmp)
18268 if (CC == ARMCC::NE && LHS.getOpcode() == ISD::AND && LHS->hasOneUse() &&
18269 LHS->getOperand(0)->getOpcode() == ARMISD::CMOV &&
18270 LHS->getOperand(0)->hasOneUse() &&
18271 isNullConstant(LHS->getOperand(0)->getOperand(0)) &&
18272 isOneConstant(LHS->getOperand(0)->getOperand(1)) &&
18273 isOneConstant(LHS->getOperand(1)) && isNullConstant(RHS)) {
18274 return DAG.getNode(
18275 ARMISD::BRCOND, dl, VT, Chain, BB, LHS->getOperand(0)->getOperand(2),
18276 LHS->getOperand(0)->getOperand(3), LHS->getOperand(0)->getOperand(4));
18277 }
18278
18279 return SDValue();
18280}
18281
18282/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
18283SDValue
18284ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
18285 SDValue Cmp = N->getOperand(4);
18286 if (Cmp.getOpcode() != ARMISD::CMPZ)
18287 // Only looking at EQ and NE cases.
18288 return SDValue();
18289
18290 EVT VT = N->getValueType(0);
18291 SDLoc dl(N);
18292 SDValue LHS = Cmp.getOperand(0);
18293 SDValue RHS = Cmp.getOperand(1);
18294 SDValue FalseVal = N->getOperand(0);
18295 SDValue TrueVal = N->getOperand(1);
18296 SDValue ARMcc = N->getOperand(2);
18297 ARMCC::CondCodes CC =
18298 (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
18299
18300 // BFI is only available on V6T2+.
18301 if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) {
18302 SDValue R = PerformCMOVToBFICombine(N, DAG);
18303 if (R)
18304 return R;
18305 }
18306
18307 // Simplify
18308 // mov r1, r0
18309 // cmp r1, x
18310 // mov r0, y
18311 // moveq r0, x
18312 // to
18313 // cmp r0, x
18314 // movne r0, y
18315 //
18316 // mov r1, r0
18317 // cmp r1, x
18318 // mov r0, x
18319 // movne r0, y
18320 // to
18321 // cmp r0, x
18322 // movne r0, y
18323 /// FIXME: Turn this into a target neutral optimization?
18324 SDValue Res;
18325 if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) {
18326 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc,
18327 N->getOperand(3), Cmp);
18328 } else if (CC == ARMCC::EQ && TrueVal == RHS) {
18329 SDValue ARMcc;
18330 SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl);
18331 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc,
18332 N->getOperand(3), NewCmp);
18333 }
18334
18335 // (cmov F T ne CPSR (cmpz (cmov 0 1 CC CPSR Cmp) 0))
18336 // -> (cmov F T CC CPSR Cmp)
18337 if (CC == ARMCC::NE && LHS.getOpcode() == ARMISD::CMOV && LHS->hasOneUse() &&
18338 isNullConstant(LHS->getOperand(0)) && isOneConstant(LHS->getOperand(1)) &&
18339 isNullConstant(RHS)) {
18340 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
18341 LHS->getOperand(2), LHS->getOperand(3),
18342 LHS->getOperand(4));
18343 }
18344
18345 if (!VT.isInteger())
18346 return SDValue();
18347
18348   // Fold away an unnecessary CMPZ/CMOV
18349 // CMOV A, B, C1, $cpsr, (CMPZ (CMOV 1, 0, C2, D), 0) ->
18350 // if C1==EQ -> CMOV A, B, C2, $cpsr, D
18351 // if C1==NE -> CMOV A, B, NOT(C2), $cpsr, D
18352 if (N->getConstantOperandVal(2) == ARMCC::EQ ||
18353 N->getConstantOperandVal(2) == ARMCC::NE) {
18354 ARMCC::CondCodes Cond;
18355 if (SDValue C = IsCMPZCSINC(N->getOperand(4).getNode(), Cond)) {
18356 if (N->getConstantOperandVal(2) == ARMCC::NE)
18357 Cond = ARMCC::getOppositeCondition(Cond);
18358 return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
18359 N->getOperand(1),
18360 DAG.getTargetConstant(Cond, SDLoc(N), MVT::i32),
18361 N->getOperand(3), C);
18362 }
18363 }
18364
18365 // Materialize a boolean comparison for integers so we can avoid branching.
18366 if (isNullConstant(FalseVal)) {
18367 if (CC == ARMCC::EQ && isOneConstant(TrueVal)) {
18368 if (!Subtarget->isThumb1Only() && Subtarget->hasV5TOps()) {
18369         // If x == y then x - y == 0 and ARM's CLZ will return 32; shifting that
18370         // right by 5 bits gives 1, otherwise it gives 0.
18371 // CMOV 0, 1, ==, (CMPZ x, y) -> SRL (CTLZ (SUB x, y)), 5
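        // Concretely: 32 == 0b100000, so 32 >> 5 == 1, while any non-zero
        // difference has CLZ <= 31 and therefore CLZ >> 5 == 0.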
18372 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
18373 Res = DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::CTLZ, dl, VT, Sub),
18374 DAG.getConstant(5, dl, MVT::i32));
18375 } else {
18376 // CMOV 0, 1, ==, (CMPZ x, y) ->
18377 // (UADDO_CARRY (SUB x, y), t:0, t:1)
18378 // where t = (USUBO_CARRY 0, (SUB x, y), 0)
18379 //
18380 // The USUBO_CARRY computes 0 - (x - y) and this will give a borrow when
18381 // x != y. In other words, a carry C == 1 when x == y, C == 0
18382 // otherwise.
18383 // The final UADDO_CARRY computes
18384 // x - y + (0 - (x - y)) + C == C
18385 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
18386 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
18387 SDValue Neg = DAG.getNode(ISD::USUBO, dl, VTs, FalseVal, Sub);
18388       // ISD::USUBO_CARRY returns a borrow, but what we actually want here is
18389       // the carry.
18390 SDValue Carry =
18391 DAG.getNode(ISD::SUB, dl, MVT::i32,
18392 DAG.getConstant(1, dl, MVT::i32), Neg.getValue(1));
18393 Res = DAG.getNode(ISD::UADDO_CARRY, dl, VTs, Sub, Neg, Carry);
18394 }
18395 } else if (CC == ARMCC::NE && !isNullConstant(RHS) &&
18396 (!Subtarget->isThumb1Only() || isPowerOf2Constant(TrueVal))) {
18397 // This seems pointless but will allow us to combine it further below.
18398 // CMOV 0, z, !=, (CMPZ x, y) -> CMOV (SUBS x, y), z, !=, (SUBS x, y):1
18399 SDValue Sub =
18400 DAG.getNode(ARMISD::SUBS, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
18401 SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
18402 Sub.getValue(1), SDValue());
18403 Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, TrueVal, ARMcc,
18404 N->getOperand(3), CPSRGlue.getValue(1));
18405 FalseVal = Sub;
18406 }
18407 } else if (isNullConstant(TrueVal)) {
18408 if (CC == ARMCC::EQ && !isNullConstant(RHS) &&
18409 (!Subtarget->isThumb1Only() || isPowerOf2Constant(FalseVal))) {
18410 // This seems pointless but will allow us to combine it further below
18411       // Note that we change == to != as this is the dual of the case above.
18412 // CMOV z, 0, ==, (CMPZ x, y) -> CMOV (SUBS x, y), z, !=, (SUBS x, y):1
18413 SDValue Sub =
18414 DAG.getNode(ARMISD::SUBS, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
18415 SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
18416 Sub.getValue(1), SDValue());
18417 Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, FalseVal,
18418 DAG.getConstant(ARMCC::NE, dl, MVT::i32),
18419 N->getOperand(3), CPSRGlue.getValue(1));
18420 FalseVal = Sub;
18421 }
18422 }
18423
18424 // On Thumb1, the DAG above may be further combined if z is a power of 2
18425 // (z == 2 ^ K).
18426 // CMOV (SUBS x, y), z, !=, (SUBS x, y):1 ->
18427 // t1 = (USUBO (SUB x, y), 1)
18428 // t2 = (USUBO_CARRY (SUB x, y), t1:0, t1:1)
18429 // Result = if K != 0 then (SHL t2:0, K) else t2:0
18430 //
18431 // This also handles the special case of comparing against zero; it's
18432   // essentially the same pattern, except there's no SUBS:
18433 // CMOV x, z, !=, (CMPZ x, 0) ->
18434 // t1 = (USUBO x, 1)
18435 // t2 = (USUBO_CARRY x, t1:0, t1:1)
18436 // Result = if K != 0 then (SHL t2:0, K) else t2:0
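  //
  // A worked sketch of why this holds: with Sub = x - y, t1 = (USUBO Sub, 1)
  // borrows exactly when Sub == 0, and t2 = Sub - (Sub - 1) - borrow
  // = 1 - borrow, so t2:0 is 0 when x == y and 1 otherwise; the SHL by K then
  // yields either 0 or z == 2^K.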
18437 const APInt *TrueConst;
18438 if (Subtarget->isThumb1Only() && CC == ARMCC::NE &&
18439 ((FalseVal.getOpcode() == ARMISD::SUBS &&
18440 FalseVal.getOperand(0) == LHS && FalseVal.getOperand(1) == RHS) ||
18441 (FalseVal == LHS && isNullConstant(RHS))) &&
18442 (TrueConst = isPowerOf2Constant(TrueVal))) {
18443 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
18444 unsigned ShiftAmount = TrueConst->logBase2();
18445 if (ShiftAmount)
18446 TrueVal = DAG.getConstant(1, dl, VT);
18447 SDValue Subc = DAG.getNode(ISD::USUBO, dl, VTs, FalseVal, TrueVal);
18448 Res = DAG.getNode(ISD::USUBO_CARRY, dl, VTs, FalseVal, Subc,
18449 Subc.getValue(1));
18450
18451 if (ShiftAmount)
18452 Res = DAG.getNode(ISD::SHL, dl, VT, Res,
18453 DAG.getConstant(ShiftAmount, dl, MVT::i32));
18454 }
18455
18456 if (Res.getNode()) {
18457 KnownBits Known = DAG.computeKnownBits(SDValue(N,0));
18458 // Capture demanded bits information that would be otherwise lost.
18459 if (Known.Zero == 0xfffffffe)
18460 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
18461 DAG.getValueType(MVT::i1));
18462 else if (Known.Zero == 0xffffff00)
18463 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
18464 DAG.getValueType(MVT::i8));
18465 else if (Known.Zero == 0xffff0000)
18466 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
18467 DAG.getValueType(MVT::i16));
18468 }
18469
18470 return Res;
18471}
18472
18473static SDValue PerformBITCASTCombine(SDNode *N,
18474 TargetLowering::DAGCombinerInfo &DCI,
18475 const ARMSubtarget *ST) {
18476 SelectionDAG &DAG = DCI.DAG;
18477 SDValue Src = N->getOperand(0);
18478 EVT DstVT = N->getValueType(0);
18479
18480 // Convert v4f32 bitcast (v4i32 vdup (i32)) -> v4f32 vdup (i32) under MVE.
18481 if (ST->hasMVEIntegerOps() && Src.getOpcode() == ARMISD::VDUP) {
18482 EVT SrcVT = Src.getValueType();
18483 if (SrcVT.getScalarSizeInBits() == DstVT.getScalarSizeInBits())
18484 return DAG.getNode(ARMISD::VDUP, SDLoc(N), DstVT, Src.getOperand(0));
18485 }
18486
18487 // We may have a bitcast of something that has already had this bitcast
18488 // combine performed on it, so skip past any VECTOR_REG_CASTs.
18489 while (Src.getOpcode() == ARMISD::VECTOR_REG_CAST)
18490 Src = Src.getOperand(0);
18491
18492 // Bitcast from element-wise VMOV or VMVN doesn't need VREV if the VREV that
18493 // would be generated is at least the width of the element type.
18494 EVT SrcVT = Src.getValueType();
18495 if ((Src.getOpcode() == ARMISD::VMOVIMM ||
18496 Src.getOpcode() == ARMISD::VMVNIMM ||
18497 Src.getOpcode() == ARMISD::VMOVFPIMM) &&
18498 SrcVT.getScalarSizeInBits() <= DstVT.getScalarSizeInBits() &&
18499 DAG.getDataLayout().isBigEndian())
18500 return DAG.getNode(ARMISD::VECTOR_REG_CAST, SDLoc(N), DstVT, Src);
18501
18502 // bitcast(extract(x, n)); bitcast(extract(x, n+1)) -> VMOVRRD x
18503 if (SDValue R = PerformExtractEltToVMOVRRD(N, DCI))
18504 return R;
18505
18506 return SDValue();
18507}
18508
18509 // Some combines for the MVETrunc truncation legalizer helper. Also lowers the
18510 // node into stack operations after DAG legalization.
18511SDValue ARMTargetLowering::PerformMVETruncCombine(
18512 SDNode *N, TargetLowering::DAGCombinerInfo &DCI) const {
18513 SelectionDAG &DAG = DCI.DAG;
18514 EVT VT = N->getValueType(0);
18515 SDLoc DL(N);
18516
18517 // MVETrunc(Undef, Undef) -> Undef
18518 if (all_of(N->ops(), [](SDValue Op) { return Op.isUndef(); }))
18519 return DAG.getUNDEF(VT);
18520
18521 // MVETrunc(MVETrunc a b, MVETrunc c, d) -> MVETrunc
18522 if (N->getNumOperands() == 2 &&
18523 N->getOperand(0).getOpcode() == ARMISD::MVETRUNC &&
18524 N->getOperand(1).getOpcode() == ARMISD::MVETRUNC)
18525 return DAG.getNode(ARMISD::MVETRUNC, DL, VT, N->getOperand(0).getOperand(0),
18526 N->getOperand(0).getOperand(1),
18527 N->getOperand(1).getOperand(0),
18528 N->getOperand(1).getOperand(1));
18529
18530 // MVETrunc(shuffle, shuffle) -> VMOVN
18531 if (N->getNumOperands() == 2 &&
18532 N->getOperand(0).getOpcode() == ISD::VECTOR_SHUFFLE &&
18533 N->getOperand(1).getOpcode() == ISD::VECTOR_SHUFFLE) {
18534 auto *S0 = cast<ShuffleVectorSDNode>(N->getOperand(0).getNode());
18535 auto *S1 = cast<ShuffleVectorSDNode>(N->getOperand(1).getNode());
18536
18537 if (S0->getOperand(0) == S1->getOperand(0) &&
18538 S0->getOperand(1) == S1->getOperand(1)) {
18539 // Construct complete shuffle mask
18540 SmallVector<int, 8> Mask(S0->getMask());
18541 Mask.append(S1->getMask().begin(), S1->getMask().end());
18542
18543 if (isVMOVNTruncMask(Mask, VT, false))
18544 return DAG.getNode(
18545 ARMISD::VMOVN, DL, VT,
18546 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(0)),
18547 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(1)),
18548 DAG.getConstant(1, DL, MVT::i32));
18549 if (isVMOVNTruncMask(Mask, VT, true))
18550 return DAG.getNode(
18551 ARMISD::VMOVN, DL, VT,
18552 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(1)),
18553 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(0)),
18554 DAG.getConstant(1, DL, MVT::i32));
18555 }
18556 }
18557
18558 // For MVETrunc of a buildvector or shuffle, it can be beneficial to lower the
18559 // truncate to a buildvector to allow the generic optimisations to kick in.
18560 if (all_of(N->ops(), [](SDValue Op) {
18561 return Op.getOpcode() == ISD::BUILD_VECTOR ||
18562 Op.getOpcode() == ISD::VECTOR_SHUFFLE ||
18563 (Op.getOpcode() == ISD::BITCAST &&
18564 Op.getOperand(0).getOpcode() == ISD::BUILD_VECTOR);
18565 })) {
18566 SmallVector<SDValue, 8> Extracts;
18567 for (unsigned Op = 0; Op < N->getNumOperands(); Op++) {
18568 SDValue O = N->getOperand(Op);
18569 for (unsigned i = 0; i < O.getValueType().getVectorNumElements(); i++) {
18570 SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, O,
18571 DAG.getConstant(i, DL, MVT::i32));
18572 Extracts.push_back(Ext);
18573 }
18574 }
18575 return DAG.getBuildVector(VT, DL, Extracts);
18576 }
18577
18578 // If we are late in the legalization process and nothing has optimised
18579 // the trunc to anything better, lower it to a stack store and reload,
18580 // performing the truncation whilst keeping the lanes in the correct order:
18581 // VSTRH.32 a, stack; VSTRH.32 b, stack+8; VLDRW.32 stack;
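  //   For instance, a v8i16 MVETrunc of two v4i32 operands truncating-stores
  //   each operand as v4i16 (8 bytes) at offsets 0 and 8 of the 16-byte slot,
  //   then reloads all 16 bytes as a single v8i16.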
18582 if (!DCI.isAfterLegalizeDAG())
18583 return SDValue();
18584
18585 SDValue StackPtr = DAG.CreateStackTemporary(TypeSize::Fixed(16), Align(4));
18586 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
18587 int NumIns = N->getNumOperands();
18588   assert((NumIns == 2 || NumIns == 4) &&
18589          "Expected 2 or 4 inputs to an MVETrunc");
18590 EVT StoreVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
18591 if (N->getNumOperands() == 4)
18592 StoreVT = StoreVT.getHalfNumVectorElementsVT(*DAG.getContext());
18593
18594 SmallVector<SDValue> Chains;
18595 for (int I = 0; I < NumIns; I++) {
18596 SDValue Ptr = DAG.getNode(
18597 ISD::ADD, DL, StackPtr.getValueType(), StackPtr,
18598 DAG.getConstant(I * 16 / NumIns, DL, StackPtr.getValueType()));
18599 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(
18600 DAG.getMachineFunction(), SPFI, I * 16 / NumIns);
18601 SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), DL, N->getOperand(I),
18602 Ptr, MPI, StoreVT, Align(4));
18603 Chains.push_back(Ch);
18604 }
18605
18606 SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
18607 MachinePointerInfo MPI =
18608 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI, 0);
18609 return DAG.getLoad(VT, DL, Chain, StackPtr, MPI, Align(4));
18610}
18611
18612// Take a MVEEXT(load x) and split that into (extload x, extload x+8)
18613static SDValue PerformSplittingMVEEXTToWideningLoad(SDNode *N,
18614 SelectionDAG &DAG) {
18615 SDValue N0 = N->getOperand(0);
18616 LoadSDNode *LD = dyn_cast<LoadSDNode>(N0.getNode());
18617 if (!LD || !LD->isSimple() || !N0.hasOneUse() || LD->isIndexed())
18618 return SDValue();
18619
18620 EVT FromVT = LD->getMemoryVT();
18621 EVT ToVT = N->getValueType(0);
18622 if (!ToVT.isVector())
18623 return SDValue();
18624   assert(FromVT.getVectorNumElements() == ToVT.getVectorNumElements() * 2);
18625 EVT ToEltVT = ToVT.getVectorElementType();
18626 EVT FromEltVT = FromVT.getVectorElementType();
18627
18628 unsigned NumElements = 0;
18629 if (ToEltVT == MVT::i32 && (FromEltVT == MVT::i16 || FromEltVT == MVT::i8))
18630 NumElements = 4;
18631 if (ToEltVT == MVT::i16 && FromEltVT == MVT::i8)
18632 NumElements = 8;
18633   assert(NumElements != 0);
18634
18635 ISD::LoadExtType NewExtType =
18636 N->getOpcode() == ARMISD::MVESEXT ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
18637 if (LD->getExtensionType() != ISD::NON_EXTLOAD &&
18638 LD->getExtensionType() != ISD::EXTLOAD &&
18639 LD->getExtensionType() != NewExtType)
18640 return SDValue();
18641
18642 LLVMContext &C = *DAG.getContext();
18643 SDLoc DL(LD);
18644 // Details about the old load
18645 SDValue Ch = LD->getChain();
18646 SDValue BasePtr = LD->getBasePtr();
18647 Align Alignment = LD->getOriginalAlign();
18648 MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
18649 AAMDNodes AAInfo = LD->getAAInfo();
18650
18651 SDValue Offset = DAG.getUNDEF(BasePtr.getValueType());
18652 EVT NewFromVT = EVT::getVectorVT(
18653 C, EVT::getIntegerVT(C, FromEltVT.getScalarSizeInBits()), NumElements);
18654 EVT NewToVT = EVT::getVectorVT(
18655 C, EVT::getIntegerVT(C, ToEltVT.getScalarSizeInBits()), NumElements);
18656
18657 SmallVector<SDValue, 4> Loads;
18658 SmallVector<SDValue, 4> Chains;
18659 for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
18660 unsigned NewOffset = (i * NewFromVT.getSizeInBits()) / 8;
18661 SDValue NewPtr =
18662 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::Fixed(NewOffset));
18663
18664 SDValue NewLoad =
18665 DAG.getLoad(ISD::UNINDEXED, NewExtType, NewToVT, DL, Ch, NewPtr, Offset,
18666 LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT,
18667 Alignment, MMOFlags, AAInfo);
18668 Loads.push_back(NewLoad);
18669 Chains.push_back(SDValue(NewLoad.getNode(), 1));
18670 }
18671
18672 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
18673 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewChain);
18674 return DAG.getMergeValues(Loads, DL);
18675}
18676
18677 // Perform combines for MVEEXT. If it has not been optimized to anything better
18678 // before lowering, it gets converted to a stack store and extloads performing
18679 // the extend whilst still keeping the same lane ordering.
18680SDValue ARMTargetLowering::PerformMVEExtCombine(
18681 SDNode *N, TargetLowering::DAGCombinerInfo &DCI) const {
18682 SelectionDAG &DAG = DCI.DAG;
18683 EVT VT = N->getValueType(0);
18684 SDLoc DL(N);
18685   assert(N->getNumValues() == 2 && "Expected MVEEXT with 2 elements");
18686   assert((VT == MVT::v4i32 || VT == MVT::v8i16) && "Unexpected MVEEXT type");
18687
18688 EVT ExtVT = N->getOperand(0).getValueType().getHalfNumVectorElementsVT(
18689 *DAG.getContext());
18690 auto Extend = [&](SDValue V) {
18691 SDValue VVT = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, V);
18692 return N->getOpcode() == ARMISD::MVESEXT
18693 ? DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, VVT,
18694 DAG.getValueType(ExtVT))
18695 : DAG.getZeroExtendInReg(VVT, DL, ExtVT);
18696 };
18697
18698 // MVEEXT(VDUP) -> SIGN_EXTEND_INREG(VDUP)
18699 if (N->getOperand(0).getOpcode() == ARMISD::VDUP) {
18700 SDValue Ext = Extend(N->getOperand(0));
18701 return DAG.getMergeValues({Ext, Ext}, DL);
18702 }
18703
18704 // MVEEXT(shuffle) -> SIGN_EXTEND_INREG/ZERO_EXTEND_INREG
18705 if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0))) {
18706 ArrayRef<int> Mask = SVN->getMask();
18707     assert(Mask.size() == 2 * VT.getVectorNumElements());
18708     assert(Mask.size() == SVN->getValueType(0).getVectorNumElements());
18709 unsigned Rev = VT == MVT::v4i32 ? ARMISD::VREV32 : ARMISD::VREV16;
18710 SDValue Op0 = SVN->getOperand(0);
18711 SDValue Op1 = SVN->getOperand(1);
18712
18713 auto CheckInregMask = [&](int Start, int Offset) {
18714 for (int Idx = 0, E = VT.getVectorNumElements(); Idx < E; ++Idx)
18715 if (Mask[Start + Idx] >= 0 && Mask[Start + Idx] != Idx * 2 + Offset)
18716 return false;
18717 return true;
18718 };
18719 SDValue V0 = SDValue(N, 0);
18720 SDValue V1 = SDValue(N, 1);
18721 if (CheckInregMask(0, 0))
18722 V0 = Extend(Op0);
18723 else if (CheckInregMask(0, 1))
18724 V0 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op0));
18725 else if (CheckInregMask(0, Mask.size()))
18726 V0 = Extend(Op1);
18727 else if (CheckInregMask(0, Mask.size() + 1))
18728 V0 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op1));
18729
18730 if (CheckInregMask(VT.getVectorNumElements(), Mask.size()))
18731 V1 = Extend(Op1);
18732 else if (CheckInregMask(VT.getVectorNumElements(), Mask.size() + 1))
18733 V1 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op1));
18734 else if (CheckInregMask(VT.getVectorNumElements(), 0))
18735 V1 = Extend(Op0);
18736 else if (CheckInregMask(VT.getVectorNumElements(), 1))
18737 V1 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op0));
18738
18739 if (V0.getNode() != N || V1.getNode() != N)
18740 return DAG.getMergeValues({V0, V1}, DL);
18741 }
18742
18743 // MVEEXT(load) -> extload, extload
18744 if (N->getOperand(0)->getOpcode() == ISD::LOAD)
18745 if (SDValue L = PerformSplittingMVEEXTToWideningLoad(N, DAG))
18746 return L;
18747
18748 if (!DCI.isAfterLegalizeDAG())
18749 return SDValue();
18750
18751 // Lower to a stack store and reload:
18752 // VSTRW.32 a, stack; VLDRH.32 stack; VLDRH.32 stack+8;
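  //   For instance, an MVESEXT from v8i16 to two v4i32 results stores the
  //   whole v8i16 input (16 bytes), then sign-extending-loads v4i16 halves
  //   from offsets 0 and 8 to produce the two v4i32 values.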
18753 SDValue StackPtr = DAG.CreateStackTemporary(TypeSize::Fixed(16), Align(4));
18754 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
18755 int NumOuts = N->getNumValues();
18756   assert((NumOuts == 2 || NumOuts == 4) &&
18757          "Expected 2 or 4 outputs to an MVEEXT");
18758 EVT LoadVT = N->getOperand(0).getValueType().getHalfNumVectorElementsVT(
18759 *DAG.getContext());
18760 if (N->getNumOperands() == 4)
18761 LoadVT = LoadVT.getHalfNumVectorElementsVT(*DAG.getContext());
18762
18763 MachinePointerInfo MPI =
18764 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI, 0);
18765 SDValue Chain = DAG.getStore(DAG.getEntryNode(), DL, N->getOperand(0),
18766 StackPtr, MPI, Align(4));
18767
18768 SmallVector<SDValue> Loads;
18769 for (int I = 0; I < NumOuts; I++) {
18770 SDValue Ptr = DAG.getNode(
18771 ISD::ADD, DL, StackPtr.getValueType(), StackPtr,
18772 DAG.getConstant(I * 16 / NumOuts, DL, StackPtr.getValueType()));
18773 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(
18774 DAG.getMachineFunction(), SPFI, I * 16 / NumOuts);
18775 SDValue Load = DAG.getExtLoad(
18776 N->getOpcode() == ARMISD::MVESEXT ? ISD::SEXTLOAD : ISD::ZEXTLOAD, DL,
18777 VT, Chain, Ptr, MPI, LoadVT, Align(4));
18778 Loads.push_back(Load);
18779 }
18780
18781 return DAG.getMergeValues(Loads, DL);
18782}
18783
18784SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
18785 DAGCombinerInfo &DCI) const {
18786 switch (N->getOpcode()) {
18787 default: break;
18788 case ISD::SELECT_CC:
18789 case ISD::SELECT: return PerformSELECTCombine(N, DCI, Subtarget);
18790 case ISD::VSELECT: return PerformVSELECTCombine(N, DCI, Subtarget);
18791 case ISD::SETCC: return PerformVSetCCToVCTPCombine(N, DCI, Subtarget);
18792 case ISD::ABS: return PerformABSCombine(N, DCI, Subtarget);
18793 case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget);
18794 case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget);
18795 case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget);
18796 case ISD::SUB: return PerformSUBCombine(N, DCI, Subtarget);
18797 case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
18798 case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
18799 case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);
18800 case ISD::AND: return PerformANDCombine(N, DCI, Subtarget);
18801 case ISD::BRCOND:
18802 case ISD::BR_CC: return PerformHWLoopCombine(N, DCI, Subtarget);
18803 case ARMISD::ADDC:
18804 case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI, Subtarget);
18805 case ARMISD::SUBE: return PerformAddeSubeCombine(N, DCI, Subtarget);
18806 case ARMISD::BFI: return PerformBFICombine(N, DCI.DAG);
18807 case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget);
18808 case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
18809 case ARMISD::VMOVhr: return PerformVMOVhrCombine(N, DCI);
18810 case ARMISD::VMOVrh: return PerformVMOVrhCombine(N, DCI.DAG);
18811 case ISD::STORE: return PerformSTORECombine(N, DCI, Subtarget);
18812 case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget);
18813 case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
18814 case ISD::EXTRACT_VECTOR_ELT:
18815 return PerformExtractEltCombine(N, DCI, Subtarget);
18816 case ISD::SIGN_EXTEND_INREG: return PerformSignExtendInregCombine(N, DCI.DAG);
18817 case ISD::INSERT_SUBVECTOR: return PerformInsertSubvectorCombine(N, DCI);
18818 case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
18819 case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI, Subtarget);
18820 case ARMISD::VDUP: return PerformVDUPCombine(N, DCI.DAG, Subtarget);
18821 case ISD::FP_TO_SINT:
18822 case ISD::FP_TO_UINT:
18823 return PerformVCVTCombine(N, DCI.DAG, Subtarget);
18824 case ISD::FADD:
18825 return PerformFADDCombine(N, DCI.DAG, Subtarget);
18826 case ISD::FDIV:
18827 return PerformVDIVCombine(N, DCI.DAG, Subtarget);
18828 case ISD::INTRINSIC_WO_CHAIN:
18829 return PerformIntrinsicCombine(N, DCI);
18830 case ISD::SHL:
18831 case ISD::SRA:
18832 case ISD::SRL:
18833 return PerformShiftCombine(N, DCI, Subtarget);
18834 case ISD::SIGN_EXTEND:
18835 case ISD::ZERO_EXTEND:
18836 case ISD::ANY_EXTEND:
18837 return PerformExtendCombine(N, DCI.DAG, Subtarget);
18838 case ISD::FP_EXTEND:
18839 return PerformFPExtendCombine(N, DCI.DAG, Subtarget);
18840 case ISD::SMIN:
18841 case ISD::UMIN:
18842 case ISD::SMAX:
18843 case ISD::UMAX:
18844 return PerformMinMaxCombine(N, DCI.DAG, Subtarget);
18845 case ARMISD::CMOV:
18846 return PerformCMOVCombine(N, DCI.DAG);
18847 case ARMISD::BRCOND:
18848 return PerformBRCONDCombine(N, DCI.DAG);
18849 case ARMISD::CMPZ:
18850 return PerformCMPZCombine(N, DCI.DAG);
18851 case ARMISD::CSINC:
18852 case ARMISD::CSINV:
18853 case ARMISD::CSNEG:
18854 return PerformCSETCombine(N, DCI.DAG);
18855 case ISD::LOAD:
18856 return PerformLOADCombine(N, DCI, Subtarget);
18857 case ARMISD::VLD1DUP:
18858 case ARMISD::VLD2DUP:
18859 case ARMISD::VLD3DUP:
18860 case ARMISD::VLD4DUP:
18861 return PerformVLDCombine(N, DCI);
18862 case ARMISD::BUILD_VECTOR:
18863 return PerformARMBUILD_VECTORCombine(N, DCI);
18864 case ISD::BITCAST:
18865 return PerformBITCASTCombine(N, DCI, Subtarget);
18866 case ARMISD::PREDICATE_CAST:
18867 return PerformPREDICATE_CASTCombine(N, DCI);
18868 case ARMISD::VECTOR_REG_CAST:
18869 return PerformVECTOR_REG_CASTCombine(N, DCI.DAG, Subtarget);
18870 case ARMISD::MVETRUNC:
18871 return PerformMVETruncCombine(N, DCI);
18872 case ARMISD::MVESEXT:
18873 case ARMISD::MVEZEXT:
18874 return PerformMVEExtCombine(N, DCI);
18875 case ARMISD::VCMP:
18876 return PerformVCMPCombine(N, DCI.DAG, Subtarget);
18877 case ISD::VECREDUCE_ADD:
18878 return PerformVECREDUCE_ADDCombine(N, DCI.DAG, Subtarget);
18879 case ARMISD::VADDVs:
18880 case ARMISD::VADDVu:
18881 case ARMISD::VADDLVs:
18882 case ARMISD::VADDLVu:
18883 case ARMISD::VADDLVAs:
18884 case ARMISD::VADDLVAu:
18885 case ARMISD::VMLAVs:
18886 case ARMISD::VMLAVu:
18887 case ARMISD::VMLALVs:
18888 case ARMISD::VMLALVu:
18889 case ARMISD::VMLALVAs:
18890 case ARMISD::VMLALVAu:
18891 return PerformReduceShuffleCombine(N, DCI.DAG);
18892 case ARMISD::VMOVN:
18893 return PerformVMOVNCombine(N, DCI);
18894 case ARMISD::VQMOVNs:
18895 case ARMISD::VQMOVNu:
18896 return PerformVQMOVNCombine(N, DCI);
18897 case ARMISD::VQDMULH:
18898 return PerformVQDMULHCombine(N, DCI);
18899 case ARMISD::ASRL:
18900 case ARMISD::LSRL:
18901 case ARMISD::LSLL:
18902 return PerformLongShiftCombine(N, DCI.DAG);
18903 case ARMISD::SMULWB: {
18904 unsigned BitWidth = N->getValueType(0).getSizeInBits();
18905 APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
18906 if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
18907 return SDValue();
18908 break;
18909 }
18910 case ARMISD::SMULWT: {
18911 unsigned BitWidth = N->getValueType(0).getSizeInBits();
18912 APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
18913 if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
18914 return SDValue();
18915 break;
18916 }
18917 case ARMISD::SMLALBB:
18918 case ARMISD::QADD16b:
18919 case ARMISD::QSUB16b:
18920 case ARMISD::UQADD16b:
18921 case ARMISD::UQSUB16b: {
18922 unsigned BitWidth = N->getValueType(0).getSizeInBits();
18923 APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
18924 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
18925 (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
18926 return SDValue();
18927 break;
18928 }
18929 case ARMISD::SMLALBT: {
18930 unsigned LowWidth = N->getOperand(0).getValueType().getSizeInBits();
18931 APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
18932 unsigned HighWidth = N->getOperand(1).getValueType().getSizeInBits();
18933 APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
18934 if ((SimplifyDemandedBits(N->getOperand(0), LowMask, DCI)) ||
18935 (SimplifyDemandedBits(N->getOperand(1), HighMask, DCI)))
18936 return SDValue();
18937 break;
18938 }
18939 case ARMISD::SMLALTB: {
18940 unsigned HighWidth = N->getOperand(0).getValueType().getSizeInBits();
18941 APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
18942 unsigned LowWidth = N->getOperand(1).getValueType().getSizeInBits();
18943 APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
18944 if ((SimplifyDemandedBits(N->getOperand(0), HighMask, DCI)) ||
18945 (SimplifyDemandedBits(N->getOperand(1), LowMask, DCI)))
18946 return SDValue();
18947 break;
18948 }
18949 case ARMISD::SMLALTT: {
18950 unsigned BitWidth = N->getValueType(0).getSizeInBits();
18951 APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
18952 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
18953 (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
18954 return SDValue();
18955 break;
18956 }
18957 case ARMISD::QADD8b:
18958 case ARMISD::QSUB8b:
18959 case ARMISD::UQADD8b:
18960 case ARMISD::UQSUB8b: {
18961 unsigned BitWidth = N->getValueType(0).getSizeInBits();
18962 APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 8);
18963 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
18964 (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
18965 return SDValue();
18966 break;
18967 }
18968 case ISD::INTRINSIC_VOID:
18969 case ISD::INTRINSIC_W_CHAIN:
18970 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
18971 case Intrinsic::arm_neon_vld1:
18972 case Intrinsic::arm_neon_vld1x2:
18973 case Intrinsic::arm_neon_vld1x3:
18974 case Intrinsic::arm_neon_vld1x4:
18975 case Intrinsic::arm_neon_vld2:
18976 case Intrinsic::arm_neon_vld3:
18977 case Intrinsic::arm_neon_vld4:
18978 case Intrinsic::arm_neon_vld2lane:
18979 case Intrinsic::arm_neon_vld3lane:
18980 case Intrinsic::arm_neon_vld4lane:
18981 case Intrinsic::arm_neon_vld2dup:
18982 case Intrinsic::arm_neon_vld3dup:
18983 case Intrinsic::arm_neon_vld4dup:
18984 case Intrinsic::arm_neon_vst1:
18985 case Intrinsic::arm_neon_vst1x2:
18986 case Intrinsic::arm_neon_vst1x3:
18987 case Intrinsic::arm_neon_vst1x4:
18988 case Intrinsic::arm_neon_vst2:
18989 case Intrinsic::arm_neon_vst3:
18990 case Intrinsic::arm_neon_vst4:
18991 case Intrinsic::arm_neon_vst2lane:
18992 case Intrinsic::arm_neon_vst3lane:
18993 case Intrinsic::arm_neon_vst4lane:
18994 return PerformVLDCombine(N, DCI);
18995 case Intrinsic::arm_mve_vld2q:
18996 case Intrinsic::arm_mve_vld4q:
18997 case Intrinsic::arm_mve_vst2q:
18998 case Intrinsic::arm_mve_vst4q:
18999 return PerformMVEVLDCombine(N, DCI);
19000 default: break;
19001 }
19002 break;
19003 }
19004 return SDValue();
19005}
19006
19007bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
19008 EVT VT) const {
19009 return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
19010}
19011
19012bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned,
19013 Align Alignment,
19014 MachineMemOperand::Flags,
19015 unsigned *Fast) const {
19016   // Depends on what it gets converted into if the type is weird.
19017 if (!VT.isSimple())
19018 return false;
19019
19020 // The AllowsUnaligned flag models the SCTLR.A setting in ARM cpus
19021 bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
19022 auto Ty = VT.getSimpleVT().SimpleTy;
19023
19024 if (Ty == MVT::i8 || Ty == MVT::i16 || Ty == MVT::i32) {
19025     // Unaligned access can use (for example) LDRB, LDRH, LDR
19026 if (AllowsUnaligned) {
19027 if (Fast)
19028 *Fast = Subtarget->hasV7Ops();
19029 return true;
19030 }
19031 }
19032
19033 if (Ty == MVT::f64 || Ty == MVT::v2f64) {
19034 // For any little-endian targets with neon, we can support unaligned ld/st
19035 // of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8.
19036 // A big-endian target may also explicitly support unaligned accesses
19037 if (Subtarget->hasNEON() && (AllowsUnaligned || Subtarget->isLittle())) {
19038 if (Fast)
19039 *Fast = 1;
19040 return true;
19041 }
19042 }
19043
19044 if (!Subtarget->hasMVEIntegerOps())
19045 return false;
19046
19047 // These are for predicates
19048 if ((Ty == MVT::v16i1 || Ty == MVT::v8i1 || Ty == MVT::v4i1 ||
19049 Ty == MVT::v2i1)) {
19050 if (Fast)
19051 *Fast = 1;
19052 return true;
19053 }
19054
19055 // These are for truncated stores/narrowing loads. They are fine so long as
19056 // the alignment is at least the size of the item being loaded
19057 if ((Ty == MVT::v4i8 || Ty == MVT::v8i8 || Ty == MVT::v4i16) &&
19058 Alignment >= VT.getScalarSizeInBits() / 8) {
19059 if (Fast)
19060       *Fast = 1;
19061 return true;
19062 }
19063
19064 // In little-endian MVE, the store instructions VSTRB.U8, VSTRH.U16 and
19065 // VSTRW.U32 all store the vector register in exactly the same format, and
19066 // differ only in the range of their immediate offset field and the required
19067 // alignment. So there is always a store that can be used, regardless of
19068 // actual type.
19069 //
19070   // For big endian, that is not the case. But we can still emit a (VSTRB.U8;
19071 // VREV64.8) pair and get the same effect. This will likely be better than
19072 // aligning the vector through the stack.
19073 if (Ty == MVT::v16i8 || Ty == MVT::v8i16 || Ty == MVT::v8f16 ||
19074 Ty == MVT::v4i32 || Ty == MVT::v4f32 || Ty == MVT::v2i64 ||
19075 Ty == MVT::v2f64) {
19076 if (Fast)
19077 *Fast = 1;
19078 return true;
19079 }
19080
19081 return false;
19082}
19083
19084
19085EVT ARMTargetLowering::getOptimalMemOpType(
19086 const MemOp &Op, const AttributeList &FuncAttributes) const {
19087 // See if we can use NEON instructions for this...
19088 if ((Op.isMemcpy() || Op.isZeroMemset()) && Subtarget->hasNEON() &&
19089 !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
19090 unsigned Fast;
19091 if (Op.size() >= 16 &&
19092 (Op.isAligned(Align(16)) ||
19093 (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, Align(1),
19094 MachineMemOperand::MONone, &Fast) &&
19095 Fast))) {
19096 return MVT::v2f64;
19097 } else if (Op.size() >= 8 &&
19098 (Op.isAligned(Align(8)) ||
19099 (allowsMisalignedMemoryAccesses(
19100 MVT::f64, 0, Align(1), MachineMemOperand::MONone, &Fast) &&
19101 Fast))) {
19102 return MVT::f64;
19103 }
19104 }
19105
19106 // Let the target-independent logic figure it out.
19107 return MVT::Other;
19108}
19109
19110// 64-bit integers are split into their high and low parts and held in two
19111// different registers, so the trunc is free since the low register can just
19112// be used.
19113bool ARMTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
19114 if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
19115 return false;
19116 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
19117 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
19118 return (SrcBits == 64 && DestBits == 32);
19119}
19120
19121bool ARMTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
19122 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
19123 !DstVT.isInteger())
19124 return false;
19125 unsigned SrcBits = SrcVT.getSizeInBits();
19126 unsigned DestBits = DstVT.getSizeInBits();
19127 return (SrcBits == 64 && DestBits == 32);
19128}
19129
19130bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
19131 if (Val.getOpcode() != ISD::LOAD)
19132 return false;
19133
19134 EVT VT1 = Val.getValueType();
19135 if (!VT1.isSimple() || !VT1.isInteger() ||
19136 !VT2.isSimple() || !VT2.isInteger())
19137 return false;
19138
19139 switch (VT1.getSimpleVT().SimpleTy) {
19140 default: break;
19141 case MVT::i1:
19142 case MVT::i8:
19143 case MVT::i16:
19144 // 8-bit and 16-bit loads implicitly zero-extend to 32-bits.
19145 return true;
19146 }
19147
19148 return false;
19149}
19150
19151bool ARMTargetLowering::isFNegFree(EVT VT) const {
19152 if (!VT.isSimple())
19153 return false;
19154
19155 // There are quite a few FP16 instructions (e.g. VNMLA, VNMLS, etc.) that
19156 // negate values directly (fneg is free). So, we don't want to let the DAG
19157 // combiner rewrite fneg into xors and some other instructions. For f16 and
19158 // FullFP16 argument passing, some bitcast nodes may be introduced,
19159 // triggering this DAG combine rewrite, so we are avoiding that with this.
19160 switch (VT.getSimpleVT().SimpleTy) {
19161 default: break;
19162 case MVT::f16:
19163 return Subtarget->hasFullFP16();
19164 }
19165
19166 return false;
19167}
19168
19169/// Check if Ext1 and Ext2 are extends of the same type, doubling the bitwidth
19170/// of the vector elements.
19171static bool areExtractExts(Value *Ext1, Value *Ext2) {
19172 auto areExtDoubled = [](Instruction *Ext) {
19173 return Ext->getType()->getScalarSizeInBits() ==
19174 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
19175 };
19176
19177 if (!match(Ext1, m_ZExtOrSExt(m_Value())) ||
19178 !match(Ext2, m_ZExtOrSExt(m_Value())) ||
19179 !areExtDoubled(cast<Instruction>(Ext1)) ||
19180 !areExtDoubled(cast<Instruction>(Ext2)))
19181 return false;
19182
19183 return true;
19184}
19185
19186/// Check if sinking \p I's operands to I's basic block is profitable, because
19187/// the operands can be folded into a target instruction, e.g.
19188/// sext/zext can be folded into vsubl.
19189bool ARMTargetLowering::shouldSinkOperands(Instruction *I,
19190 SmallVectorImpl<Use *> &Ops) const {
19191 if (!I->getType()->isVectorTy())
19192 return false;
19193
19194 if (Subtarget->hasNEON()) {
19195 switch (I->getOpcode()) {
19196 case Instruction::Sub:
19197 case Instruction::Add: {
19198 if (!areExtractExts(I->getOperand(0), I->getOperand(1)))
19199 return false;
19200 Ops.push_back(&I->getOperandUse(0));
19201 Ops.push_back(&I->getOperandUse(1));
19202 return true;
19203 }
19204 default:
19205 return false;
19206 }
19207 }
19208
19209 if (!Subtarget->hasMVEIntegerOps())
19210 return false;
19211
19212 auto IsFMSMul = [&](Instruction *I) {
19213 if (!I->hasOneUse())
19214 return false;
19215 auto *Sub = cast<Instruction>(*I->users().begin());
19216 return Sub->getOpcode() == Instruction::FSub && Sub->getOperand(1) == I;
19217 };
19218 auto IsFMS = [&](Instruction *I) {
19219 if (match(I->getOperand(0), m_FNeg(m_Value())) ||
19220 match(I->getOperand(1), m_FNeg(m_Value())))
19221 return true;
19222 return false;
19223 };
19224
19225 auto IsSinker = [&](Instruction *I, int Operand) {
19226 switch (I->getOpcode()) {
19227 case Instruction::Add:
19228 case Instruction::Mul:
19229 case Instruction::FAdd:
19230 case Instruction::ICmp:
19231 case Instruction::FCmp:
19232 return true;
19233 case Instruction::FMul:
19234 return !IsFMSMul(I);
19235 case Instruction::Sub:
19236 case Instruction::FSub:
19237 case Instruction::Shl:
19238 case Instruction::LShr:
19239 case Instruction::AShr:
19240 return Operand == 1;
19241 case Instruction::Call:
19242 if (auto *II = dyn_cast<IntrinsicInst>(I)) {
19243 switch (II->getIntrinsicID()) {
19244 case Intrinsic::fma:
19245 return !IsFMS(I);
19246 case Intrinsic::sadd_sat:
19247 case Intrinsic::uadd_sat:
19248 case Intrinsic::arm_mve_add_predicated:
19249 case Intrinsic::arm_mve_mul_predicated:
19250 case Intrinsic::arm_mve_qadd_predicated:
19251 case Intrinsic::arm_mve_vhadd:
19252 case Intrinsic::arm_mve_hadd_predicated:
19253 case Intrinsic::arm_mve_vqdmull:
19254 case Intrinsic::arm_mve_vqdmull_predicated:
19255 case Intrinsic::arm_mve_vqdmulh:
19256 case Intrinsic::arm_mve_qdmulh_predicated:
19257 case Intrinsic::arm_mve_vqrdmulh:
19258 case Intrinsic::arm_mve_qrdmulh_predicated:
19259 case Intrinsic::arm_mve_fma_predicated:
19260 return true;
19261 case Intrinsic::ssub_sat:
19262 case Intrinsic::usub_sat:
19263 case Intrinsic::arm_mve_sub_predicated:
19264 case Intrinsic::arm_mve_qsub_predicated:
19265 case Intrinsic::arm_mve_hsub_predicated:
19266 case Intrinsic::arm_mve_vhsub:
19267 return Operand == 1;
19268 default:
19269 return false;
19270 }
19271 }
19272 return false;
19273 default:
19274 return false;
19275 }
19276 };
19277
19278 for (auto OpIdx : enumerate(I->operands())) {
19279 Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
19280 // Make sure we are not already sinking this operand
19281 if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
19282 continue;
19283
19284 Instruction *Shuffle = Op;
19285 if (Shuffle->getOpcode() == Instruction::BitCast)
19286 Shuffle = dyn_cast<Instruction>(Shuffle->getOperand(0));
19287 // We are looking for a splat that can be sunk.
19288 if (!Shuffle ||
19289 !match(Shuffle, m_Shuffle(
19290 m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
19291 m_Undef(), m_ZeroMask())))
19292 continue;
19293 if (!IsSinker(I, OpIdx.index()))
19294 continue;
19295
19296    // All uses of the shuffle should be sunk to avoid duplicating it across GPR
19297    // and vector registers.
19298 for (Use &U : Op->uses()) {
19299 Instruction *Insn = cast<Instruction>(U.getUser());
19300 if (!IsSinker(Insn, U.getOperandNo()))
19301 return false;
19302 }
19303
19304 Ops.push_back(&Shuffle->getOperandUse(0));
19305 if (Shuffle != Op)
19306 Ops.push_back(&Op->getOperandUse(0));
19307 Ops.push_back(&OpIdx.value());
19308 }
19309 return true;
19310}
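// ---------------------------------------------------------------------------
// Illustrative note (not part of the original file): the pattern this hook
// targets is a splat built from insertelement + shufflevector that feeds a
// sinkable vector operation, e.g. in LLVM IR:
//
//   %ins   = insertelement <4 x i32> undef, i32 %s, i32 0
//   %splat = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
//   %add   = add <4 x i32> %v, %splat
//
// Sinking %ins/%splat next to %add lets instruction selection fold the scalar
// straight into the vector instruction (e.g. an MVE VADD taking a GPR operand)
// instead of materialising the splat once and keeping it live in a Q register.
// ---------------------------------------------------------------------------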
19311
19312Type *ARMTargetLowering::shouldConvertSplatType(ShuffleVectorInst *SVI) const {
19313 if (!Subtarget->hasMVEIntegerOps())
19314 return nullptr;
19315 Type *SVIType = SVI->getType();
19316 Type *ScalarType = SVIType->getScalarType();
19317
19318 if (ScalarType->isFloatTy())
19319 return Type::getInt32Ty(SVIType->getContext());
19320 if (ScalarType->isHalfTy())
19321 return Type::getInt16Ty(SVIType->getContext());
19322 return nullptr;
19323}
19324
19325bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
19326 EVT VT = ExtVal.getValueType();
19327
19328 if (!isTypeLegal(VT))
19329 return false;
19330
19331 if (auto *Ld = dyn_cast<MaskedLoadSDNode>(ExtVal.getOperand(0))) {
19332 if (Ld->isExpandingLoad())
19333 return false;
19334 }
19335
19336 if (Subtarget->hasMVEIntegerOps())
19337 return true;
19338
19339 // Don't create a loadext if we can fold the extension into a wide/long
19340 // instruction.
19341 // If there's more than one user instruction, the loadext is desirable no
19342 // matter what. There can be two uses by the same instruction.
19343 if (ExtVal->use_empty() ||
19344 !ExtVal->use_begin()->isOnlyUserOf(ExtVal.getNode()))
19345 return true;
19346
19347 SDNode *U = *ExtVal->use_begin();
19348 if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB ||
19349 U->getOpcode() == ISD::SHL || U->getOpcode() == ARMISD::VSHLIMM))
19350 return false;
19351
19352 return true;
19353}
19354
19355bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
19356 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
19357 return false;
19358
19359 if (!isTypeLegal(EVT::getEVT(Ty1)))
19360 return false;
19361
19362  assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop");
19363
19364 // Assuming the caller doesn't have a zeroext or signext return parameter,
19365 // truncation all the way down to i1 is valid.
19366 return true;
19367}
19368
19369/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
19370/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
19371/// expanded to FMAs when this method returns true, otherwise fmuladd is
19372/// expanded to fmul + fadd.
19373///
19374/// ARM supports both fused and unfused multiply-add operations; we already
19375/// lower a pair of fmul and fadd to the latter so it's not clear that there
19376/// would be a gain or that the gain would be worthwhile enough to risk
19377/// correctness bugs.
19378///
19379/// For MVE, we set this to true as it helps simplify the need for some
19380/// patterns (and we don't have the non-fused floating point instruction).
19381bool ARMTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
19382 EVT VT) const {
19383 if (!VT.isSimple())
19384 return false;
19385
19386 switch (VT.getSimpleVT().SimpleTy) {
19387 case MVT::v4f32:
19388 case MVT::v8f16:
19389 return Subtarget->hasMVEFloatOps();
19390 case MVT::f16:
19391 return Subtarget->useFPVFMx16();
19392 case MVT::f32:
19393 return Subtarget->useFPVFMx();
19394 case MVT::f64:
19395 return Subtarget->useFPVFMx64();
19396 default:
19397 break;
19398 }
19399
19400 return false;
19401}
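// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the original file): what "fused" buys is a
// single rounding step. A minimal standalone example, assuming <cmath> is
// available for std::fma:
static double fusedMulAdd(double A, double B, double C) {
  return std::fma(A, B, C);             // one rounding, like VFMA
}
static double unfusedMulAdd(double A, double B, double C) {
  return A * B + C;                     // product is rounded before the add
}
// With A = 1.0 + 0x1p-52, B = 1.0 - 0x1p-52 and C = -1.0 the exact product is
// 1.0 - 0x1p-104, so fusedMulAdd() returns -0x1p-104 while unfusedMulAdd()
// rounds the product to 1.0 first and returns 0.0.
// ---------------------------------------------------------------------------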
19402
19403static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
19404 if (V < 0)
19405 return false;
19406
19407 unsigned Scale = 1;
19408 switch (VT.getSimpleVT().SimpleTy) {
19409 case MVT::i1:
19410 case MVT::i8:
19411 // Scale == 1;
19412 break;
19413 case MVT::i16:
19414 // Scale == 2;
19415 Scale = 2;
19416 break;
19417 default:
19418 // On thumb1 we load most things (i32, i64, floats, etc) with a LDR
19419 // Scale == 4;
19420 Scale = 4;
19421 break;
19422 }
19423
19424 if ((V & (Scale - 1)) != 0)
19425 return false;
19426 return isUInt<5>(V / Scale);
19427}
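// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the original file): the Thumb1 rule above
// is "offset is non-negative, a multiple of the access size, and fits a 5-bit
// field once scaled". A standalone equivalent using plain integers:
static bool thumb1ImmOffsetLegal(long long V, unsigned Scale) {
  if (V < 0 || (V & (Scale - 1)) != 0)
    return false;                       // must be aligned to the access size
  return (V / Scale) < 32;              // isUInt<5>(V / Scale)
}
// thumb1ImmOffsetLegal(124, 4) -> true  (LDR  with #124)
// thumb1ImmOffsetLegal(62,  2) -> true  (LDRH with #62)
// thumb1ImmOffsetLegal(63,  2) -> false (not halfword-aligned)
// thumb1ImmOffsetLegal(128, 4) -> false (128 / 4 == 32 does not fit 5 bits)
// ---------------------------------------------------------------------------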
19428
19429static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
19430 const ARMSubtarget *Subtarget) {
19431 if (!VT.isInteger() && !VT.isFloatingPoint())
19432 return false;
19433 if (VT.isVector() && Subtarget->hasNEON())
19434 return false;
19435 if (VT.isVector() && VT.isFloatingPoint() && Subtarget->hasMVEIntegerOps() &&
19436 !Subtarget->hasMVEFloatOps())
19437 return false;
19438
19439 bool IsNeg = false;
19440 if (V < 0) {
19441 IsNeg = true;
19442 V = -V;
19443 }
19444
19445 unsigned NumBytes = std::max((unsigned)VT.getSizeInBits() / 8, 1U);
19446
19447 // MVE: size * imm7
19448 if (VT.isVector() && Subtarget->hasMVEIntegerOps()) {
19449 switch (VT.getSimpleVT().getVectorElementType().SimpleTy) {
19450 case MVT::i32:
19451 case MVT::f32:
19452 return isShiftedUInt<7,2>(V);
19453 case MVT::i16:
19454 case MVT::f16:
19455 return isShiftedUInt<7,1>(V);
19456 case MVT::i8:
19457 return isUInt<7>(V);
19458 default:
19459 return false;
19460 }
19461 }
19462
19463 // half VLDR: 2 * imm8
19464 if (VT.isFloatingPoint() && NumBytes == 2 && Subtarget->hasFPRegs16())
19465 return isShiftedUInt<8, 1>(V);
19466 // VLDR and LDRD: 4 * imm8
19467 if ((VT.isFloatingPoint() && Subtarget->hasVFP2Base()) || NumBytes == 8)
19468 return isShiftedUInt<8, 2>(V);
19469
19470 if (NumBytes == 1 || NumBytes == 2 || NumBytes == 4) {
19471 // + imm12 or - imm8
19472 if (IsNeg)
19473 return isUInt<8>(V);
19474 return isUInt<12>(V);
19475 }
19476
19477 return false;
19478}
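// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the original file): the MVE "size * imm7"
// case for 32-bit elements, written out as plain integer checks (this is what
// isShiftedUInt<7, 2> verifies on the absolute offset):
static bool mveWordOffsetLegal(long long V) {
  return V >= 0 && (V & 3) == 0 && (V >> 2) < 128;
}
// mveWordOffsetLegal(508) -> true  (127 * 4, the largest encodable offset)
// mveWordOffsetLegal(512) -> false (128 * 4 needs more than 7 bits once scaled)
// mveWordOffsetLegal(6)   -> false (not a multiple of the element size)
// ---------------------------------------------------------------------------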
19479
19480/// isLegalAddressImmediate - Return true if the integer value can be used
19481/// as the offset of the target addressing mode for load / store of the
19482/// given type.
19483static bool isLegalAddressImmediate(int64_t V, EVT VT,
19484 const ARMSubtarget *Subtarget) {
19485 if (V == 0)
19486 return true;
19487
19488 if (!VT.isSimple())
19489 return false;
19490
19491 if (Subtarget->isThumb1Only())
19492 return isLegalT1AddressImmediate(V, VT);
19493 else if (Subtarget->isThumb2())
19494 return isLegalT2AddressImmediate(V, VT, Subtarget);
19495
19496 // ARM mode.
19497 if (V < 0)
19498 V = - V;
19499 switch (VT.getSimpleVT().SimpleTy) {
19500 default: return false;
19501 case MVT::i1:
19502 case MVT::i8:
19503 case MVT::i32:
19504 // +- imm12
19505 return isUInt<12>(V);
19506 case MVT::i16:
19507 // +- imm8
19508 return isUInt<8>(V);
19509 case MVT::f32:
19510 case MVT::f64:
19511 if (!Subtarget->hasVFP2Base()) // FIXME: NEON?
19512 return false;
19513 return isShiftedUInt<8, 2>(V);
19514 }
19515}
19516
19517bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
19518 EVT VT) const {
19519 int Scale = AM.Scale;
19520 if (Scale < 0)
19521 return false;
19522
19523 switch (VT.getSimpleVT().SimpleTy) {
19524 default: return false;
19525 case MVT::i1:
19526 case MVT::i8:
19527 case MVT::i16:
19528 case MVT::i32:
19529 if (Scale == 1)
19530 return true;
19531 // r + r << imm
19532 Scale = Scale & ~1;
19533 return Scale == 2 || Scale == 4 || Scale == 8;
19534 case MVT::i64:
19535 // FIXME: What are we trying to model here? ldrd doesn't have an r + r
19536 // version in Thumb mode.
19537 // r + r
19538 if (Scale == 1)
19539 return true;
19540 // r * 2 (this can be lowered to r + r).
19541 if (!AM.HasBaseReg && Scale == 2)
19542 return true;
19543 return false;
19544 case MVT::isVoid:
19545 // Note, we allow "void" uses (basically, uses that aren't loads or
19546 // stores), because arm allows folding a scale into many arithmetic
19547 // operations. This should be made more precise and revisited later.
19548
19549 // Allow r << imm, but the imm has to be a multiple of two.
19550 if (Scale & 1) return false;
19551 return isPowerOf2_32(Scale);
19552 }
19553}
19554
19555bool ARMTargetLowering::isLegalT1ScaledAddressingMode(const AddrMode &AM,
19556 EVT VT) const {
19557 const int Scale = AM.Scale;
19558
19559 // Negative scales are not supported in Thumb1.
19560 if (Scale < 0)
19561 return false;
19562
19563 // Thumb1 addressing modes do not support register scaling excepting the
19564 // following cases:
19565 // 1. Scale == 1 means no scaling.
19566 // 2. Scale == 2 this can be lowered to r + r if there is no base register.
19567 return (Scale == 1) || (!AM.HasBaseReg && Scale == 2);
19568}
19569
19570/// isLegalAddressingMode - Return true if the addressing mode represented
19571/// by AM is legal for this target, for a load/store of the specified type.
19572bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL,
19573 const AddrMode &AM, Type *Ty,
19574 unsigned AS, Instruction *I) const {
19575 EVT VT = getValueType(DL, Ty, true);
19576 if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
19577 return false;
19578
19579 // Can never fold addr of global into load/store.
19580 if (AM.BaseGV)
19581 return false;
19582
19583 switch (AM.Scale) {
19584 case 0: // no scale reg, must be "r+i" or "r", or "i".
19585 break;
19586 default:
19587 // ARM doesn't support any R+R*scale+imm addr modes.
19588 if (AM.BaseOffs)
19589 return false;
19590
19591 if (!VT.isSimple())
19592 return false;
19593
19594 if (Subtarget->isThumb1Only())
19595 return isLegalT1ScaledAddressingMode(AM, VT);
19596
19597 if (Subtarget->isThumb2())
19598 return isLegalT2ScaledAddressingMode(AM, VT);
19599
19600 int Scale = AM.Scale;
19601 switch (VT.getSimpleVT().SimpleTy) {
19602 default: return false;
19603 case MVT::i1:
19604 case MVT::i8:
19605 case MVT::i32:
19606 if (Scale < 0) Scale = -Scale;
19607 if (Scale == 1)
19608 return true;
19609 // r + r << imm
19610 return isPowerOf2_32(Scale & ~1);
19611 case MVT::i16:
19612 case MVT::i64:
19613 // r +/- r
19614 if (Scale == 1 || (AM.HasBaseReg && Scale == -1))
19615 return true;
19616 // r * 2 (this can be lowered to r + r).
19617 if (!AM.HasBaseReg && Scale == 2)
19618 return true;
19619 return false;
19620
19621 case MVT::isVoid:
19622 // Note, we allow "void" uses (basically, uses that aren't loads or
19623 // stores), because arm allows folding a scale into many arithmetic
19624 // operations. This should be made more precise and revisited later.
19625
19626 // Allow r << imm, but the imm has to be a multiple of two.
19627 if (Scale & 1) return false;
19628 return isPowerOf2_32(Scale);
19629 }
19630 }
19631 return true;
19632}
19633
19634/// isLegalICmpImmediate - Return true if the specified immediate is legal
19635/// icmp immediate, that is the target has icmp instructions which can compare
19636/// a register against the immediate without having to materialize the
19637/// immediate into a register.
19638bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
19639 // Thumb2 and ARM modes can use cmn for negative immediates.
19640 if (!Subtarget->isThumb())
19641 return ARM_AM::getSOImmVal((uint32_t)Imm) != -1 ||
19642 ARM_AM::getSOImmVal(-(uint32_t)Imm) != -1;
19643 if (Subtarget->isThumb2())
19644 return ARM_AM::getT2SOImmVal((uint32_t)Imm) != -1 ||
19645 ARM_AM::getT2SOImmVal(-(uint32_t)Imm) != -1;
19646 // Thumb1 doesn't have cmn, and only 8-bit immediates.
19647 return Imm >= 0 && Imm <= 255;
19648}
19649
19650/// isLegalAddImmediate - Return true if the specified immediate is a legal add
19651/// *or sub* immediate, that is the target has add or sub instructions which can
19652/// add a register with the immediate without having to materialize the
19653/// immediate into a register.
19654bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const {
19655 // Same encoding for add/sub, just flip the sign.
19656 int64_t AbsImm = std::abs(Imm);
19657 if (!Subtarget->isThumb())
19658 return ARM_AM::getSOImmVal(AbsImm) != -1;
19659 if (Subtarget->isThumb2())
19660 return ARM_AM::getT2SOImmVal(AbsImm) != -1;
19661 // Thumb1 only has 8-bit unsigned immediate.
19662 return AbsImm >= 0 && AbsImm <= 255;
19663}
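// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the original file): the two predicates
// above ultimately ask whether a value fits the A32/T32 "modified immediate"
// encoding, an 8-bit value rotated right by an even amount. A simplified
// standalone check of the A32 form, ignoring the extra T32 patterns handled
// by ARM_AM::getT2SOImmVal:
static bool isA32ModifiedImm(unsigned V) {
  for (unsigned R = 0; R < 32; R += 2) {
    unsigned Rotated = R == 0 ? V : ((V << R) | (V >> (32 - R)));
    if (Rotated <= 0xFF)                // rotating left by R undoes "ROR #R"
      return true;
  }
  return false;
}
// isLegalICmpImmediate also accepts the negated value because CMN r0, #5 sets
// the same flags as the unencodable CMP r0, #-5, and isLegalAddImmediate
// accepts it because ADD and SUB share one encoding with the sign flipped.
// ---------------------------------------------------------------------------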
19664
19665// Return false to prevent folding
19666// (mul (add r, c0), c1) -> (add (mul r, c1), c0*c1) in DAGCombine,
19667// if the folding leads to worse code.
19668bool ARMTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
19669 SDValue ConstNode) const {
19670 // Let the DAGCombiner decide for vector types and large types.
19671 const EVT VT = AddNode.getValueType();
19672 if (VT.isVector() || VT.getScalarSizeInBits() > 32)
19673 return true;
19674
19675 // It is worse if c0 is legal add immediate, while c1*c0 is not
19676 // and has to be composed by at least two instructions.
19677 const ConstantSDNode *C0Node = cast<ConstantSDNode>(AddNode.getOperand(1));
19678 const ConstantSDNode *C1Node = cast<ConstantSDNode>(ConstNode);
19679 const int64_t C0 = C0Node->getSExtValue();
19680 APInt CA = C0Node->getAPIntValue() * C1Node->getAPIntValue();
19681 if (!isLegalAddImmediate(C0) || isLegalAddImmediate(CA.getSExtValue()))
19682 return true;
19683 if (ConstantMaterializationCost((unsigned)CA.getZExtValue(), Subtarget) > 1)
19684 return false;
19685
19686 // Default to true and let the DAGCombiner decide.
19687 return true;
19688}
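// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the original file): the rewrite in question
// is just distribution, so both shapes compute the same value; the hook only
// vetoes it when c0 is a cheap add immediate but c0*c1 would cost more than
// one instruction to materialise.
static long long mulAddOriginal(long long R, long long C0, long long C1) {
  return (R + C0) * C1;                 // (mul (add r, c0), c1)
}
static long long mulAddFolded(long long R, long long C0, long long C1) {
  return R * C1 + C0 * C1;              // (add (mul r, c1), c0*c1)
}
// ---------------------------------------------------------------------------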
19689
19690static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
19691 bool isSEXTLoad, SDValue &Base,
19692 SDValue &Offset, bool &isInc,
19693 SelectionDAG &DAG) {
19694 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
19695 return false;
19696
19697 if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
19698 // AddressingMode 3
19699 Base = Ptr->getOperand(0);
19700 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
19701 int RHSC = (int)RHS->getZExtValue();
19702 if (RHSC < 0 && RHSC > -256) {
19703        assert(Ptr->getOpcode() == ISD::ADD);
19704 isInc = false;
19705 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19706 return true;
19707 }
19708 }
19709 isInc = (Ptr->getOpcode() == ISD::ADD);
19710 Offset = Ptr->getOperand(1);
19711 return true;
19712 } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
19713 // AddressingMode 2
19714 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
19715 int RHSC = (int)RHS->getZExtValue();
19716 if (RHSC < 0 && RHSC > -0x1000) {
19717        assert(Ptr->getOpcode() == ISD::ADD);
19718 isInc = false;
19719 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19720 Base = Ptr->getOperand(0);
19721 return true;
19722 }
19723 }
19724
19725 if (Ptr->getOpcode() == ISD::ADD) {
19726 isInc = true;
19727 ARM_AM::ShiftOpc ShOpcVal=
19728 ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode());
19729 if (ShOpcVal != ARM_AM::no_shift) {
19730 Base = Ptr->getOperand(1);
19731 Offset = Ptr->getOperand(0);
19732 } else {
19733 Base = Ptr->getOperand(0);
19734 Offset = Ptr->getOperand(1);
19735 }
19736 return true;
19737 }
19738
19739 isInc = (Ptr->getOpcode() == ISD::ADD);
19740 Base = Ptr->getOperand(0);
19741 Offset = Ptr->getOperand(1);
19742 return true;
19743 }
19744
19745 // FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
19746 return false;
19747}
19748
19749static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
19750 bool isSEXTLoad, SDValue &Base,
19751 SDValue &Offset, bool &isInc,
19752 SelectionDAG &DAG) {
19753 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
19754 return false;
19755
19756 Base = Ptr->getOperand(0);
19757 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
19758 int RHSC = (int)RHS->getZExtValue();
19759 if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
19760      assert(Ptr->getOpcode() == ISD::ADD);
19761 isInc = false;
19762 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19763 return true;
19764 } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.
19765 isInc = Ptr->getOpcode() == ISD::ADD;
19766 Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0));
19767 return true;
19768 }
19769 }
19770
19771 return false;
19772}
19773
19774static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, Align Alignment,
19775 bool isSEXTLoad, bool IsMasked, bool isLE,
19776 SDValue &Base, SDValue &Offset,
19777 bool &isInc, SelectionDAG &DAG) {
19778 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
19779 return false;
19780 if (!isa<ConstantSDNode>(Ptr->getOperand(1)))
19781 return false;
19782
19783 // We allow LE non-masked loads to change the type (for example use a vldrb.8
19784 // as opposed to a vldrw.32). This can allow extra addressing modes or
19785 // alignments for what is otherwise an equivalent instruction.
19786 bool CanChangeType = isLE && !IsMasked;
19787
19788 ConstantSDNode *RHS = cast<ConstantSDNode>(Ptr->getOperand(1));
19789 int RHSC = (int)RHS->getZExtValue();
19790
19791 auto IsInRange = [&](int RHSC, int Limit, int Scale) {
19792 if (RHSC < 0 && RHSC > -Limit * Scale && RHSC % Scale == 0) {
19793      assert(Ptr->getOpcode() == ISD::ADD);
19794 isInc = false;
19795 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19796 return true;
19797 } else if (RHSC > 0 && RHSC < Limit * Scale && RHSC % Scale == 0) {
19798 isInc = Ptr->getOpcode() == ISD::ADD;
19799 Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0));
19800 return true;
19801 }
19802 return false;
19803 };
19804
19805 // Try to find a matching instruction based on s/zext, Alignment, Offset and
19806 // (in BE/masked) type.
19807 Base = Ptr->getOperand(0);
19808 if (VT == MVT::v4i16) {
19809 if (Alignment >= 2 && IsInRange(RHSC, 0x80, 2))
19810 return true;
19811 } else if (VT == MVT::v4i8 || VT == MVT::v8i8) {
19812 if (IsInRange(RHSC, 0x80, 1))
19813 return true;
19814 } else if (Alignment >= 4 &&
19815 (CanChangeType || VT == MVT::v4i32 || VT == MVT::v4f32) &&
19816 IsInRange(RHSC, 0x80, 4))
19817 return true;
19818 else if (Alignment >= 2 &&
19819 (CanChangeType || VT == MVT::v8i16 || VT == MVT::v8f16) &&
19820 IsInRange(RHSC, 0x80, 2))
19821 return true;
19822 else if ((CanChangeType || VT == MVT::v16i8) && IsInRange(RHSC, 0x80, 1))
19823 return true;
19824 return false;
19825}
19826
19827/// getPreIndexedAddressParts - returns true by value, base pointer and
19828/// offset pointer and addressing mode by reference if the node's address
19829/// can be legally represented as pre-indexed load / store address.
19830bool
19831ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
19832 SDValue &Offset,
19833 ISD::MemIndexedMode &AM,
19834 SelectionDAG &DAG) const {
19835 if (Subtarget->isThumb1Only())
19836 return false;
19837
19838 EVT VT;
19839 SDValue Ptr;
19840 Align Alignment;
19841 bool isSEXTLoad = false;
19842 bool IsMasked = false;
19843 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
19844 Ptr = LD->getBasePtr();
19845 VT = LD->getMemoryVT();
19846 Alignment = LD->getAlign();
19847 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19848 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
19849 Ptr = ST->getBasePtr();
19850 VT = ST->getMemoryVT();
19851 Alignment = ST->getAlign();
19852 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
19853 Ptr = LD->getBasePtr();
19854 VT = LD->getMemoryVT();
19855 Alignment = LD->getAlign();
19856 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19857 IsMasked = true;
19858 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
19859 Ptr = ST->getBasePtr();
19860 VT = ST->getMemoryVT();
19861 Alignment = ST->getAlign();
19862 IsMasked = true;
19863 } else
19864 return false;
19865
19866 bool isInc;
19867 bool isLegal = false;
19868 if (VT.isVector())
19869 isLegal = Subtarget->hasMVEIntegerOps() &&
19870 getMVEIndexedAddressParts(
19871 Ptr.getNode(), VT, Alignment, isSEXTLoad, IsMasked,
19872 Subtarget->isLittle(), Base, Offset, isInc, DAG);
19873 else {
19874 if (Subtarget->isThumb2())
19875 isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
19876 Offset, isInc, DAG);
19877 else
19878 isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
19879 Offset, isInc, DAG);
19880 }
19881 if (!isLegal)
19882 return false;
19883
19884 AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
19885 return true;
19886}
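// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the original file): pre- vs post-indexed
// addressing as a C-level analogy, with the rough ARM forms in comments.
static int preIndexedLoad(int *&P) {    // ldr r0, [r1, #4]!   base updated first
  P += 1;
  return *P;
}
static int postIndexedLoad(int *&P) {   // ldr r0, [r1], #4    base updated after
  int V = *P;
  P += 1;
  return V;
}
// getPreIndexedAddressParts/getPostIndexedAddressParts decide whether a nearby
// ADD/SUB of the base pointer can be folded into the memory operation as one
// of these updating forms.
// ---------------------------------------------------------------------------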
19887
19888/// getPostIndexedAddressParts - returns true by value, base pointer and
19889/// offset pointer and addressing mode by reference if this node can be
19890/// combined with a load / store to form a post-indexed load / store.
19891bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
19892 SDValue &Base,
19893 SDValue &Offset,
19894 ISD::MemIndexedMode &AM,
19895 SelectionDAG &DAG) const {
19896 EVT VT;
19897 SDValue Ptr;
19898 Align Alignment;
19899 bool isSEXTLoad = false, isNonExt;
19900 bool IsMasked = false;
19901 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
19902 VT = LD->getMemoryVT();
19903 Ptr = LD->getBasePtr();
19904 Alignment = LD->getAlign();
19905 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19906 isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
19907 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
19908 VT = ST->getMemoryVT();
19909 Ptr = ST->getBasePtr();
19910 Alignment = ST->getAlign();
19911 isNonExt = !ST->isTruncatingStore();
19912 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
19913 VT = LD->getMemoryVT();
19914 Ptr = LD->getBasePtr();
19915 Alignment = LD->getAlign();
19916 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19917 isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
19918 IsMasked = true;
19919 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
19920 VT = ST->getMemoryVT();
19921 Ptr = ST->getBasePtr();
19922 Alignment = ST->getAlign();
19923 isNonExt = !ST->isTruncatingStore();
19924 IsMasked = true;
19925 } else
19926 return false;
19927
19928 if (Subtarget->isThumb1Only()) {
19929 // Thumb-1 can do a limited post-inc load or store as an updating LDM. It
19930 // must be non-extending/truncating, i32, with an offset of 4.
19931    assert(Op->getValueType(0) == MVT::i32 && "Non-i32 post-inc op?!");
19932 if (Op->getOpcode() != ISD::ADD || !isNonExt)
19933 return false;
19934 auto *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1));
19935 if (!RHS || RHS->getZExtValue() != 4)
19936 return false;
19937 if (Alignment < Align(4))
19938 return false;
19939
19940 Offset = Op->getOperand(1);
19941 Base = Op->getOperand(0);
19942 AM = ISD::POST_INC;
19943 return true;
19944 }
19945
19946 bool isInc;
19947 bool isLegal = false;
19948 if (VT.isVector())
19949 isLegal = Subtarget->hasMVEIntegerOps() &&
19950 getMVEIndexedAddressParts(Op, VT, Alignment, isSEXTLoad, IsMasked,
19951 Subtarget->isLittle(), Base, Offset,
19952 isInc, DAG);
19953 else {
19954 if (Subtarget->isThumb2())
19955 isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
19956 isInc, DAG);
19957 else
19958 isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
19959 isInc, DAG);
19960 }
19961 if (!isLegal)
19962 return false;
19963
19964 if (Ptr != Base) {
19965 // Swap base ptr and offset to catch more post-index load / store when
19966 // it's legal. In Thumb2 mode, offset must be an immediate.
19967 if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
19968 !Subtarget->isThumb2())
19969 std::swap(Base, Offset);
19970
19971 // Post-indexed load / store update the base pointer.
19972 if (Ptr != Base)
19973 return false;
19974 }
19975
19976 AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
19977 return true;
19978}
19979
19980void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
19981 KnownBits &Known,
19982 const APInt &DemandedElts,
19983 const SelectionDAG &DAG,
19984 unsigned Depth) const {
19985 unsigned BitWidth = Known.getBitWidth();
19986 Known.resetAll();
19987 switch (Op.getOpcode()) {
19988 default: break;
19989 case ARMISD::ADDC:
19990 case ARMISD::ADDE:
19991 case ARMISD::SUBC:
19992 case ARMISD::SUBE:
19993 // Special cases when we convert a carry to a boolean.
19994 if (Op.getResNo() == 0) {
19995 SDValue LHS = Op.getOperand(0);
19996 SDValue RHS = Op.getOperand(1);
19997 // (ADDE 0, 0, C) will give us a single bit.
19998 if (Op->getOpcode() == ARMISD::ADDE && isNullConstant(LHS) &&
19999 isNullConstant(RHS)) {
20000 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
20001 return;
20002 }
20003 }
20004 break;
20005 case ARMISD::CMOV: {
20006 // Bits are known zero/one if known on the LHS and RHS.
20007 Known = DAG.computeKnownBits(Op.getOperand(0), Depth+1);
20008 if (Known.isUnknown())
20009 return;
20010
20011 KnownBits KnownRHS = DAG.computeKnownBits(Op.getOperand(1), Depth+1);
20012 Known = KnownBits::commonBits(Known, KnownRHS);
20013 return;
20014 }
20015 case ISD::INTRINSIC_W_CHAIN: {
20016 ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
20017 Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
20018 switch (IntID) {
20019 default: return;
20020 case Intrinsic::arm_ldaex:
20021 case Intrinsic::arm_ldrex: {
20022 EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
20023 unsigned MemBits = VT.getScalarSizeInBits();
20024 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
20025 return;
20026 }
20027 }
20028 }
20029 case ARMISD::BFI: {
20030 // Conservatively, we can recurse down the first operand
20031 // and just mask out all affected bits.
20032 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
20033
20034 // The operand to BFI is already a mask suitable for removing the bits it
20035 // sets.
20036 ConstantSDNode *CI = cast<ConstantSDNode>(Op.getOperand(2));
20037 const APInt &Mask = CI->getAPIntValue();
20038 Known.Zero &= Mask;
20039 Known.One &= Mask;
20040 return;
20041 }
20042 case ARMISD::VGETLANEs:
20043 case ARMISD::VGETLANEu: {
20044 const SDValue &SrcSV = Op.getOperand(0);
20045 EVT VecVT = SrcSV.getValueType();
20046    assert(VecVT.isVector() && "VGETLANE expected a vector type");
20047 const unsigned NumSrcElts = VecVT.getVectorNumElements();
20048 ConstantSDNode *Pos = cast<ConstantSDNode>(Op.getOperand(1).getNode());
20049    assert(Pos->getAPIntValue().ult(NumSrcElts) &&
20050           "VGETLANE index out of bounds");
20051 unsigned Idx = Pos->getZExtValue();
20052 APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx);
20053 Known = DAG.computeKnownBits(SrcSV, DemandedElt, Depth + 1);
20054
20055 EVT VT = Op.getValueType();
20056 const unsigned DstSz = VT.getScalarSizeInBits();
20057 const unsigned SrcSz = VecVT.getVectorElementType().getSizeInBits();
20058 (void)SrcSz;
20059    assert(SrcSz == Known.getBitWidth());
20060    assert(DstSz > SrcSz);
20061 if (Op.getOpcode() == ARMISD::VGETLANEs)
20062 Known = Known.sext(DstSz);
20063 else {
20064 Known = Known.zext(DstSz);
20065 }
20066    assert(DstSz == Known.getBitWidth());
20067 break;
20068 }
20069 case ARMISD::VMOVrh: {
20070 KnownBits KnownOp = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
20071    assert(KnownOp.getBitWidth() == 16);
20072 Known = KnownOp.zext(32);
20073 break;
20074 }
20075 case ARMISD::CSINC:
20076 case ARMISD::CSINV:
20077 case ARMISD::CSNEG: {
20078 KnownBits KnownOp0 = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
20079 KnownBits KnownOp1 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
20080
20081 // The result is either:
20082 // CSINC: KnownOp0 or KnownOp1 + 1
20083 // CSINV: KnownOp0 or ~KnownOp1
20084 // CSNEG: KnownOp0 or KnownOp1 * -1
20085 if (Op.getOpcode() == ARMISD::CSINC)
20086 KnownOp1 = KnownBits::computeForAddSub(
20087 true, false, KnownOp1, KnownBits::makeConstant(APInt(32, 1)));
20088 else if (Op.getOpcode() == ARMISD::CSINV)
20089 std::swap(KnownOp1.Zero, KnownOp1.One);
20090 else if (Op.getOpcode() == ARMISD::CSNEG)
20091 KnownOp1 = KnownBits::mul(
20092 KnownOp1, KnownBits::makeConstant(APInt(32, -1)));
20093
20094 Known = KnownBits::commonBits(KnownOp0, KnownOp1);
20095 break;
20096 }
20097 }
20098}
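// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the original file): the CMOV case above
// keeps only the bits that are known, with the same value, on both arms, which
// is what KnownBits::commonBits computes. In terms of plain masks:
struct KnownBitsSketch {
  unsigned Zero = 0;                    // bits known to be 0
  unsigned One = 0;                     // bits known to be 1
};
static KnownBitsSketch commonKnownBits(KnownBitsSketch A, KnownBitsSketch B) {
  return {A.Zero & B.Zero, A.One & B.One};
}
// ---------------------------------------------------------------------------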
20099
20100bool ARMTargetLowering::targetShrinkDemandedConstant(
20101 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
20102 TargetLoweringOpt &TLO) const {
20103 // Delay optimization, so we don't have to deal with illegal types, or block
20104 // optimizations.
20105 if (!TLO.LegalOps)
20106 return false;
20107
20108 // Only optimize AND for now.
20109 if (Op.getOpcode() != ISD::AND)
20110 return false;
20111
20112 EVT VT = Op.getValueType();
20113
20114 // Ignore vectors.
20115 if (VT.isVector())
20116 return false;
20117
20118  assert(VT == MVT::i32 && "Unexpected integer type");
20119
20120 // Make sure the RHS really is a constant.
20121 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
20122 if (!C)
20123 return false;
20124
20125 unsigned Mask = C->getZExtValue();
20126
20127 unsigned Demanded = DemandedBits.getZExtValue();
20128 unsigned ShrunkMask = Mask & Demanded;
20129 unsigned ExpandedMask = Mask | ~Demanded;
20130
20131 // If the mask is all zeros, let the target-independent code replace the
20132 // result with zero.
20133 if (ShrunkMask == 0)
20134 return false;
20135
20136 // If the mask is all ones, erase the AND. (Currently, the target-independent
20137 // code won't do this, so we have to do it explicitly to avoid an infinite
20138 // loop in obscure cases.)
20139 if (ExpandedMask == ~0U)
20140 return TLO.CombineTo(Op, Op.getOperand(0));
20141
20142 auto IsLegalMask = [ShrunkMask, ExpandedMask](unsigned Mask) -> bool {
20143 return (ShrunkMask & Mask) == ShrunkMask && (~ExpandedMask & Mask) == 0;
20144 };
20145 auto UseMask = [Mask, Op, VT, &TLO](unsigned NewMask) -> bool {
20146 if (NewMask == Mask)
20147 return true;
20148 SDLoc DL(Op);
20149 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
20150 SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
20151 return TLO.CombineTo(Op, NewOp);
20152 };
20153
20154 // Prefer uxtb mask.
20155 if (IsLegalMask(0xFF))
20156 return UseMask(0xFF);
20157
20158 // Prefer uxth mask.
20159 if (IsLegalMask(0xFFFF))
20160 return UseMask(0xFFFF);
20161
20162 // [1, 255] is Thumb1 movs+ands, legal immediate for ARM/Thumb2.
20163 // FIXME: Prefer a contiguous sequence of bits for other optimizations.
20164 if (ShrunkMask < 256)
20165 return UseMask(ShrunkMask);
20166
20167 // [-256, -2] is Thumb1 movs+bics, legal immediate for ARM/Thumb2.
20168 // FIXME: Prefer a contiguous sequence of bits for other optimizations.
20169 if ((int)ExpandedMask <= -2 && (int)ExpandedMask >= -256)
20170 return UseMask(ExpandedMask);
20171
20172 // Potential improvements:
20173 //
20174 // We could try to recognize lsls+lsrs or lsrs+lsls pairs here.
20175 // We could try to prefer Thumb1 immediates which can be lowered to a
20176 // two-instruction sequence.
20177 // We could try to recognize more legal ARM/Thumb2 immediates here.
20178
20179 return false;
20180}
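// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the original file): the mask-shrinking
// logic above, reduced to plain unsigned arithmetic and omitting the all-zero,
// all-ones and small-immediate cases. Any replacement mask M is acceptable if
// it keeps every required bit (ShrunkMask) and only adds bits nobody demanded
// (stays within ExpandedMask).
static unsigned chooseAndMask(unsigned Mask, unsigned Demanded) {
  unsigned Shrunk = Mask & Demanded;
  unsigned Expanded = Mask | ~Demanded;
  auto IsLegal = [&](unsigned M) {
    return (Shrunk & M) == Shrunk && (~Expanded & M) == 0;
  };
  if (IsLegal(0xFF))
    return 0xFF;                        // prefer uxtb
  if (IsLegal(0xFFFF))
    return 0xFFFF;                      // then uxth
  return Mask;                          // otherwise keep the original constant
}
// chooseAndMask(0xFFC, 0xFF0) == 0xFFFF: bits 4-11 must survive, nothing below
// bit 4 is demanded, so the AND can become a uxth.
// ---------------------------------------------------------------------------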
20181
20182bool ARMTargetLowering::SimplifyDemandedBitsForTargetNode(
20183 SDValue Op, const APInt &OriginalDemandedBits,
20184 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
20185 unsigned Depth) const {
20186 unsigned Opc = Op.getOpcode();
20187
20188 switch (Opc) {
20189 case ARMISD::ASRL:
20190 case ARMISD::LSRL: {
20191 // If this is result 0 and the other result is unused, see if the demand
20192 // bits allow us to shrink this long shift into a standard small shift in
20193 // the opposite direction.
20194 if (Op.getResNo() == 0 && !Op->hasAnyUseOfValue(1) &&
20195 isa<ConstantSDNode>(Op->getOperand(2))) {
20196 unsigned ShAmt = Op->getConstantOperandVal(2);
20197 if (ShAmt < 32 && OriginalDemandedBits.isSubsetOf(APInt::getAllOnes(32)
20198 << (32 - ShAmt)))
20199 return TLO.CombineTo(
20200 Op, TLO.DAG.getNode(
20201 ISD::SHL, SDLoc(Op), MVT::i32, Op.getOperand(1),
20202 TLO.DAG.getConstant(32 - ShAmt, SDLoc(Op), MVT::i32)));
20203 }
20204 break;
20205 }
20206 case ARMISD::VBICIMM: {
20207 SDValue Op0 = Op.getOperand(0);
20208 unsigned ModImm = Op.getConstantOperandVal(1);
20209 unsigned EltBits = 0;
20210 uint64_t Mask = ARM_AM::decodeVMOVModImm(ModImm, EltBits);
20211 if ((OriginalDemandedBits & Mask) == 0)
20212 return TLO.CombineTo(Op, Op0);
20213 }
20214 }
20215
20216 return TargetLowering::SimplifyDemandedBitsForTargetNode(
20217 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
20218}
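// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the original file): why the long-shift case
// above can turn into a plain SHL. The low half of a 64-bit logical shift
// right is assembled from both 32-bit halves:
static unsigned lowHalfOfLongShift(unsigned Lo, unsigned Hi, unsigned ShAmt) {
  // Valid for 0 < ShAmt < 32, matching the guard in the code above.
  return (Lo >> ShAmt) | (Hi << (32 - ShAmt));
}
// The contribution of Lo occupies bits [0, 31 - ShAmt], so if only bits
// [32 - ShAmt, 31] are demanded the result is indistinguishable from a single
// 32-bit SHL of the half that supplies those bits (operand 1 in the code
// above).
// ---------------------------------------------------------------------------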
20219
20220//===----------------------------------------------------------------------===//
20221// ARM Inline Assembly Support
20222//===----------------------------------------------------------------------===//
20223
20224bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {
20225 // Looking for "rev" which is V6+.
20226 if (!Subtarget->hasV6Ops())
20227 return false;
20228
20229 InlineAsm *IA = cast<InlineAsm>(CI->getCalledOperand());
20230 std::string AsmStr = IA->getAsmString();
20231 SmallVector<StringRef, 4> AsmPieces;
20232 SplitString(AsmStr, AsmPieces, ";\n");
20233
20234 switch (AsmPieces.size()) {
20235 default: return false;
20236 case 1:
20237 AsmStr = std::string(AsmPieces[0]);
20238 AsmPieces.clear();
20239 SplitString(AsmStr, AsmPieces, " \t,");
20240
20241 // rev $0, $1
20242 if (AsmPieces.size() == 3 &&
20243 AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" &&
20244 IA->getConstraintString().compare(0, 4, "=l,l") == 0) {
20245 IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
20246 if (Ty && Ty->getBitWidth() == 32)
20247 return IntrinsicLowering::LowerToByteSwap(CI);
20248 }
20249 break;
20250 }
20251
20252 return false;
20253}
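// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the original file): "rev" is a 32-bit byte
// reversal, so inline asm matching "rev $0, $1" with the "=l,l" constraint
// string can be replaced by the generic byte-swap intrinsic. The operation
// itself, in plain C++:
static unsigned rev32(unsigned V) {
  return (V >> 24) | ((V >> 8) & 0xFF00u) | ((V << 8) & 0xFF0000u) | (V << 24);
}
// rev32(0x12345678) == 0x78563412
// ---------------------------------------------------------------------------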
20254
20255const char *ARMTargetLowering::LowerXConstraint(EVT ConstraintVT) const {
20256 // At this point, we have to lower this constraint to something else, so we
20257 // lower it to an "r" or "w". However, by doing this we will force the result
20258 // to be in register, while the X constraint is much more permissive.
20259 //
20260 // Although we are correct (we are free to emit anything, without
20261 // constraints), we might break use cases that would expect us to be more
20262 // efficient and emit something else.
20263 if (!Subtarget->hasVFP2Base())
20264 return "r";
20265 if (ConstraintVT.isFloatingPoint())
20266 return "w";
20267 if (ConstraintVT.isVector() && Subtarget->hasNEON() &&
20268 (ConstraintVT.getSizeInBits() == 64 ||
20269 ConstraintVT.getSizeInBits() == 128))
20270 return "w";
20271
20272 return "r";
20273}
20274
20275/// getConstraintType - Given a constraint letter, return the type of
20276/// constraint it is for this target.
20277ARMTargetLowering::ConstraintType
20278ARMTargetLowering::getConstraintType(StringRef Constraint) const {
20279 unsigned S = Constraint.size();
20280 if (S == 1) {
20281 switch (Constraint[0]) {
20282 default: break;
20283 case 'l': return C_RegisterClass;
20284 case 'w': return C_RegisterClass;
20285 case 'h': return C_RegisterClass;
20286 case 'x': return C_RegisterClass;
20287 case 't': return C_RegisterClass;
20288 case 'j': return C_Immediate; // Constant for movw.
20289 // An address with a single base register. Due to the way we
20290 // currently handle addresses it is the same as an 'r' memory constraint.
20291 case 'Q': return C_Memory;
20292 }
20293 } else if (S == 2) {
20294 switch (Constraint[0]) {
20295 default: break;
20296 case 'T': return C_RegisterClass;
20297 // All 'U+' constraints are addresses.
20298 case 'U': return C_Memory;
20299 }
20300 }
20301 return TargetLowering::getConstraintType(Constraint);
20302}
20303
20304/// Examine constraint type and operand type and determine a weight value.
20305/// This object must already have been set up with the operand type
20306/// and the current alternative constraint selected.
20307TargetLowering::ConstraintWeight
20308ARMTargetLowering::getSingleConstraintMatchWeight(
20309 AsmOperandInfo &info, const char *constraint) const {
20310 ConstraintWeight weight = CW_Invalid;
20311 Value *CallOperandVal = info.CallOperandVal;
20312 // If we don't have a value, we can't do a match,
20313 // but allow it at the lowest weight.
20314 if (!CallOperandVal)
20315 return CW_Default;
20316 Type *type = CallOperandVal->getType();
20317 // Look at the constraint type.
20318 switch (*constraint) {
20319 default:
20320 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
20321 break;
20322 case 'l':
20323 if (type->isIntegerTy()) {
20324 if (Subtarget->isThumb())
20325 weight = CW_SpecificReg;
20326 else
20327 weight = CW_Register;
20328 }
20329 break;
20330 case 'w':
20331 if (type->isFloatingPointTy())
20332 weight = CW_Register;
20333 break;
20334 }
20335 return weight;
20336}
20337
20338using RCPair = std::pair<unsigned, const TargetRegisterClass *>;
20339
20340RCPair ARMTargetLowering::getRegForInlineAsmConstraint(
20341 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
20342 switch (Constraint.size()) {
20343 case 1:
20344 // GCC ARM Constraint Letters
20345 switch (Constraint[0]) {
20346 case 'l': // Low regs or general regs.
20347 if (Subtarget->isThumb())
20348 return RCPair(0U, &ARM::tGPRRegClass);
20349 return RCPair(0U, &ARM::GPRRegClass);
20350 case 'h': // High regs or no regs.
20351 if (Subtarget->isThumb())
20352 return RCPair(0U, &ARM::hGPRRegClass);
20353 break;
20354 case 'r':
20355 if (Subtarget->isThumb1Only())
20356 return RCPair(0U, &ARM::tGPRRegClass);
20357 return RCPair(0U, &ARM::GPRRegClass);
20358 case 'w':
20359 if (VT == MVT::Other)
20360 break;
20361 if (VT == MVT::f32 || VT == MVT::f16 || VT == MVT::bf16)
20362 return RCPair(0U, &ARM::SPRRegClass);
20363 if (VT.getSizeInBits() == 64)
20364 return RCPair(0U, &ARM::DPRRegClass);
20365 if (VT.getSizeInBits() == 128)
20366 return RCPair(0U, &ARM::QPRRegClass);
20367 break;
20368 case 'x':
20369 if (VT == MVT::Other)
20370 break;
20371 if (VT == MVT::f32 || VT == MVT::f16 || VT == MVT::bf16)
20372 return RCPair(0U, &ARM::SPR_8RegClass);
20373 if (VT.getSizeInBits() == 64)
20374 return RCPair(0U, &ARM::DPR_8RegClass);
20375 if (VT.getSizeInBits() == 128)
20376 return RCPair(0U, &ARM::QPR_8RegClass);
20377 break;
20378 case 't':
20379 if (VT == MVT::Other)
20380 break;
20381 if (VT == MVT::f32 || VT == MVT::i32 || VT == MVT::f16 || VT == MVT::bf16)
20382 return RCPair(0U, &ARM::SPRRegClass);
20383 if (VT.getSizeInBits() == 64)
20384 return RCPair(0U, &ARM::DPR_VFP2RegClass);
20385 if (VT.getSizeInBits() == 128)
20386 return RCPair(0U, &ARM::QPR_VFP2RegClass);
20387 break;
20388 }
20389 break;
20390
20391 case 2:
20392 if (Constraint[0] == 'T') {
20393 switch (Constraint[1]) {
20394 default:
20395 break;
20396 case 'e':
20397 return RCPair(0U, &ARM::tGPREvenRegClass);
20398 case 'o':
20399 return RCPair(0U, &ARM::tGPROddRegClass);
20400 }
20401 }
20402 break;
20403
20404 default:
20405 break;
20406 }
20407
20408 if (StringRef("{cc}").equals_insensitive(Constraint))
20409 return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass);
20410
20411 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
20412}
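// ---------------------------------------------------------------------------
// Illustrative note (not part of the original file): these register classes
// back GCC-style inline asm constraints, e.g. a hypothetical snippet such as
//
//   float r;
//   asm("vadd.f32 %0, %1, %2" : "=t"(r) : "t"(a), "t"(b));
//
// where, per the mapping above, "t" requests a single-precision VFP register
// (SPR), "w" a VFP/NEON register sized by the operand type, and "l" a low GPR
// on Thumb targets.
// ---------------------------------------------------------------------------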
20413
20414/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
20415/// vector. If it is invalid, don't add anything to Ops.
20416void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
20417 std::string &Constraint,
20418 std::vector<SDValue>&Ops,
20419 SelectionDAG &DAG) const {
20420 SDValue Result;
20421
20422 // Currently only support length 1 constraints.
20423 if (Constraint.length() != 1) return;
20424
20425 char ConstraintLetter = Constraint[0];
20426 switch (ConstraintLetter) {
20427 default: break;
20428 case 'j':
20429 case 'I': case 'J': case 'K': case 'L':
20430 case 'M': case 'N': case 'O':
20431 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
20432 if (!C)
20433 return;
20434
20435 int64_t CVal64 = C->getSExtValue();
20436 int CVal = (int) CVal64;
20437 // None of these constraints allow values larger than 32 bits. Check
20438 // that the value fits in an int.
20439 if (CVal != CVal64)
20440 return;
20441
20442 switch (ConstraintLetter) {
20443 case 'j':
20444 // Constant suitable for movw, must be between 0 and
20445 // 65535.
20446 if (Subtarget->hasV6T2Ops() || (Subtarget->hasV8MBaselineOps()))
20447 if (CVal >= 0 && CVal <= 65535)
20448 break;
20449 return;
20450 case 'I':
20451 if (Subtarget->isThumb1Only()) {
20452 // This must be a constant between 0 and 255, for ADD
20453 // immediates.
20454 if (CVal >= 0 && CVal <= 255)
20455 break;
20456 } else if (Subtarget->isThumb2()) {
20457 // A constant that can be used as an immediate value in a
20458 // data-processing instruction.
20459 if (ARM_AM::getT2SOImmVal(CVal) != -1)
20460 break;
20461 } else {
20462 // A constant that can be used as an immediate value in a
20463 // data-processing instruction.
20464 if (ARM_AM::getSOImmVal(CVal) != -1)
20465 break;
20466 }
20467 return;
20468
20469 case 'J':
20470 if (Subtarget->isThumb1Only()) {
20471 // This must be a constant between -255 and -1, for negated ADD
20472 // immediates. This can be used in GCC with an "n" modifier that
20473 // prints the negated value, for use with SUB instructions. It is
20474 // not useful otherwise but is implemented for compatibility.
20475 if (CVal >= -255 && CVal <= -1)
20476 break;
20477 } else {
20478 // This must be a constant between -4095 and 4095. It is not clear
20479 // what this constraint is intended for. Implemented for
20480 // compatibility with GCC.
20481 if (CVal >= -4095 && CVal <= 4095)
20482 break;
20483 }
20484 return;
20485
20486 case 'K':
20487 if (Subtarget->isThumb1Only()) {
20488 // A 32-bit value where only one byte has a nonzero value. Exclude
20489 // zero to match GCC. This constraint is used by GCC internally for
20490 // constants that can be loaded with a move/shift combination.
20491 // It is not useful otherwise but is implemented for compatibility.
20492 if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
20493 break;
20494 } else if (Subtarget->isThumb2()) {
20495 // A constant whose bitwise inverse can be used as an immediate
20496 // value in a data-processing instruction. This can be used in GCC
20497 // with a "B" modifier that prints the inverted value, for use with
20498 // BIC and MVN instructions. It is not useful otherwise but is
20499 // implemented for compatibility.
20500 if (ARM_AM::getT2SOImmVal(~CVal) != -1)
20501 break;
20502 } else {
20503 // A constant whose bitwise inverse can be used as an immediate
20504 // value in a data-processing instruction. This can be used in GCC
20505 // with a "B" modifier that prints the inverted value, for use with
20506 // BIC and MVN instructions. It is not useful otherwise but is
20507 // implemented for compatibility.
20508 if (ARM_AM::getSOImmVal(~CVal) != -1)
20509 break;
20510 }
20511 return;
20512
20513 case 'L':
20514 if (Subtarget->isThumb1Only()) {
20515 // This must be a constant between -7 and 7,
20516 // for 3-operand ADD/SUB immediate instructions.
20517 if (CVal >= -7 && CVal < 7)
20518 break;
20519 } else if (Subtarget->isThumb2()) {
20520 // A constant whose negation can be used as an immediate value in a
20521 // data-processing instruction. This can be used in GCC with an "n"
20522 // modifier that prints the negated value, for use with SUB
20523 // instructions. It is not useful otherwise but is implemented for
20524 // compatibility.
20525 if (ARM_AM::getT2SOImmVal(-CVal) != -1)
20526 break;
20527 } else {
20528 // A constant whose negation can be used as an immediate value in a
20529 // data-processing instruction. This can be used in GCC with an "n"
20530 // modifier that prints the negated value, for use with SUB
20531 // instructions. It is not useful otherwise but is implemented for
20532 // compatibility.
20533 if (ARM_AM::getSOImmVal(-CVal) != -1)
20534 break;
20535 }
20536 return;
20537
20538 case 'M':
20539 if (Subtarget->isThumb1Only()) {
20540 // This must be a multiple of 4 between 0 and 1020, for
20541 // ADD sp + immediate.
20542 if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
20543 break;
20544 } else {
20545 // A power of two or a constant between 0 and 32. This is used in
20546 // GCC for the shift amount on shifted register operands, but it is
20547 // useful in general for any shift amounts.
20548 if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
20549 break;
20550 }
20551 return;
20552
20553 case 'N':
20554 if (Subtarget->isThumb1Only()) {
20555 // This must be a constant between 0 and 31, for shift amounts.
20556 if (CVal >= 0 && CVal <= 31)
20557 break;
20558 }
20559 return;
20560
20561 case 'O':
20562 if (Subtarget->isThumb1Only()) {
20563 // This must be a multiple of 4 between -508 and 508, for
20564 // ADD/SUB sp = sp + immediate.
20565 if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
20566 break;
20567 }
20568 return;
20569 }
20570 Result = DAG.getTargetConstant(CVal, SDLoc(Op), Op.getValueType());
20571 break;
20572 }
20573
20574 if (Result.getNode()) {
20575 Ops.push_back(Result);
20576 return;
20577 }
20578 return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
20579}
20580
20581static RTLIB::Libcall getDivRemLibcall(
20582 const SDNode *N, MVT::SimpleValueType SVT) {
20583  assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
20584          N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
20585         "Unhandled Opcode in getDivRemLibcall");
20586 bool isSigned = N->getOpcode() == ISD::SDIVREM ||
20587 N->getOpcode() == ISD::SREM;
20588 RTLIB::Libcall LC;
20589 switch (SVT) {
20590  default: llvm_unreachable("Unexpected request for libcall!");
20591 case MVT::i8: LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
20592 case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
20593 case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
20594 case MVT::i64: LC = isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
20595 }
20596 return LC;
20597}
20598
20599static TargetLowering::ArgListTy getDivRemArgList(
20600 const SDNode *N, LLVMContext *Context, const ARMSubtarget *Subtarget) {
20601  assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
20602          N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
20603         "Unhandled Opcode in getDivRemArgList");
20604 bool isSigned = N->getOpcode() == ISD::SDIVREM ||
20605 N->getOpcode() == ISD::SREM;
20606 TargetLowering::ArgListTy Args;
20607 TargetLowering::ArgListEntry Entry;
20608 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
20609 EVT ArgVT = N->getOperand(i).getValueType();
20610 Type *ArgTy = ArgVT.getTypeForEVT(*Context);
20611 Entry.Node = N->getOperand(i);
20612 Entry.Ty = ArgTy;
20613 Entry.IsSExt = isSigned;
20614 Entry.IsZExt = !isSigned;
20615 Args.push_back(Entry);
20616 }
20617 if (Subtarget->isTargetWindows() && Args.size() >= 2)
20618 std::swap(Args[0], Args[1]);
20619 return Args;
20620}
20621
20622SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
20623  assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
20624          Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
20625          Subtarget->isTargetWindows()) &&
20626         "Register-based DivRem lowering only");
20627 unsigned Opcode = Op->getOpcode();
20628  assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
20629         "Invalid opcode for Div/Rem lowering");
20630 bool isSigned = (Opcode == ISD::SDIVREM);
20631 EVT VT = Op->getValueType(0);
20632 SDLoc dl(Op);
20633
20634 if (VT == MVT::i64 && isa<ConstantSDNode>(Op.getOperand(1))) {
20635 SmallVector<SDValue> Result;
20636 if (expandDIVREMByConstant(Op.getNode(), Result, MVT::i32, DAG)) {
20637 SDValue Res0 =
20638 DAG.getNode(ISD::BUILD_PAIR, dl, VT, Result[0], Result[1]);
20639 SDValue Res1 =
20640 DAG.getNode(ISD::BUILD_PAIR, dl, VT, Result[2], Result[3]);
20641 return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(),
20642 {Res0, Res1});
20643 }
20644 }
20645
20646 Type *Ty = VT.getTypeForEVT(*DAG.getContext());
20647
20648 // If the target has hardware divide, use divide + multiply + subtract:
20649 // div = a / b
20650 // rem = a - b * div
20651 // return {div, rem}
20652 // This should be lowered into UDIV/SDIV + MLS later on.
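// Illustrative sketch of the DAG built below for {q, r} = sdivrem(a, b):
//   q = ISD::SDIV a, b
//   t = ISD::MUL  q, b
//   r = ISD::SUB  a, t
// which instruction selection is then expected to fold into SDIV + MLS.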
20653 bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
20654 : Subtarget->hasDivideInARMMode();
20655 if (hasDivide && Op->getValueType(0).isSimple() &&
20656 Op->getSimpleValueType(0) == MVT::i32) {
20657 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
20658 const SDValue Dividend = Op->getOperand(0);
20659 const SDValue Divisor = Op->getOperand(1);
20660 SDValue Div = DAG.getNode(DivOpcode, dl, VT, Dividend, Divisor);
20661 SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Div, Divisor);
20662 SDValue Rem = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
20663
20664 SDValue Values[2] = {Div, Rem};
20665 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VT, VT), Values);
20666 }
20667
20668 RTLIB::Libcall LC = getDivRemLibcall(Op.getNode(),
20669 VT.getSimpleVT().SimpleTy);
20670 SDValue InChain = DAG.getEntryNode();
20671
20672 TargetLowering::ArgListTy Args = getDivRemArgList(Op.getNode(),
20673 DAG.getContext(),
20674 Subtarget);
20675
20676 SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
20677 getPointerTy(DAG.getDataLayout()));
20678
20679 Type *RetTy = StructType::get(Ty, Ty);
20680
20681 if (Subtarget->isTargetWindows())
20682 InChain = WinDBZCheckDenominator(DAG, Op.getNode(), InChain);
20683
20684 TargetLowering::CallLoweringInfo CLI(DAG);
20685 CLI.setDebugLoc(dl).setChain(InChain)
20686 .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
20687 .setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned);
20688
20689 std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
20690 return CallInfo.first;
20691}
20692
20693// Lowers REM using divmod helpers
20694// see RTABI section 4.2/4.3
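// For i32 this typically resolves to an RTABI helper such as __aeabi_idivmod
// or __aeabi_uidivmod, which returns the quotient/remainder pair; only the
// remainder result is consumed here.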
20695SDValue ARMTargetLowering::LowerREM(SDNode *N, SelectionDAG &DAG) const {
20696 EVT VT = N->getValueType(0);
20697
20698 if (VT == MVT::i64 && isa<ConstantSDNode>(N->getOperand(1))) {
20699 SmallVector<SDValue> Result;
20700 if (expandDIVREMByConstant(N, Result, MVT::i32, DAG))
20701 return DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), N->getValueType(0),
20702 Result[0], Result[1]);
20703 }
20704
20705 // Build return types (div and rem)
20706 std::vector<Type*> RetTyParams;
20707 Type *RetTyElement;
20708
20709 switch (VT.getSimpleVT().SimpleTy) {
20710  default: llvm_unreachable("Unexpected request for libcall!");
20711 case MVT::i8: RetTyElement = Type::getInt8Ty(*DAG.getContext()); break;
20712 case MVT::i16: RetTyElement = Type::getInt16Ty(*DAG.getContext()); break;
20713 case MVT::i32: RetTyElement = Type::getInt32Ty(*DAG.getContext()); break;
20714 case MVT::i64: RetTyElement = Type::getInt64Ty(*DAG.getContext()); break;
20715 }
20716
20717 RetTyParams.push_back(RetTyElement);
20718 RetTyParams.push_back(RetTyElement);
20719 ArrayRef<Type*> ret = ArrayRef<Type*>(RetTyParams);
20720 Type *RetTy = StructType::get(*DAG.getContext(), ret);
20721
20722 RTLIB::Libcall LC = getDivRemLibcall(N, N->getValueType(0).getSimpleVT().
20723 SimpleTy);
20724 SDValue InChain = DAG.getEntryNode();
20725 TargetLowering::ArgListTy Args = getDivRemArgList(N, DAG.getContext(),
20726 Subtarget);
20727 bool isSigned = N->getOpcode() == ISD::SREM;
20728 SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
20729 getPointerTy(DAG.getDataLayout()));
20730
20731 if (Subtarget->isTargetWindows())
20732 InChain = WinDBZCheckDenominator(DAG, N, InChain);
20733
20734 // Lower call
20735 CallLoweringInfo CLI(DAG);
20736 CLI.setChain(InChain)
20737 .setCallee(CallingConv::ARM_AAPCS, RetTy, Callee, std::move(Args))
20738 .setSExtResult(isSigned).setZExtResult(!isSigned).setDebugLoc(SDLoc(N));
20739 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
20740
20741 // Return second (rem) result operand (first contains div)
20742 SDNode *ResNode = CallResult.first.getNode();
20743 assert(ResNode->getNumOperands() == 2 && "divmod should return two operands");
20744 return ResNode->getOperand(1);
20745}
20746
20747SDValue
20748ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
20749 assert(Subtarget->isTargetWindows() && "unsupported target platform");
20750 SDLoc DL(Op);
20751
20752 // Get the inputs.
20753 SDValue Chain = Op.getOperand(0);
20754 SDValue Size = Op.getOperand(1);
20755
20756 if (DAG.getMachineFunction().getFunction().hasFnAttribute(
20757 "no-stack-arg-probe")) {
20758 MaybeAlign Align =
20759 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
20760 SDValue SP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
20761 Chain = SP.getValue(1);
20762 SP = DAG.getNode(ISD::SUB, DL, MVT::i32, SP, Size);
20763 if (Align)
20764 SP =
20765 DAG.getNode(ISD::AND, DL, MVT::i32, SP.getValue(0),
20766 DAG.getConstant(-(uint64_t)Align->value(), DL, MVT::i32));
20767 Chain = DAG.getCopyToReg(Chain, DL, ARM::SP, SP);
20768 SDValue Ops[2] = { SP, Chain };
20769 return DAG.getMergeValues(Ops, DL);
20770 }
20771
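  // The Windows stack probe helper is assumed to take the allocation size in
  // 4-byte words in r4, hence the shift right by 2 and the copy into R4
  // before emitting the WIN__CHKSTK node below.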
20772 SDValue Words = DAG.getNode(ISD::SRL, DL, MVT::i32, Size,
20773 DAG.getConstant(2, DL, MVT::i32));
20774
20775 SDValue Glue;
20776 Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Glue);
20777 Glue = Chain.getValue(1);
20778
20779 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
20780 Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Glue);
20781
20782 SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
20783 Chain = NewSP.getValue(1);
20784
20785 SDValue Ops[2] = { NewSP, Chain };
20786 return DAG.getMergeValues(Ops, DL);
20787}
20788
20789SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
20790 bool IsStrict = Op->isStrictFPOpcode();
20791 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
20792 const unsigned DstSz = Op.getValueType().getSizeInBits();
20793 const unsigned SrcSz = SrcVal.getValueType().getSizeInBits();
20794 assert(DstSz > SrcSz && DstSz <= 64 && SrcSz >= 16 &&
20795        "Unexpected type for custom-lowering FP_EXTEND");
20796
20797 assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&
20798        "With both FP DP and 16, any FP conversion is legal!");
20799
20800 assert(!(DstSz == 32 && Subtarget->hasFP16()) &&
20801        "With FP16, 16 to 32 conversion is legal!");
20802
20803 // Converting from 32 -> 64 is valid if we have FP64.
20804 if (SrcSz == 32 && DstSz == 64 && Subtarget->hasFP64()) {
20805 // FIXME: Remove this when we have strict fp instruction selection patterns
20806 if (IsStrict) {
20807 SDLoc Loc(Op);
20808 SDValue Result = DAG.getNode(ISD::FP_EXTEND,
20809 Loc, Op.getValueType(), SrcVal);
20810 return DAG.getMergeValues({Result, Op.getOperand(0)}, Loc);
20811 }
20812 return Op;
20813 }
20814
20815 // Either we are converting 16 -> 64 without FP16 and/or FP double
20816 // precision (or without Armv8-fp), in which case the conversion must be
20817 // done in two steps;
20818 // or we are converting 32 -> 64 without FP double precision, or 16 -> 32
20819 // without FP16, in which case a library call is required.
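  // Illustrative example: extending f16 -> f64 with neither FP16 nor FP64
  // available goes f16 -> f32 -> f64, with each unsupported hop turned into a
  // libcall (e.g. __aeabi_h2f / __extendhfsf2, then __aeabi_f2d / __extendsfdf2,
  // depending on the target's libcall names).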
20820 SDLoc Loc(Op);
20821 RTLIB::Libcall LC;
20822 MakeLibCallOptions CallOptions;
20823 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
20824 for (unsigned Sz = SrcSz; Sz <= 32 && Sz < DstSz; Sz *= 2) {
20825 bool Supported = (Sz == 16 ? Subtarget->hasFP16() : Subtarget->hasFP64());
20826 MVT SrcVT = (Sz == 16 ? MVT::f16 : MVT::f32);
20827 MVT DstVT = (Sz == 16 ? MVT::f32 : MVT::f64);
20828 if (Supported) {
20829 if (IsStrict) {
20830 SrcVal = DAG.getNode(ISD::STRICT_FP_EXTEND, Loc,
20831 {DstVT, MVT::Other}, {Chain, SrcVal});
20832 Chain = SrcVal.getValue(1);
20833 } else {
20834 SrcVal = DAG.getNode(ISD::FP_EXTEND, Loc, DstVT, SrcVal);
20835 }
20836 } else {
20837 LC = RTLIB::getFPEXT(SrcVT, DstVT);
20838       assert(LC != RTLIB::UNKNOWN_LIBCALL &&
20839              "Unexpected type for custom-lowering FP_EXTEND");
20840 std::tie(SrcVal, Chain) = makeLibCall(DAG, LC, DstVT, SrcVal, CallOptions,
20841 Loc, Chain);
20842 }
20843 }
20844
20845 return IsStrict ? DAG.getMergeValues({SrcVal, Chain}, Loc) : SrcVal;
20846}
20847
20848SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
20849 bool IsStrict = Op->isStrictFPOpcode();
20850
20851 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
20852 EVT SrcVT = SrcVal.getValueType();
20853 EVT DstVT = Op.getValueType();
20854 const unsigned DstSz = Op.getValueType().getSizeInBits();
20855 const unsigned SrcSz = SrcVT.getSizeInBits();
20856 (void)DstSz;
20857 assert(DstSz < SrcSz && SrcSz <= 64 && DstSz >= 16 &&
20858        "Unexpected type for custom-lowering FP_ROUND");
20859
20860 assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&
20861        "With both FP DP and 16, any FP conversion is legal!");
20862
20863 SDLoc Loc(Op);
20864
20865 // A single 32 -> 16 conversion instruction is available if we have FP16.
20866 if (SrcSz == 32 && Subtarget->hasFP16())
20867 return Op;
20868
20869 // Lib call from 32 -> 16 / 64 -> [32, 16]
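  // The exact helper chosen by getFPROUND depends on the target's libcall
  // names, e.g. __aeabi_f2h / __aeabi_d2h / __aeabi_d2f on AEABI targets or
  // __truncsfhf2 / __truncdfhf2 / __truncdfsf2 in generic compiler-rt; the
  // names here are illustrative of the usual mapping.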
20870 RTLIB::Libcall LC = RTLIB::getFPROUND(SrcVT, DstVT);
20871 assert(LC != RTLIB::UNKNOWN_LIBCALL &&
20872        "Unexpected type for custom-lowering FP_ROUND");
20873 MakeLibCallOptions CallOptions;
20874 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
20875 SDValue Result;
20876 std::tie(Result, Chain) = makeLibCall(DAG, LC, DstVT, SrcVal, CallOptions,
20877 Loc, Chain);
20878 return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result;
20879}
20880
20881bool
20882ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
20883 // The ARM target isn't yet aware of offsets.
20884 return false;
20885}
20886
20887bool ARM::isBitFieldInvertedMask(unsigned v) {
20888 if (v == 0xffffffff)
20889 return false;
20890
20891 // there can be 1's on either or both "outsides", all the "inside"
20892 // bits must be 0's
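  // For example, v = 0xff0000ff is accepted (~v = 0x00ffff00 is one shifted
  // run of ones), while v = 0xff00ff00 is rejected (~v has two separate runs).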
20893 return isShiftedMask_32(~v);
20894}
20895
20896/// isFPImmLegal - Returns true if the target can instruction select the
20897/// specified FP immediate natively. If false, the legalizer will
20898/// materialize the FP immediate as a load from a constant pool.
20899bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
20900 bool ForCodeSize) const {
20901 if (!Subtarget->hasVFP3Base())
20902 return false;
20903 if (VT == MVT::f16 && Subtarget->hasFullFP16())
20904 return ARM_AM::getFP16Imm(Imm) != -1;
20905 if (VT == MVT::f32 && Subtarget->hasFullFP16() &&
20906 ARM_AM::getFP32FP16Imm(Imm) != -1)
20907 return true;
20908 if (VT == MVT::f32)
20909 return ARM_AM::getFP32Imm(Imm) != -1;
20910 if (VT == MVT::f64 && Subtarget->hasFP64())
20911 return ARM_AM::getFP64Imm(Imm) != -1;
20912 return false;
20913}
20914
20915/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
20916/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
20917/// specified in the intrinsic calls.
20918bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
20919 const CallInst &I,
20920 MachineFunction &MF,
20921 unsigned Intrinsic) const {
20922 switch (Intrinsic) {
20923 case Intrinsic::arm_neon_vld1:
20924 case Intrinsic::arm_neon_vld2:
20925 case Intrinsic::arm_neon_vld3:
20926 case Intrinsic::arm_neon_vld4:
20927 case Intrinsic::arm_neon_vld2lane:
20928 case Intrinsic::arm_neon_vld3lane:
20929 case Intrinsic::arm_neon_vld4lane:
20930 case Intrinsic::arm_neon_vld2dup:
20931 case Intrinsic::arm_neon_vld3dup:
20932 case Intrinsic::arm_neon_vld4dup: {
20933 Info.opc = ISD::INTRINSIC_W_CHAIN;
20934 // Conservatively set memVT to the entire set of vectors loaded.
20935 auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
20936 uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
20937 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
20938 Info.ptrVal = I.getArgOperand(0);
20939 Info.offset = 0;
20940 Value *AlignArg = I.getArgOperand(I.arg_size() - 1);
20941 Info.align = cast<ConstantInt>(AlignArg)->getMaybeAlignValue();
20942 // volatile loads with NEON intrinsics not supported
20943 Info.flags = MachineMemOperand::MOLoad;
20944 return true;
20945 }
20946 case Intrinsic::arm_neon_vld1x2:
20947 case Intrinsic::arm_neon_vld1x3:
20948 case Intrinsic::arm_neon_vld1x4: {
20949 Info.opc = ISD::INTRINSIC_W_CHAIN;
20950 // Conservatively set memVT to the entire set of vectors loaded.
20951 auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
20952 uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
20953 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
20954 Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
20955 Info.offset = 0;
20956 Info.align.reset();
20957 // volatile loads with NEON intrinsics not supported
20958 Info.flags = MachineMemOperand::MOLoad;
20959 return true;
20960 }
20961 case Intrinsic::arm_neon_vst1:
20962 case Intrinsic::arm_neon_vst2:
20963 case Intrinsic::arm_neon_vst3:
20964 case Intrinsic::arm_neon_vst4:
20965 case Intrinsic::arm_neon_vst2lane:
20966 case Intrinsic::arm_neon_vst3lane:
20967 case Intrinsic::arm_neon_vst4lane: {
20968 Info.opc = ISD::INTRINSIC_VOID;
20969 // Conservatively set memVT to the entire set of vectors stored.
20970 auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
20971 unsigned NumElts = 0;
20972 for (unsigned ArgI = 1, ArgE = I.arg_size(); ArgI < ArgE; ++ArgI) {
20973 Type *ArgTy = I.getArgOperand(ArgI)->getType();
20974 if (!ArgTy->isVectorTy())
20975 break;
20976 NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
20977 }
20978 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
20979 Info.ptrVal = I.getArgOperand(0);
20980 Info.offset = 0;
20981 Value *AlignArg = I.getArgOperand(I.arg_size() - 1);
20982 Info.align = cast<ConstantInt>(AlignArg)->getMaybeAlignValue();
20983 // volatile stores with NEON intrinsics not supported
20984 Info.flags = MachineMemOperand::MOStore;
20985 return true;
20986 }
20987 case Intrinsic::arm_neon_vst1x2:
20988 case Intrinsic::arm_neon_vst1x3:
20989 case Intrinsic::arm_neon_vst1x4: {
20990 Info.opc = ISD::INTRINSIC_VOID;
20991 // Conservatively set memVT to the entire set of vectors stored.
20992 auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
20993 unsigned NumElts = 0;
20994 for (unsigned ArgI = 1, ArgE = I.arg_size(); ArgI < ArgE; ++ArgI) {
20995 Type *ArgTy = I.getArgOperand(ArgI)->getType();
20996 if (!ArgTy->isVectorTy())
20997 break;
20998 NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
20999 }
21000 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
21001 Info.ptrVal = I.getArgOperand(0);
21002 Info.offset = 0;
21003 Info.align.reset();
21004 // volatile stores with NEON intrinsics not supported
21005 Info.flags = MachineMemOperand::MOStore;
21006 return true;
21007 }
21008 case Intrinsic::arm_mve_vld2q:
21009 case Intrinsic::arm_mve_vld4q: {
21010 Info.opc = ISD::INTRINSIC_W_CHAIN;
21011 // Conservatively set memVT to the entire set of vectors loaded.
21012 Type *VecTy = cast<StructType>(I.getType())->getElementType(1);
21013 unsigned Factor = Intrinsic == Intrinsic::arm_mve_vld2q ? 2 : 4;
21014 Info.memVT = EVT::getVectorVT(VecTy->getContext(), MVT::i64, Factor * 2);
21015 Info.ptrVal = I.getArgOperand(0);
21016 Info.offset = 0;
21017 Info.align = Align(VecTy->getScalarSizeInBits() / 8);
21018 // volatile loads with MVE intrinsics not supported
21019 Info.flags = MachineMemOperand::MOLoad;
21020 return true;
21021 }
21022 case Intrinsic::arm_mve_vst2q:
21023 case Intrinsic::arm_mve_vst4q: {
21024 Info.opc = ISD::INTRINSIC_VOID;
21025 // Conservatively set memVT to the entire set of vectors stored.
21026 Type *VecTy = I.getArgOperand(1)->getType();
21027 unsigned Factor = Intrinsic == Intrinsic::arm_mve_vst2q ? 2 : 4;
21028 Info.memVT = EVT::getVectorVT(VecTy->getContext(), MVT::i64, Factor * 2);
21029 Info.ptrVal = I.getArgOperand(0);
21030 Info.offset = 0;
21031 Info.align = Align(VecTy->getScalarSizeInBits() / 8);
21032 // volatile stores with MVE intrinsics not supported
21033 Info.flags = MachineMemOperand::MOStore;
21034 return true;
21035 }
21036 case Intrinsic::arm_mve_vldr_gather_base:
21037 case Intrinsic::arm_mve_vldr_gather_base_predicated: {
21038 Info.opc = ISD::INTRINSIC_W_CHAIN;
21039 Info.ptrVal = nullptr;
21040 Info.memVT = MVT::getVT(I.getType());
21041 Info.align = Align(1);
21042 Info.flags |= MachineMemOperand::MOLoad;
21043 return true;
21044 }
21045 case Intrinsic::arm_mve_vldr_gather_base_wb:
21046 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {
21047 Info.opc = ISD::INTRINSIC_W_CHAIN;
21048 Info.ptrVal = nullptr;
21049 Info.memVT = MVT::getVT(I.getType()->getContainedType(0));
21050 Info.align = Align(1);
21051 Info.flags |= MachineMemOperand::MOLoad;
21052 return true;
21053 }
21054 case Intrinsic::arm_mve_vldr_gather_offset:
21055 case Intrinsic::arm_mve_vldr_gather_offset_predicated: {
21056 Info.opc = ISD::INTRINSIC_W_CHAIN;
21057 Info.ptrVal = nullptr;
21058 MVT DataVT = MVT::getVT(I.getType());
21059 unsigned MemSize = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
21060 Info.memVT = MVT::getVectorVT(MVT::getIntegerVT(MemSize),
21061 DataVT.getVectorNumElements());
21062 Info.align = Align(1);
21063 Info.flags |= MachineMemOperand::MOLoad;
21064 return true;
21065 }
21066 case Intrinsic::arm_mve_vstr_scatter_base:
21067 case Intrinsic::arm_mve_vstr_scatter_base_predicated: {
21068 Info.opc = ISD::INTRINSIC_VOID;
21069 Info.ptrVal = nullptr;
21070 Info.memVT = MVT::getVT(I.getArgOperand(2)->getType());
21071 Info.align = Align(1);
21072 Info.flags |= MachineMemOperand::MOStore;
21073 return true;
21074 }
21075 case Intrinsic::arm_mve_vstr_scatter_base_wb:
21076 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated: {
21077 Info.opc = ISD::INTRINSIC_W_CHAIN;
21078 Info.ptrVal = nullptr;
21079 Info.memVT = MVT::getVT(I.getArgOperand(2)->getType());
21080 Info.align = Align(1);
21081 Info.flags |= MachineMemOperand::MOStore;
21082 return true;
21083 }
21084 case Intrinsic::arm_mve_vstr_scatter_offset:
21085 case Intrinsic::arm_mve_vstr_scatter_offset_predicated: {
21086 Info.opc = ISD::INTRINSIC_VOID;
21087 Info.ptrVal = nullptr;
21088 MVT DataVT = MVT::getVT(I.getArgOperand(2)->getType());
21089 unsigned MemSize = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
21090 Info.memVT = MVT::getVectorVT(MVT::getIntegerVT(MemSize),
21091 DataVT.getVectorNumElements());
21092 Info.align = Align(1);
21093 Info.flags |= MachineMemOperand::MOStore;
21094 return true;
21095 }
21096 case Intrinsic::arm_ldaex:
21097 case Intrinsic::arm_ldrex: {
21098 auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
21099 Type *ValTy = I.getParamElementType(0);
21100 Info.opc = ISD::INTRINSIC_W_CHAIN;
21101 Info.memVT = MVT::getVT(ValTy);
21102 Info.ptrVal = I.getArgOperand(0);
21103 Info.offset = 0;
21104 Info.align = DL.getABITypeAlign(ValTy);
21105 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
21106 return true;
21107 }
21108 case Intrinsic::arm_stlex:
21109 case Intrinsic::arm_strex: {
21110 auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
21111 Type *ValTy = I.getParamElementType(1);
21112 Info.opc = ISD::INTRINSIC_W_CHAIN;
21113 Info.memVT = MVT::getVT(ValTy);
21114 Info.ptrVal = I.getArgOperand(1);
21115 Info.offset = 0;
21116 Info.align = DL.getABITypeAlign(ValTy);
21117 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
21118 return true;
21119 }
21120 case Intrinsic::arm_stlexd:
21121 case Intrinsic::arm_strexd:
21122 Info.opc = ISD::INTRINSIC_W_CHAIN;
21123 Info.memVT = MVT::i64;
21124 Info.ptrVal = I.getArgOperand(2);
21125 Info.offset = 0;
21126 Info.align = Align(8);
21127 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
21128 return true;
21129
21130 case Intrinsic::arm_ldaexd:
21131 case Intrinsic::arm_ldrexd:
21132 Info.opc = ISD::INTRINSIC_W_CHAIN;
21133 Info.memVT = MVT::i64;
21134 Info.ptrVal = I.getArgOperand(0);
21135 Info.offset = 0;
21136 Info.align = Align(8);
21137 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
21138 return true;
21139
21140 default:
21141 break;
21142 }
21143
21144 return false;
21145}
21146
21147/// Returns true if it is beneficial to convert a load of a constant
21148/// to just the constant itself.
21149bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
21150 Type *Ty) const {
21151 assert(Ty->isIntegerTy());
21152
21153 unsigned Bits = Ty->getPrimitiveSizeInBits();
21154 if (Bits == 0 || Bits > 32)
21155 return false;
21156 return true;
21157}
21158
21159bool ARMTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
21160 unsigned Index) const {
21161 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
21162 return false;
21163
21164 return (Index == 0 || Index == ResVT.getVectorNumElements());
21165}
21166
21167Instruction *ARMTargetLowering::makeDMB(IRBuilderBase &Builder,
21168 ARM_MB::MemBOpt Domain) const {
21169 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21170
21171 // First, if the target has no DMB, see what fallback we can use.
21172 if (!Subtarget->hasDataBarrier()) {
21173 // Some ARMv6 cpus can support data barriers with an mcr instruction.
21174 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
21175 // here.
21176 if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) {
21177 Function *MCR = Intrinsic::getDeclaration(M, Intrinsic::arm_mcr);
21178 Value* args[6] = {Builder.getInt32(15), Builder.getInt32(0),
21179 Builder.getInt32(0), Builder.getInt32(7),
21180 Builder.getInt32(10), Builder.getInt32(5)};
21181 return Builder.CreateCall(MCR, args);
21182 } else {
21183 // Instead of using barriers, atomic accesses on these subtargets use
21184 // libcalls.
21185 llvm_unreachable("makeDMB on a target so old that it has no barriers")::llvm::llvm_unreachable_internal("makeDMB on a target so old that it has no barriers"
, "llvm/lib/Target/ARM/ARMISelLowering.cpp", 21185)
;
21186 }
21187 } else {
21188 Function *DMB = Intrinsic::getDeclaration(M, Intrinsic::arm_dmb);
21189 // Only a full system barrier exists in the M-class architectures.
21190 Domain = Subtarget->isMClass() ? ARM_MB::SY : Domain;
21191 Constant *CDomain = Builder.getInt32(Domain);
21192 return Builder.CreateCall(DMB, CDomain);
21193 }
21194}
21195
21196// Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
21197Instruction *ARMTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
21198 Instruction *Inst,
21199 AtomicOrdering Ord) const {
21200 switch (Ord) {
21201 case AtomicOrdering::NotAtomic:
21202 case AtomicOrdering::Unordered:
21203 llvm_unreachable("Invalid fence: unordered/non-atomic")::llvm::llvm_unreachable_internal("Invalid fence: unordered/non-atomic"
, "llvm/lib/Target/ARM/ARMISelLowering.cpp", 21203)
;
21204 case AtomicOrdering::Monotonic:
21205 case AtomicOrdering::Acquire:
21206 return nullptr; // Nothing to do
21207 case AtomicOrdering::SequentiallyConsistent:
21208 if (!Inst->hasAtomicStore())
21209 return nullptr; // Nothing to do
21210 [[fallthrough]];
21211 case AtomicOrdering::Release:
21212 case AtomicOrdering::AcquireRelease:
21213 if (Subtarget->preferISHSTBarriers())
21214 return makeDMB(Builder, ARM_MB::ISHST);
21215 // FIXME: add a comment with a link to documentation justifying this.
21216 else
21217 return makeDMB(Builder, ARM_MB::ISH);
21218 }
21219 llvm_unreachable("Unknown fence ordering in emitLeadingFence")::llvm::llvm_unreachable_internal("Unknown fence ordering in emitLeadingFence"
, "llvm/lib/Target/ARM/ARMISelLowering.cpp", 21219)
;
21220}
21221
21222Instruction *ARMTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
21223 Instruction *Inst,
21224 AtomicOrdering Ord) const {
21225 switch (Ord) {
21226 case AtomicOrdering::NotAtomic:
21227 case AtomicOrdering::Unordered:
21228 llvm_unreachable("Invalid fence: unordered/not-atomic")::llvm::llvm_unreachable_internal("Invalid fence: unordered/not-atomic"
, "llvm/lib/Target/ARM/ARMISelLowering.cpp", 21228)
;
21229 case AtomicOrdering::Monotonic:
21230 case AtomicOrdering::Release:
21231 return nullptr; // Nothing to do
21232 case AtomicOrdering::Acquire:
21233 case AtomicOrdering::AcquireRelease:
21234 case AtomicOrdering::SequentiallyConsistent:
21235 return makeDMB(Builder, ARM_MB::ISH);
21236 }
21237 llvm_unreachable("Unknown fence ordering in emitTrailingFence")::llvm::llvm_unreachable_internal("Unknown fence ordering in emitTrailingFence"
, "llvm/lib/Target/ARM/ARMISelLowering.cpp", 21237)
;
21238}
21239
21240// Loads and stores less than 64-bits are already atomic; ones above that
21241// are doomed anyway, so defer to the default libcall and blame the OS when
21242// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
21243// anything for those.
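// In practice this means a 64-bit atomic store is only expanded here when the
// subtarget actually has 64-bit exclusives (non-M-class, ARMv6+ in ARM state
// or ARMv7+ in Thumb); everything else is left to the default lowering.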
21244TargetLoweringBase::AtomicExpansionKind
21245ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
21246 bool has64BitAtomicStore;
21247 if (Subtarget->isMClass())
21248 has64BitAtomicStore = false;
21249 else if (Subtarget->isThumb())
21250 has64BitAtomicStore = Subtarget->hasV7Ops();
21251 else
21252 has64BitAtomicStore = Subtarget->hasV6Ops();
21253
21254 unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
21255 return Size == 64 && has64BitAtomicStore ? AtomicExpansionKind::Expand
21256 : AtomicExpansionKind::None;
21257}
21258
21259// Loads and stores less than 64-bits are already atomic; ones above that
21260// are doomed anyway, so defer to the default libcall and blame the OS when
21261// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
21262// anything for those.
21263// FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that
21264// guarantee, see DDI0406C ARM architecture reference manual,
21265// sections A8.8.72-74 LDRD)
21266TargetLowering::AtomicExpansionKind
21267ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
21268 bool has64BitAtomicLoad;
21269 if (Subtarget->isMClass())
21270 has64BitAtomicLoad = false;
21271 else if (Subtarget->isThumb())
21272 has64BitAtomicLoad = Subtarget->hasV7Ops();
21273 else
21274 has64BitAtomicLoad = Subtarget->hasV6Ops();
21275
21276 unsigned Size = LI->getType()->getPrimitiveSizeInBits();
21277 return (Size == 64 && has64BitAtomicLoad) ? AtomicExpansionKind::LLOnly
21278 : AtomicExpansionKind::None;
21279}
21280
21281// For the real atomic operations, we have ldrex/strex up to 32 bits,
21282// and up to 64 bits on the non-M profiles
21283TargetLowering::AtomicExpansionKind
21284ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
21285 if (AI->isFloatingPointOperation())
21286 return AtomicExpansionKind::CmpXChg;
21287
21288 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
21289 bool hasAtomicRMW;
21290 if (Subtarget->isMClass())
21291 hasAtomicRMW = Subtarget->hasV8MBaselineOps();
21292 else if (Subtarget->isThumb())
21293 hasAtomicRMW = Subtarget->hasV7Ops();
21294 else
21295 hasAtomicRMW = Subtarget->hasV6Ops();
21296 if (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW) {
21297 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
21298 // implement atomicrmw without spilling. If the target address is also on
21299 // the stack and close enough to the spill slot, this can lead to a
21300 // situation where the monitor always gets cleared and the atomic operation
21301 // can never succeed. So at -O0 lower this operation to a CAS loop.
21302 if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
21303 return AtomicExpansionKind::CmpXChg;
21304 return AtomicExpansionKind::LLSC;
21305 }
21306 return AtomicExpansionKind::None;
21307}
21308
21309// Similar to shouldExpandAtomicRMWInIR, ldrex/strex can be used up to 32
21310// bits, and up to 64 bits on the non-M profiles.
21311TargetLowering::AtomicExpansionKind
21312ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
21313 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
21314 // implement cmpxchg without spilling. If the address being exchanged is also
21315 // on the stack and close enough to the spill slot, this can lead to a
21316 // situation where the monitor always gets cleared and the atomic operation
21317 // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
21318 unsigned Size = AI->getOperand(1)->getType()->getPrimitiveSizeInBits();
21319 bool HasAtomicCmpXchg;
21320 if (Subtarget->isMClass())
21321 HasAtomicCmpXchg = Subtarget->hasV8MBaselineOps();
21322 else if (Subtarget->isThumb())
21323 HasAtomicCmpXchg = Subtarget->hasV7Ops();
21324 else
21325 HasAtomicCmpXchg = Subtarget->hasV6Ops();
21326 if (getTargetMachine().getOptLevel() != 0 && HasAtomicCmpXchg &&
21327 Size <= (Subtarget->isMClass() ? 32U : 64U))
21328 return AtomicExpansionKind::LLSC;
21329 return AtomicExpansionKind::None;
21330}
21331
21332bool ARMTargetLowering::shouldInsertFencesForAtomic(
21333 const Instruction *I) const {
21334 return InsertFencesForAtomic;
21335}
21336
21337bool ARMTargetLowering::useLoadStackGuardNode() const {
21338 // ROPI/RWPI are not supported currently.
21339 return !Subtarget->isROPI() && !Subtarget->isRWPI();
21340}
21341
21342void ARMTargetLowering::insertSSPDeclarations(Module &M) const {
21343 if (!Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
21344 return TargetLowering::insertSSPDeclarations(M);
21345
21346 // MSVC CRT has a global variable holding security cookie.
21347 M.getOrInsertGlobal("__security_cookie",
21348 Type::getInt8PtrTy(M.getContext()));
21349
21350 // MSVC CRT has a function to validate security cookie.
21351 FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
21352 "__security_check_cookie", Type::getVoidTy(M.getContext()),
21353 Type::getInt8PtrTy(M.getContext()));
21354 if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee()))
21355 F->addParamAttr(0, Attribute::AttrKind::InReg);
21356}
21357
21358Value *ARMTargetLowering::getSDagStackGuard(const Module &M) const {
21359 // MSVC CRT has a global variable holding security cookie.
21360 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
21361 return M.getGlobalVariable("__security_cookie");
21362 return TargetLowering::getSDagStackGuard(M);
21363}
21364
21365Function *ARMTargetLowering::getSSPStackGuardCheck(const Module &M) const {
21366 // MSVC CRT has a function to validate security cookie.
21367 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
21368 return M.getFunction("__security_check_cookie");
21369 return TargetLowering::getSSPStackGuardCheck(M);
21370}
21371
21372bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
21373 unsigned &Cost) const {
21374 // If we do not have NEON, vector types are not natively supported.
21375 if (!Subtarget->hasNEON())
21376 return false;
21377
21378 // Floating point values and vector values map to the same register file.
21379 // Therefore, although we could do a store + extract on a vector type, it is
21380 // better to leave it as a float, since we have more freedom in the addressing
21381 // mode for those.
21382 if (VectorTy->isFPOrFPVectorTy())
21383 return false;
21384
21385 // If the index is unknown at compile time, this is very expensive to lower
21386 // and it is not possible to combine the store with the extract.
21387 if (!isa<ConstantInt>(Idx))
21388 return false;
21389
21390 assert(VectorTy->isVectorTy() && "VectorTy is not a vector type");
21391 unsigned BitWidth = VectorTy->getPrimitiveSizeInBits().getFixedValue();
21392 // We can do a store + vector extract on any vector that fits perfectly in a D
21393 // or Q register.
21394 if (BitWidth == 64 || BitWidth == 128) {
21395 Cost = 0;
21396 return true;
21397 }
21398 return false;
21399}
21400
21401bool ARMTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
21402 return Subtarget->hasV6T2Ops();
21403}
21404
21405bool ARMTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
21406 return Subtarget->hasV6T2Ops();
21407}
21408
21409bool ARMTargetLowering::isMaskAndCmp0FoldingBeneficial(
21410 const Instruction &AndI) const {
21411 if (!Subtarget->hasV7Ops())
21412 return false;
21413
21414 // Sink the `and` instruction only if the mask would fit into a modified
21415 // immediate operand.
21416 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
21417 if (!Mask || Mask->getValue().getBitWidth() > 32u)
21418 return false;
21419 auto MaskVal = unsigned(Mask->getValue().getZExtValue());
21420 return (Subtarget->isThumb2() ? ARM_AM::getT2SOImmVal(MaskVal)
21421 : ARM_AM::getSOImmVal(MaskVal)) != -1;
21422}
21423
21424TargetLowering::ShiftLegalizationStrategy
21425ARMTargetLowering::preferredShiftLegalizationStrategy(
21426 SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const {
21427 if (Subtarget->hasMinSize() && !Subtarget->isTargetWindows())
21428 return ShiftLegalizationStrategy::LowerToLibcall;
21429 return TargetLowering::preferredShiftLegalizationStrategy(DAG, N,
21430 ExpansionFactor);
21431}
21432
21433Value *ARMTargetLowering::emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy,
21434 Value *Addr,
21435 AtomicOrdering Ord) const {
21436 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21437 bool IsAcquire = isAcquireOrStronger(Ord);
21438
21439 // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd
21440 // intrinsic must return {i32, i32} and we have to recombine them into a
21441 // single i64 here.
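  // In effect the sequence below computes
  //   val64 = zext(lo) | (zext(hi) << 32)
  // with lo/hi swapped first on big-endian subtargets.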
21442 if (ValueTy->getPrimitiveSizeInBits() == 64) {
21443 Intrinsic::ID Int =
21444 IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd;
21445 Function *Ldrex = Intrinsic::getDeclaration(M, Int);
21446
21447 Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
21448 Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi");
21449
21450 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
21451 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
21452 if (!Subtarget->isLittle())
21453 std::swap (Lo, Hi);
21454 Lo = Builder.CreateZExt(Lo, ValueTy, "lo64");
21455 Hi = Builder.CreateZExt(Hi, ValueTy, "hi64");
21456 return Builder.CreateOr(
21457 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValueTy, 32)), "val64");
21458 }
21459
21460 Type *Tys[] = { Addr->getType() };
21461 Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex;
21462 Function *Ldrex = Intrinsic::getDeclaration(M, Int, Tys);
21463 CallInst *CI = Builder.CreateCall(Ldrex, Addr);
21464
21465 CI->addParamAttr(
21466 0, Attribute::get(M->getContext(), Attribute::ElementType, ValueTy));
21467 return Builder.CreateTruncOrBitCast(CI, ValueTy);
21468}
21469
21470void ARMTargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
21471 IRBuilderBase &Builder) const {
21472 if (!Subtarget->hasV7Ops())
21473 return;
21474 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21475 Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::arm_clrex));
21476}
21477
21478Value *ARMTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
21479 Value *Val, Value *Addr,
21480 AtomicOrdering Ord) const {
21481 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21482 bool IsRelease = isReleaseOrStronger(Ord);
21483
21484 // Since the intrinsics must have legal type, the i64 intrinsics take two
21485 // parameters: "i32, i32". We must marshal Val into the appropriate form
21486 // before the call.
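  // In effect: lo = trunc(Val), hi = trunc(Val >> 32), swapped on big-endian
  // subtargets, then passed as {lo, hi, addr} to strexd/stlexd.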
21487 if (Val->getType()->getPrimitiveSizeInBits() == 64) {
21488 Intrinsic::ID Int =
21489 IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd;
21490 Function *Strex = Intrinsic::getDeclaration(M, Int);
21491 Type *Int32Ty = Type::getInt32Ty(M->getContext());
21492
21493 Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo");
21494 Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi");
21495 if (!Subtarget->isLittle())
21496 std::swap(Lo, Hi);
21497 Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
21498 return Builder.CreateCall(Strex, {Lo, Hi, Addr});
21499 }
21500
21501 Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlex : Intrinsic::arm_strex;
21502 Type *Tys[] = { Addr->getType() };
21503 Function *Strex = Intrinsic::getDeclaration(M, Int, Tys);
21504
21505 CallInst *CI = Builder.CreateCall(
21506 Strex, {Builder.CreateZExtOrBitCast(
21507 Val, Strex->getFunctionType()->getParamType(0)),
21508 Addr});
21509 CI->addParamAttr(1, Attribute::get(M->getContext(), Attribute::ElementType,
21510 Val->getType()));
21511 return CI;
21512}
21513
21514
21515bool ARMTargetLowering::alignLoopsWithOptSize() const {
21516 return Subtarget->isMClass();
21517}
21518
21519/// A helper function for determining the number of interleaved accesses we
21520/// will generate when lowering accesses of the given type.
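/// For example, under the ceiling division below a 512-bit <16 x i32> maps to
/// 4 accesses and a 64-bit <8 x i8> maps to 1.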
21521unsigned
21522ARMTargetLowering::getNumInterleavedAccesses(VectorType *VecTy,
21523 const DataLayout &DL) const {
21524 return (DL.getTypeSizeInBits(VecTy) + 127) / 128;
21525}
21526
21527bool ARMTargetLowering::isLegalInterleavedAccessType(
21528 unsigned Factor, FixedVectorType *VecTy, Align Alignment,
21529 const DataLayout &DL) const {
21530
21531 unsigned VecSize = DL.getTypeSizeInBits(VecTy);
21532 unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
21533
21534 if (!Subtarget->hasNEON() && !Subtarget->hasMVEIntegerOps())
21535 return false;
21536
21537 // Ensure the vector doesn't have f16 elements. Even though we could do an
21538 // i16 vldN, we can't hold the f16 vectors and will end up converting via
21539 // f32.
21540 if (Subtarget->hasNEON() && VecTy->getElementType()->isHalfTy())
21541 return false;
21542 if (Subtarget->hasMVEIntegerOps() && Factor == 3)
21543 return false;
21544
21545 // Ensure the number of vector elements is greater than 1.
21546 if (VecTy->getNumElements() < 2)
21547 return false;
21548
21549 // Ensure the element type is legal.
21550 if (ElSize != 8 && ElSize != 16 && ElSize != 32)
21551 return false;
21552   // And that the alignment is high enough under MVE.
21553 if (Subtarget->hasMVEIntegerOps() && Alignment < ElSize / 8)
21554 return false;
21555
21556 // Ensure the total vector size is 64 or a multiple of 128. Types larger than
21557 // 128 will be split into multiple interleaved accesses.
21558 if (Subtarget->hasNEON() && VecSize == 64)
21559 return true;
21560 return VecSize % 128 == 0;
21561}
21562
21563unsigned ARMTargetLowering::getMaxSupportedInterleaveFactor() const {
21564 if (Subtarget->hasNEON())
21565 return 4;
21566 if (Subtarget->hasMVEIntegerOps())
21567 return MVEMaxSupportedInterleaveFactor;
21568 return TargetLoweringBase::getMaxSupportedInterleaveFactor();
21569}
21570
21571/// Lower an interleaved load into a vldN intrinsic.
21572///
21573/// E.g. Lower an interleaved load (Factor = 2):
21574/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr, align 4
21575/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
21576/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
21577///
21578/// Into:
21579/// %vld2 = { <4 x i32>, <4 x i32> } call llvm.arm.neon.vld2(%ptr, 4)
21580/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 0
21581/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 1
21582bool ARMTargetLowering::lowerInterleavedLoad(
21583 LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
21584 ArrayRef<unsigned> Indices, unsigned Factor) const {
21585 assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
21586        "Invalid interleave factor");
21587 assert(!Shuffles.empty() && "Empty shufflevector input");
21588 assert(Shuffles.size() == Indices.size() &&
21589        "Unmatched number of shufflevectors and indices");
21590
21591 auto *VecTy = cast<FixedVectorType>(Shuffles[0]->getType());
21592 Type *EltTy = VecTy->getElementType();
21593
21594 const DataLayout &DL = LI->getModule()->getDataLayout();
21595 Align Alignment = LI->getAlign();
21596
21597 // Skip if we do not have NEON and skip illegal vector types. We can
21598 // "legalize" wide vector types into multiple interleaved accesses as long as
21599 // the vector types are divisible by 128.
21600 if (!isLegalInterleavedAccessType(Factor, VecTy, Alignment, DL))
21601 return false;
21602
21603 unsigned NumLoads = getNumInterleavedAccesses(VecTy, DL);
21604
21605 // A pointer vector cannot be the return type of the ldN intrinsics. Need to
21606 // load integer vectors first and then convert to pointer vectors.
21607 if (EltTy->isPointerTy())
21608 VecTy = FixedVectorType::get(DL.getIntPtrType(EltTy), VecTy);
21609
21610 IRBuilder<> Builder(LI);
21611
21612 // The base address of the load.
21613 Value *BaseAddr = LI->getPointerOperand();
21614
21615 if (NumLoads > 1) {
21616 // If we're going to generate more than one load, reset the sub-vector type
21617 // to something legal.
21618 VecTy = FixedVectorType::get(VecTy->getElementType(),
21619 VecTy->getNumElements() / NumLoads);
21620
21621 // We will compute the pointer operand of each load from the original base
21622 // address using GEPs. Cast the base address to a pointer to the scalar
21623 // element type.
21624 BaseAddr = Builder.CreateBitCast(
21625 BaseAddr,
21626 VecTy->getElementType()->getPointerTo(LI->getPointerAddressSpace()));
21627 }
21628
21629 assert(isTypeLegal(EVT::getEVT(VecTy)) && "Illegal vldN vector type!");
21630
21631 auto createLoadIntrinsic = [&](Value *BaseAddr) {
21632 if (Subtarget->hasNEON()) {
21633 Type *Int8Ptr = Builder.getInt8PtrTy(LI->getPointerAddressSpace());
21634 Type *Tys[] = {VecTy, Int8Ptr};
21635 static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2,
21636 Intrinsic::arm_neon_vld3,
21637 Intrinsic::arm_neon_vld4};
21638 Function *VldnFunc =
21639 Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
21640
21641 SmallVector<Value *, 2> Ops;
21642 Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr));
21643 Ops.push_back(Builder.getInt32(LI->getAlign().value()));
21644
21645 return Builder.CreateCall(VldnFunc, Ops, "vldN");
21646 } else {
21647       assert((Factor == 2 || Factor == 4) &&
21648              "expected interleave factor of 2 or 4 for MVE");
21649 Intrinsic::ID LoadInts =
21650 Factor == 2 ? Intrinsic::arm_mve_vld2q : Intrinsic::arm_mve_vld4q;
21651 Type *VecEltTy =
21652 VecTy->getElementType()->getPointerTo(LI->getPointerAddressSpace());
21653 Type *Tys[] = {VecTy, VecEltTy};
21654 Function *VldnFunc =
21655 Intrinsic::getDeclaration(LI->getModule(), LoadInts, Tys);
21656
21657 SmallVector<Value *, 2> Ops;
21658 Ops.push_back(Builder.CreateBitCast(BaseAddr, VecEltTy));
21659 return Builder.CreateCall(VldnFunc, Ops, "vldN");
21660 }
21661 };
21662
21663 // Holds sub-vectors extracted from the load intrinsic return values. The
21664 // sub-vectors are associated with the shufflevector instructions they will
21665 // replace.
21666 DenseMap<ShuffleVectorInst *, SmallVector<Value *, 4>> SubVecs;
21667
21668 for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
21669 // If we're generating more than one load, compute the base address of
21670 // subsequent loads as an offset from the previous.
21671 if (LoadCount > 0)
21672 BaseAddr = Builder.CreateConstGEP1_32(VecTy->getElementType(), BaseAddr,
21673 VecTy->getNumElements() * Factor);
21674
21675 CallInst *VldN = createLoadIntrinsic(BaseAddr);
21676
21677 // Replace uses of each shufflevector with the corresponding vector loaded
21678 // by ldN.
21679 for (unsigned i = 0; i < Shuffles.size(); i++) {
21680 ShuffleVectorInst *SV = Shuffles[i];
21681 unsigned Index = Indices[i];
21682
21683 Value *SubVec = Builder.CreateExtractValue(VldN, Index);
21684
21685 // Convert the integer vector to pointer vector if the element is pointer.
21686 if (EltTy->isPointerTy())
21687 SubVec = Builder.CreateIntToPtr(
21688 SubVec,
21689 FixedVectorType::get(SV->getType()->getElementType(), VecTy));
21690
21691 SubVecs[SV].push_back(SubVec);
21692 }
21693 }
21694
21695 // Replace uses of the shufflevector instructions with the sub-vectors
21696 // returned by the load intrinsic. If a shufflevector instruction is
21697 // associated with more than one sub-vector, those sub-vectors will be
21698 // concatenated into a single wide vector.
21699 for (ShuffleVectorInst *SVI : Shuffles) {
21700 auto &SubVec = SubVecs[SVI];
21701 auto *WideVec =
21702 SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0];
21703 SVI->replaceAllUsesWith(WideVec);
21704 }
21705
21706 return true;
21707}
21708
21709/// Lower an interleaved store into a vstN intrinsic.
21710///
21711/// E.g. Lower an interleaved store (Factor = 3):
21712/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
21713/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
21714/// store <12 x i32> %i.vec, <12 x i32>* %ptr, align 4
21715///
21716/// Into:
21717/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
21718/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
21719/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
21720/// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
21721///
21722/// Note that the new shufflevectors will be removed and we'll only generate one
21723/// vst3 instruction in CodeGen.
21724///
21725/// Example for a more general valid mask (Factor 3). Lower:
21726/// %i.vec = shuffle <32 x i32> %v0, <32 x i32> %v1,
21727/// <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
21728/// store <12 x i32> %i.vec, <12 x i32>* %ptr
21729///
21730/// Into:
21731/// %sub.v0 = shuffle <32 x i32> %v0, <32 x i32> v1, <4, 5, 6, 7>
21732/// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> v1, <32, 33, 34, 35>
21733/// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> v1, <16, 17, 18, 19>
21734/// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
21735bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
21736 ShuffleVectorInst *SVI,
21737 unsigned Factor) const {
21738 assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
21739        "Invalid interleave factor");
21740
21741 auto *VecTy = cast<FixedVectorType>(SVI->getType());
21742 assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
21743
21744 unsigned LaneLen = VecTy->getNumElements() / Factor;
21745 Type *EltTy = VecTy->getElementType();
21746 auto *SubVecTy = FixedVectorType::get(EltTy, LaneLen);
21747
21748 const DataLayout &DL = SI->getModule()->getDataLayout();
21749 Align Alignment = SI->getAlign();
21750
21751 // Skip if we do not have NEON and skip illegal vector types. We can
21752 // "legalize" wide vector types into multiple interleaved accesses as long as
21753 // the vector types are divisible by 128.
21754 if (!isLegalInterleavedAccessType(Factor, SubVecTy, Alignment, DL))
21755 return false;
21756
21757 unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL);
21758
21759 Value *Op0 = SVI->getOperand(0);
21760 Value *Op1 = SVI->getOperand(1);
21761 IRBuilder<> Builder(SI);
21762
21763 // StN intrinsics don't support pointer vectors as arguments. Convert pointer
21764 // vectors to integer vectors.
21765 if (EltTy->isPointerTy()) {
21766 Type *IntTy = DL.getIntPtrType(EltTy);
21767
21768 // Convert to the corresponding integer vector.
21769 auto *IntVecTy =
21770 FixedVectorType::get(IntTy, cast<FixedVectorType>(Op0->getType()));
21771 Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
21772 Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
21773
21774 SubVecTy = FixedVectorType::get(IntTy, LaneLen);
21775 }
21776
21777 // The base address of the store.
21778 Value *BaseAddr = SI->getPointerOperand();
21779
21780 if (NumStores > 1) {
21781 // If we're going to generate more than one store, reset the lane length
21782 // and sub-vector type to something legal.
21783 LaneLen /= NumStores;
21784 SubVecTy = FixedVectorType::get(SubVecTy->getElementType(), LaneLen);
21785
21786 // We will compute the pointer operand of each store from the original base
21787 // address using GEPs. Cast the base address to a pointer to the scalar
21788 // element type.
21789 BaseAddr = Builder.CreateBitCast(
21790 BaseAddr,
21791 SubVecTy->getElementType()->getPointerTo(SI->getPointerAddressSpace()));
21792 }
21793
21794 assert(isTypeLegal(EVT::getEVT(SubVecTy)) && "Illegal vstN vector type!");
21795
21796 auto Mask = SVI->getShuffleMask();
21797
21798 auto createStoreIntrinsic = [&](Value *BaseAddr,
21799 SmallVectorImpl<Value *> &Shuffles) {
21800 if (Subtarget->hasNEON()) {
21801 static const Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2,
21802 Intrinsic::arm_neon_vst3,
21803 Intrinsic::arm_neon_vst4};
21804 Type *Int8Ptr = Builder.getInt8PtrTy(SI->getPointerAddressSpace());
21805 Type *Tys[] = {Int8Ptr, SubVecTy};
21806
21807 Function *VstNFunc = Intrinsic::getDeclaration(
21808 SI->getModule(), StoreInts[Factor - 2], Tys);
21809
21810 SmallVector<Value *, 6> Ops;
21811 Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr));
21812 append_range(Ops, Shuffles);
21813 Ops.push_back(Builder.getInt32(SI->getAlign().value()));
21814 Builder.CreateCall(VstNFunc, Ops);
21815 } else {
21816 assert((Factor == 2 || Factor == 4) &&
21817        "expected interleave factor of 2 or 4 for MVE");
21818 Intrinsic::ID StoreInts =
21819 Factor == 2 ? Intrinsic::arm_mve_vst2q : Intrinsic::arm_mve_vst4q;
21820 Type *EltPtrTy = SubVecTy->getElementType()->getPointerTo(
21821 SI->getPointerAddressSpace());
21822 Type *Tys[] = {EltPtrTy, SubVecTy};
21823 Function *VstNFunc =
21824 Intrinsic::getDeclaration(SI->getModule(), StoreInts, Tys);
21825
21826 SmallVector<Value *, 6> Ops;
21827 Ops.push_back(Builder.CreateBitCast(BaseAddr, EltPtrTy));
21828 append_range(Ops, Shuffles);
21829 for (unsigned F = 0; F < Factor; F++) {
21830 Ops.push_back(Builder.getInt32(F));
21831 Builder.CreateCall(VstNFunc, Ops);
21832 Ops.pop_back();
21833 }
21834 }
21835 };
21836
21837 for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
21838 // If we're generating more than one store, we compute the base address of
21839 // subsequent stores as an offset from the previous.
21840 if (StoreCount > 0)
21841 BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getElementType(),
21842 BaseAddr, LaneLen * Factor);
21843
21844 SmallVector<Value *, 4> Shuffles;
21845
21846 // Split the shufflevector operands into sub vectors for the new vstN call.
21847 for (unsigned i = 0; i < Factor; i++) {
21848 unsigned IdxI = StoreCount * LaneLen * Factor + i;
21849 if (Mask[IdxI] >= 0) {
21850 Shuffles.push_back(Builder.CreateShuffleVector(
21851 Op0, Op1, createSequentialMask(Mask[IdxI], LaneLen, 0)));
21852 } else {
21853 unsigned StartMask = 0;
21854 for (unsigned j = 1; j < LaneLen; j++) {
21855 unsigned IdxJ = StoreCount * LaneLen * Factor + j;
21856 if (Mask[IdxJ * Factor + IdxI] >= 0) {
21857 StartMask = Mask[IdxJ * Factor + IdxI] - IdxJ;
21858 break;
21859 }
21860 }
21861 // Note: If all elements in a chunk are undef, StartMask = 0.
21862 // Filling undef gaps with arbitrary elements is ok, since those
21863 // elements were being written anyway (as undef). When the whole
21864 // chunk is undef we default to elements starting from 0.
21865 // Note: StartMask cannot be negative; this is checked in
21866 // isReInterleaveMask.
21867 Shuffles.push_back(Builder.CreateShuffleVector(
21868 Op0, Op1, createSequentialMask(StartMask, LaneLen, 0)));
21869 }
21870 }
21871
21872 createStoreIntrinsic(BaseAddr, Shuffles);
21873 }
21874 return true;
21875}
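To make the mask arithmetic above concrete, here is a standalone sketch that is not part of ARMISelLowering.cpp; sequentialMask is a hypothetical stand-in for createSequentialMask(Start, LaneLen, 0). For the Factor-3 example in the function comment, the first defined mask element of each lane (4, 32, 16) seeds LaneLen consecutive indices, which become the three operands passed to the vst3 intrinsic.

#include <iostream>
#include <vector>

// Hypothetical stand-in for createSequentialMask(Start, LaneLen, 0):
// returns Start, Start+1, ..., Start+LaneLen-1.
static std::vector<int> sequentialMask(int Start, int LaneLen) {
  std::vector<int> M;
  for (int i = 0; i < LaneLen; ++i)
    M.push_back(Start + i);
  return M;
}

int main() {
  const int Factor = 3, LaneLen = 4;
  // First mask element of each lane in <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>.
  const int LaneStart[Factor] = {4, 32, 16};
  for (int F = 0; F < Factor; ++F) {
    std::cout << "sub.v" << F << ":";
    for (int Idx : sequentialMask(LaneStart[F], LaneLen))
      std::cout << ' ' << Idx;          // 4 5 6 7 / 32 33 34 35 / 16 17 18 19
    std::cout << '\n';
  }
}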
21876
21877enum HABaseType {
21878 HA_UNKNOWN = 0,
21879 HA_FLOAT,
21880 HA_DOUBLE,
21881 HA_VECT64,
21882 HA_VECT128
21883};
21884
21885static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base,
21886 uint64_t &Members) {
21887 if (auto *ST = dyn_cast<StructType>(Ty)) {
21888 for (unsigned i = 0; i < ST->getNumElements(); ++i) {
21889 uint64_t SubMembers = 0;
21890 if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers))
21891 return false;
21892 Members += SubMembers;
21893 }
21894 } else if (auto *AT = dyn_cast<ArrayType>(Ty)) {
21895 uint64_t SubMembers = 0;
21896 if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers))
21897 return false;
21898 Members += SubMembers * AT->getNumElements();
21899 } else if (Ty->isFloatTy()) {
21900 if (Base != HA_UNKNOWN && Base != HA_FLOAT)
21901 return false;
21902 Members = 1;
21903 Base = HA_FLOAT;
21904 } else if (Ty->isDoubleTy()) {
21905 if (Base != HA_UNKNOWN && Base != HA_DOUBLE)
21906 return false;
21907 Members = 1;
21908 Base = HA_DOUBLE;
21909 } else if (auto *VT = dyn_cast<VectorType>(Ty)) {
21910 Members = 1;
21911 switch (Base) {
21912 case HA_FLOAT:
21913 case HA_DOUBLE:
21914 return false;
21915 case HA_VECT64:
21916 return VT->getPrimitiveSizeInBits().getFixedValue() == 64;
21917 case HA_VECT128:
21918 return VT->getPrimitiveSizeInBits().getFixedValue() == 128;
21919 case HA_UNKNOWN:
21920 switch (VT->getPrimitiveSizeInBits().getFixedValue()) {
21921 case 64:
21922 Base = HA_VECT64;
21923 return true;
21924 case 128:
21925 Base = HA_VECT128;
21926 return true;
21927 default:
21928 return false;
21929 }
21930 }
21931 }
21932
21933 return (Members > 0 && Members <= 4);
21934}
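As an illustration of the rule implemented above, the following standalone sketch uses toy types rather than LLVM's Type hierarchy (all names are invented): an aggregate qualifies as homogeneous only if every leaf shares a single base kind and the accumulated member count is between 1 and 4.

#include <cassert>
#include <cstdint>
#include <vector>

enum class Base { Unknown, Float, Double };

struct ToyType {
  bool IsFloat = false, IsDouble = false;
  std::vector<ToyType> Fields; // non-empty means a struct-like aggregate
};

// Mirrors the recursion above on toy types: leaves set the base kind,
// aggregates accumulate member counts, mismatched bases reject the type.
static bool isHA(const ToyType &T, Base &B, uint64_t &Members) {
  if (!T.Fields.empty()) {
    for (const ToyType &F : T.Fields) {
      uint64_t Sub = 0;
      if (!isHA(F, B, Sub))
        return false;
      Members += Sub;
    }
  } else if (T.IsFloat) {
    if (B != Base::Unknown && B != Base::Float)
      return false;
    Members = 1;
    B = Base::Float;
  } else if (T.IsDouble) {
    if (B != Base::Unknown && B != Base::Double)
      return false;
    Members = 1;
    B = Base::Double;
  }
  return Members > 0 && Members <= 4;
}

int main() {
  ToyType F{true}, D{false, true};
  ToyType FourFloats{false, false, {F, F, F, F}}; // HA with 4 float members
  ToyType Mixed{false, false, {F, D}};            // rejected: mixed base kinds
  Base B1 = Base::Unknown, B2 = Base::Unknown;
  uint64_t M1 = 0, M2 = 0;
  assert(isHA(FourFloats, B1, M1) && M1 == 4);
  assert(!isHA(Mixed, B2, M2));
}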
21935
21936/// Return the correct alignment for the current calling convention.
21937Align ARMTargetLowering::getABIAlignmentForCallingConv(
21938 Type *ArgTy, const DataLayout &DL) const {
21939 const Align ABITypeAlign = DL.getABITypeAlign(ArgTy);
21940 if (!ArgTy->isVectorTy())
21941 return ABITypeAlign;
21942
21943 // Avoid over-aligning vector parameters. It would require realigning the
21944 // stack and waste space for no real benefit.
21945 return std::min(ABITypeAlign, DL.getStackAlignment());
21946}
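A tiny sketch of the clamp above, with plain integers standing in for llvm::Align; the 8-byte stack alignment used below is an assumption for illustration, not something read from a DataLayout. Vector arguments never demand more alignment than the stack provides.

#include <algorithm>
#include <cassert>

// Vector arguments are clamped to the stack alignment; everything else keeps
// its ABI type alignment.
static unsigned argAlign(unsigned ABITypeAlign, bool IsVector,
                         unsigned StackAlign) {
  return IsVector ? std::min(ABITypeAlign, StackAlign) : ABITypeAlign;
}

int main() {
  assert(argAlign(16, /*IsVector=*/true, /*StackAlign=*/8) == 8);  // clamped
  assert(argAlign(8, /*IsVector=*/false, /*StackAlign=*/8) == 8);  // unchanged
}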
21947
21948/// Return true if a type is an AAPCS-VFP homogeneous aggregate or one of
21949/// [N x i32] or [N x i64]. This allows front-ends to skip emitting padding when
21950/// passing according to AAPCS rules.
21951bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters(
21952 Type *Ty, CallingConv::ID CallConv, bool isVarArg,
21953 const DataLayout &DL) const {
21954 if (getEffectiveCallingConv(CallConv, isVarArg) !=
21955 CallingConv::ARM_AAPCS_VFP)
21956 return false;
21957
21958 HABaseType Base = HA_UNKNOWN;
21959 uint64_t Members = 0;
21960 bool IsHA = isHomogeneousAggregate(Ty, Base, Members);
21961 LLVM_DEBUG(dbgs() << "isHA: " << IsHA << " "; Ty->dump());
21962
21963 bool IsIntArray = Ty->isArrayTy() && Ty->getArrayElementType()->isIntegerTy();
21964 return IsHA || IsIntArray;
21965}
21966
21967Register ARMTargetLowering::getExceptionPointerRegister(
21968 const Constant *PersonalityFn) const {
21969 // Platforms which do not use SjLj EH may return values in these registers
21970 // via the personality function.
21971 return Subtarget->useSjLjEH() ? Register() : ARM::R0;
21972}
21973
21974Register ARMTargetLowering::getExceptionSelectorRegister(
21975 const Constant *PersonalityFn) const {
21976 // Platforms which do not use SjLj EH may return values in these registers
21977 // via the personality function.
21978 return Subtarget->useSjLjEH() ? Register() : ARM::R1;
21979}
21980
21981void ARMTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
21982 // Update IsSplitCSR in ARMFunctionInfo.
21983 ARMFunctionInfo *AFI = Entry->getParent()->getInfo<ARMFunctionInfo>();
21984 AFI->setIsSplitCSR(true);
21985}
21986
21987void ARMTargetLowering::insertCopiesSplitCSR(
21988 MachineBasicBlock *Entry,
21989 const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
21990 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
21991 const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
21992 if (!IStart)
21993 return;
21994
21995 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
21996 MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
21997 MachineBasicBlock::iterator MBBI = Entry->begin();
21998 for (const MCPhysReg *I = IStart; *I; ++I) {
21999 const TargetRegisterClass *RC = nullptr;
22000 if (ARM::GPRRegClass.contains(*I))
22001 RC = &ARM::GPRRegClass;
22002 else if (ARM::DPRRegClass.contains(*I))
22003 RC = &ARM::DPRRegClass;
22004 else
22005 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
22006
22007 Register NewVR = MRI->createVirtualRegister(RC);
22008 // Create copy from CSR to a virtual register.
22009 // FIXME: this currently does not emit CFI pseudo-instructions, it works
22010 // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
22011 // nounwind. If we want to generalize this later, we may need to emit
22012 // CFI pseudo-instructions.
22013 assert(Entry->getParent()->getFunction().hasFnAttribute(
22014            Attribute::NoUnwind) &&
22015        "Function should be nounwind in insertCopiesSplitCSR!");
22016 Entry->addLiveIn(*I);
22017 BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
22018 .addReg(*I);
22019
22020 // Insert the copy-back instructions right before the terminator.
22021 for (auto *Exit : Exits)
22022 BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
22023 TII->get(TargetOpcode::COPY), *I)
22024 .addReg(NewVR);
22025 }
22026}
22027
22028void ARMTargetLowering::finalizeLowering(MachineFunction &MF) const {
22029 MF.getFrameInfo().computeMaxCallFrameSize(MF);
22030 TargetLoweringBase::finalizeLowering(MF);
22031}
22032
22033bool ARMTargetLowering::isComplexDeinterleavingSupported() const {
22034 return Subtarget->hasMVEIntegerOps();
22035}
22036
22037bool ARMTargetLowering::isComplexDeinterleavingOperationSupported(
22038 ComplexDeinterleavingOperation Operation, Type *Ty) const {
22039 auto *VTy = dyn_cast<FixedVectorType>(Ty);
22040 if (!VTy)
22041 return false;
22042
22043 auto *ScalarTy = VTy->getScalarType();
22044 unsigned NumElements = VTy->getNumElements();
22045
22046 unsigned VTyWidth = VTy->getScalarSizeInBits() * NumElements;
22047 if (VTyWidth < 128 || !llvm::isPowerOf2_32(VTyWidth))
22048 return false;
22049
22050 // Both VCADD and VCMUL/VCMLA support the same types, F16 and F32
22051 if (ScalarTy->isHalfTy() || ScalarTy->isFloatTy())
22052 return Subtarget->hasMVEFloatOps();
22053
22054 if (Operation != ComplexDeinterleavingOperation::CAdd)
22055 return false;
22056
22057 return Subtarget->hasMVEIntegerOps() &&
22058 (ScalarTy->isIntegerTy(8) || ScalarTy->isIntegerTy(16) ||
22059 ScalarTy->isIntegerTy(32));
22060}
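A standalone sketch of the width check above: the total vector width, scalar bits times element count, must be a power of two and at least 128 bits before complex-number lowering is attempted. The helper name is invented for illustration.

#include <cassert>

// Total width = scalar bits * element count; must be a power of two >= 128.
static bool hasUsableWidth(unsigned ScalarBits, unsigned NumElements) {
  unsigned Width = ScalarBits * NumElements;
  bool PowerOfTwo = Width != 0 && (Width & (Width - 1)) == 0;
  return Width >= 128 && PowerOfTwo;
}

int main() {
  assert(hasUsableWidth(32, 4));   // <4 x f32>  = 128 bits
  assert(hasUsableWidth(16, 16));  // <16 x f16> = 256 bits (split later)
  assert(!hasUsableWidth(32, 2));  // 64 bits: too narrow
  assert(!hasUsableWidth(32, 3));  // 96 bits: not a power of two
}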
22061
22062Value *ARMTargetLowering::createComplexDeinterleavingIR(
22063 Instruction *I, ComplexDeinterleavingOperation OperationType,
22064 ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
22065 Value *Accumulator) const {
22066
22067 FixedVectorType *Ty = cast<FixedVectorType>(InputA->getType());
22068
22069 IRBuilder<> B(I);
22070
22071 unsigned TyWidth = Ty->getScalarSizeInBits() * Ty->getNumElements();
22072
22073 assert(TyWidth >= 128 && "Width of vector type must be at least 128 bits");
22074
22075 if (TyWidth > 128) {
22076 int Stride = Ty->getNumElements() / 2;
22077 auto SplitSeq = llvm::seq<int>(0, Ty->getNumElements());
22078 auto SplitSeqVec = llvm::to_vector(SplitSeq);
22079 ArrayRef<int> LowerSplitMask(&SplitSeqVec[0], Stride);
22080 ArrayRef<int> UpperSplitMask(&SplitSeqVec[Stride], Stride);
22081
22082 auto *LowerSplitA = B.CreateShuffleVector(InputA, LowerSplitMask);
22083 auto *LowerSplitB = B.CreateShuffleVector(InputB, LowerSplitMask);
22084 auto *UpperSplitA = B.CreateShuffleVector(InputA, UpperSplitMask);
22085 auto *UpperSplitB = B.CreateShuffleVector(InputB, UpperSplitMask);
22086 Value *LowerSplitAcc = nullptr;
22087 Value *UpperSplitAcc = nullptr;
22088
22089 if (Accumulator) {
22090 LowerSplitAcc = B.CreateShuffleVector(Accumulator, LowerSplitMask);
22091 UpperSplitAcc = B.CreateShuffleVector(Accumulator, UpperSplitMask);
22092 }
22093
22094 auto *LowerSplitInt = createComplexDeinterleavingIR(
22095 I, OperationType, Rotation, LowerSplitA, LowerSplitB, LowerSplitAcc);
22096 auto *UpperSplitInt = createComplexDeinterleavingIR(
22097 I, OperationType, Rotation, UpperSplitA, UpperSplitB, UpperSplitAcc);
22098
22099 ArrayRef<int> JoinMask(&SplitSeqVec[0], Ty->getNumElements());
22100 return B.CreateShuffleVector(LowerSplitInt, UpperSplitInt, JoinMask);
22101 }
22102
22103 auto *IntTy = Type::getInt32Ty(B.getContext());
22104
22105 ConstantInt *ConstRotation = nullptr;
22106 if (OperationType == ComplexDeinterleavingOperation::CMulPartial) {
22107 ConstRotation = ConstantInt::get(IntTy, (int)Rotation);
22108
22109 if (Accumulator)
22110 return B.CreateIntrinsic(Intrinsic::arm_mve_vcmlaq, Ty,
22111 {ConstRotation, Accumulator, InputB, InputA});
22112 return B.CreateIntrinsic(Intrinsic::arm_mve_vcmulq, Ty,
22113 {ConstRotation, InputB, InputA});
22114 }
22115
22116 if (OperationType == ComplexDeinterleavingOperation::CAdd) {
22117 // 1 means the value is not halved.
22118 auto *ConstHalving = ConstantInt::get(IntTy, 1);
22119
22120 if (Rotation == ComplexDeinterleavingRotation::Rotation_90)
22121 ConstRotation = ConstantInt::get(IntTy, 0);
22122 else if (Rotation == ComplexDeinterleavingRotation::Rotation_270)
22123 ConstRotation = ConstantInt::get(IntTy, 1);
22124
22125 if (!ConstRotation)
22126 return nullptr; // Invalid rotation for arm_mve_vcaddq
22127
22128 return B.CreateIntrinsic(Intrinsic::arm_mve_vcaddq, Ty,
22129 {ConstHalving, ConstRotation, InputA, InputB});
22130 }
22131
22132 return nullptr;
22133}
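A minimal sketch of the split performed above for vectors wider than 128 bits: the sequential indices 0..N-1 are cut in half to form the lower and upper shuffle masks, each half is lowered recursively, and the results are rejoined with the full sequential mask. Plain std::vector stands in for the IR shuffle masks.

#include <cassert>
#include <numeric>
#include <vector>

int main() {
  const int NumElements = 8;            // e.g. a 256-bit <8 x f32>
  std::vector<int> Seq(NumElements);
  std::iota(Seq.begin(), Seq.end(), 0); // 0, 1, ..., 7: the join mask

  const int Stride = NumElements / 2;
  std::vector<int> Lower(Seq.begin(), Seq.begin() + Stride); // 0..3
  std::vector<int> Upper(Seq.begin() + Stride, Seq.end());   // 4..7

  assert((int)Lower.size() == Stride && Lower.back() == Stride - 1);
  assert((int)Upper.size() == Stride && Upper.front() == Stride);
}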

/build/source/llvm/include/llvm/CodeGen/ValueTypes.h

1//===- CodeGen/ValueTypes.h - Low-Level Target independ. types --*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the set of low-level target independent types which various
10// values in the code generator are. This allows the target specific behavior
11// of instructions to be described to target independent passes.
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CODEGEN_VALUETYPES_H
16#define LLVM_CODEGEN_VALUETYPES_H
17
18#include "llvm/CodeGen/MachineValueType.h"
19#include "llvm/Support/Compiler.h"
20#include "llvm/Support/MathExtras.h"
21#include "llvm/Support/TypeSize.h"
22#include <cassert>
23#include <cstdint>
24#include <string>
25
26namespace llvm {
27
28 class LLVMContext;
29 class Type;
30
31 /// Extended Value Type. Capable of holding value types which are not native
32 /// for any processor (such as the i12345 type), as well as the types an MVT
33 /// can represent.
34 struct EVT {
35 private:
36 MVT V = MVT::INVALID_SIMPLE_VALUE_TYPE;
37 Type *LLVMTy = nullptr;
38
39 public:
40 constexpr EVT() = default;
41 constexpr EVT(MVT::SimpleValueType SVT) : V(SVT) {}
42 constexpr EVT(MVT S) : V(S) {}
43
44 bool operator==(EVT VT) const {
45 return !(*this != VT);
46 }
47 bool operator!=(EVT VT) const {
48 if (V.SimpleTy != VT.V.SimpleTy)
49 return true;
50 if (V.SimpleTy == MVT::INVALID_SIMPLE_VALUE_TYPE)
51 return LLVMTy != VT.LLVMTy;
52 return false;
53 }
54
55 /// Returns the EVT that represents a floating-point type with the given
56 /// number of bits. There are two floating-point types with 128 bits - this
57 /// returns f128 rather than ppcf128.
58 static EVT getFloatingPointVT(unsigned BitWidth) {
59 return MVT::getFloatingPointVT(BitWidth);
60 }
61
62 /// Returns the EVT that represents an integer with the given number of
63 /// bits.
64 static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth) {
65 MVT M = MVT::getIntegerVT(BitWidth);
66 if (M.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE)
67 return M;
68 return getExtendedIntegerVT(Context, BitWidth);
69 }
70
71 /// Returns the EVT that represents a vector NumElements in length, where
72 /// each element is of type VT.
73 static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements,
74 bool IsScalable = false) {
75 MVT M = MVT::getVectorVT(VT.V, NumElements, IsScalable);
76 if (M.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE)
77 return M;
78 return getExtendedVectorVT(Context, VT, NumElements, IsScalable);
79 }
80
81 /// Returns the EVT that represents a vector EC.Min elements in length,
82 /// where each element is of type VT.
83 static EVT getVectorVT(LLVMContext &Context, EVT VT, ElementCount EC) {
84 MVT M = MVT::getVectorVT(VT.V, EC);
85 if (M.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE)
86 return M;
87 return getExtendedVectorVT(Context, VT, EC);
88 }
89
90 /// Return a vector with the same number of elements as this vector, but
91 /// with the element type converted to an integer type with the same
92 /// bitwidth.
93 EVT changeVectorElementTypeToInteger() const {
94 if (isSimple())
95 return getSimpleVT().changeVectorElementTypeToInteger();
96 return changeExtendedVectorElementTypeToInteger();
97 }
98
99 /// Return a VT for a vector type whose attributes match ourselves
100 /// with the exception of the element type that is chosen by the caller.
101 EVT changeVectorElementType(EVT EltVT) const {
102 if (isSimple()) {
103 assert(EltVT.isSimple() &&
104        "Can't change simple vector VT to have extended element VT");
105 return getSimpleVT().changeVectorElementType(EltVT.getSimpleVT());
106 }
107 return changeExtendedVectorElementType(EltVT);
108 }
109
110 /// Return the type converted to an equivalently sized integer or vector
111 /// with integer element type. Similar to changeVectorElementTypeToInteger,
112 /// but also handles scalars.
113 EVT changeTypeToInteger() {
114 if (isVector())
115 return changeVectorElementTypeToInteger();
116
117 if (isSimple())
118 return getSimpleVT().changeTypeToInteger();
119 return changeExtendedTypeToInteger();
120 }
121
122 /// Test if the given EVT has zero size; this will fail if called on a
123 /// scalable type.
124 bool isZeroSized() const {
125 return getSizeInBits().isZero();
126 }
127
128 /// Test if the given EVT is simple (as opposed to being extended).
129 bool isSimple() const {
130 return V.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE;
131 }
132
133 /// Test if the given EVT is extended (as opposed to being simple).
134 bool isExtended() const {
135 return !isSimple();
136 }
137
138 /// Return true if this is a FP or a vector FP type.
139 bool isFloatingPoint() const {
140 return isSimple() ? V.isFloatingPoint() : isExtendedFloatingPoint();
141 }
142
143 /// Return true if this is an integer or a vector integer type.
144 bool isInteger() const {
145 return isSimple() ? V.isInteger() : isExtendedInteger();
146 }
147
148 /// Return true if this is an integer, but not a vector.
149 bool isScalarInteger() const {
150 return isSimple() ? V.isScalarInteger() : isExtendedScalarInteger();
151 }
152
153 /// Return true if this is a vector type where the runtime
154 /// length is machine dependent
155 bool isScalableTargetExtVT() const {
156 return isSimple() && V.isScalableTargetExtVT();
157 }
158
159 /// Return true if this is a vector value type.
160 bool isVector() const {
161 return isSimple() ? V.isVector() : isExtendedVector();
162 }
163
164 /// Return true if this is a vector type where the runtime
165 /// length is machine dependent
166 bool isScalableVector() const {
167 return isSimple() ? V.isScalableVector() : isExtendedScalableVector();
168 }
169
170 bool isFixedLengthVector() const {
171 return isSimple() ? V.isFixedLengthVector()
172 : isExtendedFixedLengthVector();
173 }
174
175 /// Return true if the type is a scalable type.
176 bool isScalableVT() const {
177 return isScalableVector() || isScalableTargetExtVT();
178 }
179
180 /// Return true if this is a 16-bit vector type.
181 bool is16BitVector() const {
182 return isSimple() ? V.is16BitVector() : isExtended16BitVector();
183 }
184
185 /// Return true if this is a 32-bit vector type.
186 bool is32BitVector() const {
187 return isSimple() ? V.is32BitVector() : isExtended32BitVector();
188 }
189
190 /// Return true if this is a 64-bit vector type.
191 bool is64BitVector() const {
192 return isSimple() ? V.is64BitVector() : isExtended64BitVector();
193 }
194
195 /// Return true if this is a 128-bit vector type.
196 bool is128BitVector() const {
197 return isSimple() ? V.is128BitVector() : isExtended128BitVector();
198 }
199
200 /// Return true if this is a 256-bit vector type.
201 bool is256BitVector() const {
202 return isSimple() ? V.is256BitVector() : isExtended256BitVector();
203 }
204
205 /// Return true if this is a 512-bit vector type.
206 bool is512BitVector() const {
207 return isSimple() ? V.is512BitVector() : isExtended512BitVector();
208 }
209
210 /// Return true if this is a 1024-bit vector type.
211 bool is1024BitVector() const {
212 return isSimple() ? V.is1024BitVector() : isExtended1024BitVector();
213 }
214
215 /// Return true if this is a 2048-bit vector type.
216 bool is2048BitVector() const {
217 return isSimple() ? V.is2048BitVector() : isExtended2048BitVector();
218 }
219
220 /// Return true if this is an overloaded type for TableGen.
221 bool isOverloaded() const {
222 return (V==MVT::iAny || V==MVT::fAny || V==MVT::vAny || V==MVT::iPTRAny);
223 }
224
225 /// Return true if the bit size is a multiple of 8.
226 bool isByteSized() const {
227 return !isZeroSized() && getSizeInBits().isKnownMultipleOf(8);
228 }
229
230 /// Return true if the size is a power-of-two number of bytes.
231 bool isRound() const {
232 if (isScalableVector())
233 return false;
234 unsigned BitSize = getSizeInBits();
235 return BitSize >= 8 && !(BitSize & (BitSize - 1));
236 }
237
238 /// Return true if this has the same number of bits as VT.
239 bool bitsEq(EVT VT) const {
240 if (EVT::operator==(VT)) return true;
241 return getSizeInBits() == VT.getSizeInBits();
242 }
243
244 /// Return true if we know at compile time this has more bits than VT.
245 bool knownBitsGT(EVT VT) const {
246 return TypeSize::isKnownGT(getSizeInBits(), VT.getSizeInBits());
247 }
248
249 /// Return true if we know at compile time this has more than or the same
250 /// bits as VT.
251 bool knownBitsGE(EVT VT) const {
252 return TypeSize::isKnownGE(getSizeInBits(), VT.getSizeInBits());
253 }
254
255 /// Return true if we know at compile time this has fewer bits than VT.
256 bool knownBitsLT(EVT VT) const {
257 return TypeSize::isKnownLT(getSizeInBits(), VT.getSizeInBits());
258 }
259
260 /// Return true if we know at compile time this has fewer than or the same
261 /// bits as VT.
262 bool knownBitsLE(EVT VT) const {
263 return TypeSize::isKnownLE(getSizeInBits(), VT.getSizeInBits());
264 }
265
266 /// Return true if this has more bits than VT.
267 bool bitsGT(EVT VT) const {
268 if (EVT::operator==(VT)) return false;
269 assert(isScalableVector() == VT.isScalableVector() &&
270        "Comparison between scalable and fixed types");
271 return knownBitsGT(VT);
272 }
273
274 /// Return true if this has no less bits than VT.
275 bool bitsGE(EVT VT) const {
276 if (EVT::operator==(VT)) return true;
277 assert(isScalableVector() == VT.isScalableVector() &&
278        "Comparison between scalable and fixed types");
279 return knownBitsGE(VT);
280 }
281
282 /// Return true if this has less bits than VT.
283 bool bitsLT(EVT VT) const {
284 if (EVT::operator==(VT)) return false;
285 assert(isScalableVector() == VT.isScalableVector() &&
286        "Comparison between scalable and fixed types");
287 return knownBitsLT(VT);
288 }
289
290 /// Return true if this has no more bits than VT.
291 bool bitsLE(EVT VT) const {
292 if (EVT::operator==(VT)) return true;
293 assert(isScalableVector() == VT.isScalableVector() &&
294        "Comparison between scalable and fixed types");
295 return knownBitsLE(VT);
296 }
297
298 /// Return the SimpleValueType held in the specified simple EVT.
299 MVT getSimpleVT() const {
300 assert(isSimple() && "Expected a SimpleValueType!");
301 return V;
302 }
303
304 /// If this is a vector type, return the element type, otherwise return
305 /// this.
306 EVT getScalarType() const {
307 return isVector() ? getVectorElementType() : *this;
308 }
309
310 /// Given a vector type, return the type of each element.
311 EVT getVectorElementType() const {
312 assert(isVector() && "Invalid vector type!");
313 if (isSimple())
314 return V.getVectorElementType();
315 return getExtendedVectorElementType();
316 }
317
318 /// Given a vector type, return the number of elements it contains.
319 unsigned getVectorNumElements() const {
320 assert(isVector() && "Invalid vector type!");
Step 14: Assuming the condition is true
Step 15: '?' condition is true
321
322 if (isScalableVector())
Step 16: Assuming the condition is false
323 llvm::reportInvalidSizeRequest(
324 "Possible incorrect use of EVT::getVectorNumElements() for "
325 "scalable vector. Scalable flag may be dropped, use "
326 "EVT::getVectorElementCount() instead");
327
328 return isSimple() ? V.getVectorNumElements()
Step 17: Taking false branch
Step 18: '?' condition is false
Step 19: Returning value
329 : getExtendedVectorNumElements();
330 }
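Since the element count returned here often feeds later integer arithmetic in callers, a caller-side guard can be worthwhile; below is a minimal standalone sketch of such a check, using a hypothetical helper that is not LLVM API.

#include <cassert>
#include <optional>

// Hypothetical guard, not LLVM API: refuse the division when either the
// element count or the factor is zero, or when they do not divide evenly.
static std::optional<unsigned> lanesPerFactor(unsigned NumElements,
                                              unsigned Factor) {
  if (Factor == 0 || NumElements == 0 || NumElements % Factor != 0)
    return std::nullopt;
  return NumElements / Factor;
}

int main() {
  assert(lanesPerFactor(12, 3) == 4u);
  assert(!lanesPerFactor(0, 3));  // zero element count rejected
  assert(!lanesPerFactor(12, 0)); // zero factor rejected
}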
331
332 // Given a (possibly scalable) vector type, return the ElementCount
333 ElementCount getVectorElementCount() const {
334 assert((isVector()) && "Invalid vector type!");
335 if (isSimple())
336 return V.getVectorElementCount();
337
338 return getExtendedVectorElementCount();
339 }
340
341 /// Given a vector type, return the minimum number of elements it contains.
342 unsigned getVectorMinNumElements() const {
343 return getVectorElementCount().getKnownMinValue();
344 }
345
346 /// Return the size of the specified value type in bits.
347 ///
348 /// If the value type is a scalable vector type, the scalable property will
349 /// be set and the runtime size will be a positive integer multiple of the
350 /// base size.
351 TypeSize getSizeInBits() const {
352 if (isSimple())
353 return V.getSizeInBits();
354 return getExtendedSizeInBits();
355 }
356
357 /// Return the size of the specified fixed width value type in bits. The
358 /// function will assert if the type is scalable.
359 uint64_t getFixedSizeInBits() const {
360 return getSizeInBits().getFixedValue();
361 }
362
363 uint64_t getScalarSizeInBits() const {
364 return getScalarType().getSizeInBits().getFixedValue();
365 }
366
367 /// Return the number of bytes overwritten by a store of the specified value
368 /// type.
369 ///
370 /// If the value type is a scalable vector type, the scalable property will
371 /// be set and the runtime size will be a positive integer multiple of the
372 /// base size.
373 TypeSize getStoreSize() const {
374 TypeSize BaseSize = getSizeInBits();
375 return {(BaseSize.getKnownMinValue() + 7) / 8, BaseSize.isScalable()};
376 }
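A small sketch of the store-size rounding above: the bit size is rounded up to whole bytes, so even a 1-bit or 7-bit type occupies one byte in memory.

#include <cassert>
#include <cstdint>

// Round a bit size up to whole bytes, as getStoreSize does above.
static uint64_t storeBytes(uint64_t Bits) { return (Bits + 7) / 8; }

int main() {
  assert(storeBytes(1) == 1);
  assert(storeBytes(7) == 1);
  assert(storeBytes(8) == 1);
  assert(storeBytes(65) == 9);
}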
377
378 // Return the number of bytes overwritten by a store of this value type or
379 // this value type's element type in the case of a vector.
380 uint64_t getScalarStoreSize() const {
381 return getScalarType().getStoreSize().getFixedValue();
382 }
383
384 /// Return the number of bits overwritten by a store of the specified value
385 /// type.
386 ///
387 /// If the value type is a scalable vector type, the scalable property will
388 /// be set and the runtime size will be a positive integer multiple of the
389 /// base size.
390 TypeSize getStoreSizeInBits() const {
391 return getStoreSize() * 8;
392 }
393
394 /// Rounds the bit-width of the given integer EVT up to the nearest power of
395 /// two (and at least to eight), and returns the integer EVT with that
396 /// number of bits.
397 EVT getRoundIntegerType(LLVMContext &Context) const {
398 assert(isInteger() && !isVector() && "Invalid integer type!");
399 unsigned BitWidth = getSizeInBits();
400 if (BitWidth <= 8)
401 return EVT(MVT::i8);
402 return getIntegerVT(Context, llvm::bit_ceil(BitWidth));
403 }
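A standalone sketch of the rounding above, assuming C++20's std::bit_ceil as a stand-in for llvm::bit_ceil: widths of eight bits or fewer become i8, and everything else rounds up to the next power of two.

#include <bit>      // std::bit_ceil (C++20), standing in for llvm::bit_ceil
#include <cassert>

static unsigned roundIntegerBits(unsigned BitWidth) {
  if (BitWidth <= 8)
    return 8;                       // never narrower than i8
  return std::bit_ceil(BitWidth);   // next power of two
}

int main() {
  assert(roundIntegerBits(1) == 8);
  assert(roundIntegerBits(8) == 8);
  assert(roundIntegerBits(9) == 16);
  assert(roundIntegerBits(33) == 64);
}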
404
405 /// Finds the smallest simple value type that is greater than or equal to
406 /// half the width of this EVT. If no simple value type can be found, an
407 /// extended integer value type of half the size (rounded up) is returned.
408 EVT getHalfSizedIntegerVT(LLVMContext &Context) const {
409 assert(isInteger() && !isVector() && "Invalid integer type!");
410 unsigned EVTSize = getSizeInBits();
411 for (unsigned IntVT = MVT::FIRST_INTEGER_VALUETYPE;
412 IntVT <= MVT::LAST_INTEGER_VALUETYPE; ++IntVT) {
413 EVT HalfVT = EVT((MVT::SimpleValueType)IntVT);
414 if (HalfVT.getSizeInBits() * 2 >= EVTSize)
415 return HalfVT;
416 }
417 return getIntegerVT(Context, (EVTSize + 1) / 2);
418 }
419
420 /// Return a VT for an integer vector type with the size of the
421 /// elements doubled. The type returned may be an extended type.
422 EVT widenIntegerVectorElementType(LLVMContext &Context) const {
423 EVT EltVT = getVectorElementType();
424 EltVT = EVT::getIntegerVT(Context, 2 * EltVT.getSizeInBits());
425 return EVT::getVectorVT(Context, EltVT, getVectorElementCount());
426 }
427
428 // Return a VT for a vector type with the same element type but
429 // half the number of elements. The type returned may be an
430 // extended type.
431 EVT getHalfNumVectorElementsVT(LLVMContext &Context) const {
432 EVT EltVT = getVectorElementType();
433 auto EltCnt = getVectorElementCount();
434 assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
435 return EVT::getVectorVT(Context, EltVT, EltCnt.divideCoefficientBy(2));
436 }
437
438 // Return a VT for a vector type with the same element type but
439 // double the number of elements. The type returned may be an
440 // extended type.
441 EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const {
442 EVT EltVT = getVectorElementType();
443 auto EltCnt = getVectorElementCount();
444 return EVT::getVectorVT(Context, EltVT, EltCnt * 2);
445 }
446
447 /// Returns true if the given vector has a power-of-2 number of elements.
448 bool isPow2VectorType() const {
449 unsigned NElts = getVectorMinNumElements();
450 return !(NElts & (NElts - 1));
451 }
452
453 /// Widens the length of the given vector EVT up to the nearest power of 2
454 /// and returns that type.
455 EVT getPow2VectorType(LLVMContext &Context) const {
456 if (!isPow2VectorType()) {
457 ElementCount NElts = getVectorElementCount();
458 unsigned NewMinCount = 1 << Log2_32_Ceil(NElts.getKnownMinValue());
459 NElts = ElementCount::get(NewMinCount, NElts.isScalable());
460 return EVT::getVectorVT(Context, getVectorElementType(), NElts);
461 }
462 else {
463 return *this;
464 }
465 }
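A short sketch of the widening above: a non-power-of-two element count is bumped to the next power of two, mirroring 1 << Log2_32_Ceil(N), while power-of-two counts are returned unchanged. The helper name is invented for illustration.

#include <cassert>

// Smallest power of two that is >= N, mirroring 1 << Log2_32_Ceil(N).
static unsigned pow2ElementCount(unsigned N) {
  unsigned P = 1;
  while (P < N)
    P <<= 1;
  return P;
}

int main() {
  assert(pow2ElementCount(3) == 4);
  assert(pow2ElementCount(4) == 4); // already a power of two
  assert(pow2ElementCount(6) == 8);
}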
466
467 /// This function returns value type as a string, e.g. "i32".
468 std::string getEVTString() const;
469
470 /// Support for debugging, callable in GDB: VT.dump()
471 void dump() const;
472
473 /// Implement operator<<.
474 void print(raw_ostream &OS) const {
475 OS << getEVTString();
476 }
477
478 /// This method returns an LLVM type corresponding to the specified EVT.
479 /// For integer types, this returns an unsigned type. Note that this will
480 /// abort for types that cannot be represented.
481 Type *getTypeForEVT(LLVMContext &Context) const;
482
483 /// Return the value type corresponding to the specified type.
484 /// This returns all pointers as iPTR. If HandleUnknown is true, unknown
485 /// types are returned as Other, otherwise they are invalid.
486 static EVT getEVT(Type *Ty, bool HandleUnknown = false);
487
488 intptr_t getRawBits() const {
489 if (isSimple())
490 return V.SimpleTy;
491 else
492 return (intptr_t)(LLVMTy);
493 }
494
495 /// A meaningless but well-behaved order, useful for constructing
496 /// containers.
497 struct compareRawBits {
498 bool operator()(EVT L, EVT R) const {
499 if (L.V.SimpleTy == R.V.SimpleTy)
500 return L.LLVMTy < R.LLVMTy;
501 else
502 return L.V.SimpleTy < R.V.SimpleTy;
503 }
504 };
505
506 private:
507 // Methods for handling the Extended-type case in functions above.
508 // These are all out-of-line to prevent users of this header file
509 // from having a dependency on Type.h.
510 EVT changeExtendedTypeToInteger() const;
511 EVT changeExtendedVectorElementType(EVT EltVT) const;
512 EVT changeExtendedVectorElementTypeToInteger() const;
513 static EVT getExtendedIntegerVT(LLVMContext &C, unsigned BitWidth);
514 static EVT getExtendedVectorVT(LLVMContext &C, EVT VT, unsigned NumElements,
515 bool IsScalable);
516 static EVT getExtendedVectorVT(LLVMContext &Context, EVT VT,
517 ElementCount EC);
518 bool isExtendedFloatingPoint() const LLVM_READONLY;
519 bool isExtendedInteger() const LLVM_READONLY;
520 bool isExtendedScalarInteger() const LLVM_READONLY;
521 bool isExtendedVector() const LLVM_READONLY;
522 bool isExtended16BitVector() const LLVM_READONLY;
523 bool isExtended32BitVector() const LLVM_READONLY;
524 bool isExtended64BitVector() const LLVM_READONLY;
525 bool isExtended128BitVector() const LLVM_READONLY;
526 bool isExtended256BitVector() const LLVM_READONLY;
527 bool isExtended512BitVector() const LLVM_READONLY;
528 bool isExtended1024BitVector() const LLVM_READONLY;
529 bool isExtended2048BitVector() const LLVM_READONLY;
530 bool isExtendedFixedLengthVector() const LLVM_READONLY;
531 bool isExtendedScalableVector() const LLVM_READONLY;
532 EVT getExtendedVectorElementType() const;
533 unsigned getExtendedVectorNumElements() const LLVM_READONLY;
534 ElementCount getExtendedVectorElementCount() const LLVM_READONLY;
535 TypeSize getExtendedSizeInBits() const LLVM_READONLY;
536 };
537
538 inline raw_ostream &operator<<(raw_ostream &OS, const EVT &V) {
539 V.print(OS);
540 return OS;
541 }
542} // end namespace llvm
543
544#endif // LLVM_CODEGEN_VALUETYPES_H