Bug Summary

File: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Warning: line 9919, column 31
1st function call argument is an uninitialized value
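
The code at line 9919 is not reproduced in this excerpt. As a generic illustration of the defect class this diagnostic describes (typically emitted by the core.CallAndMessage checker, which is enabled by the -analyzer-checker=core flag in the invocation below), the hypothetical sketch that follows passes a possibly-uninitialized local by value; the names and control flow are illustrative only and do not correspond to the actual PPCISelLowering.cpp code.

  void consume(int x);            // hypothetical callee taking its argument by value

  void example(bool flag) {
    int v;                        // declared but not initialized on every path
    if (flag)
      v = 42;
    consume(v);                   // when flag is false, the 1st argument is an uninitialized value
  }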

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name PPCISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -fno-split-dwarf-inlining -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-12/lib/clang/12.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/build-llvm/lib/Target/PowerPC -I /build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC -I /build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/build-llvm/include -I /build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-12/lib/clang/12.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/build-llvm/lib/Target/PowerPC -fdebug-prefix-map=/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2020-10-27-053609-25509-1 -x c++ /build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp

/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp

1//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the PPCISelLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCISelLowering.h"
14#include "MCTargetDesc/PPCPredicates.h"
15#include "PPC.h"
16#include "PPCCCState.h"
17#include "PPCCallingConv.h"
18#include "PPCFrameLowering.h"
19#include "PPCInstrInfo.h"
20#include "PPCMachineFunctionInfo.h"
21#include "PPCPerfectShuffle.h"
22#include "PPCRegisterInfo.h"
23#include "PPCSubtarget.h"
24#include "PPCTargetMachine.h"
25#include "llvm/ADT/APFloat.h"
26#include "llvm/ADT/APInt.h"
27#include "llvm/ADT/ArrayRef.h"
28#include "llvm/ADT/DenseMap.h"
29#include "llvm/ADT/None.h"
30#include "llvm/ADT/STLExtras.h"
31#include "llvm/ADT/SmallPtrSet.h"
32#include "llvm/ADT/SmallSet.h"
33#include "llvm/ADT/SmallVector.h"
34#include "llvm/ADT/Statistic.h"
35#include "llvm/ADT/StringRef.h"
36#include "llvm/ADT/StringSwitch.h"
37#include "llvm/CodeGen/CallingConvLower.h"
38#include "llvm/CodeGen/ISDOpcodes.h"
39#include "llvm/CodeGen/MachineBasicBlock.h"
40#include "llvm/CodeGen/MachineFrameInfo.h"
41#include "llvm/CodeGen/MachineFunction.h"
42#include "llvm/CodeGen/MachineInstr.h"
43#include "llvm/CodeGen/MachineInstrBuilder.h"
44#include "llvm/CodeGen/MachineJumpTableInfo.h"
45#include "llvm/CodeGen/MachineLoopInfo.h"
46#include "llvm/CodeGen/MachineMemOperand.h"
47#include "llvm/CodeGen/MachineModuleInfo.h"
48#include "llvm/CodeGen/MachineOperand.h"
49#include "llvm/CodeGen/MachineRegisterInfo.h"
50#include "llvm/CodeGen/RuntimeLibcalls.h"
51#include "llvm/CodeGen/SelectionDAG.h"
52#include "llvm/CodeGen/SelectionDAGNodes.h"
53#include "llvm/CodeGen/TargetInstrInfo.h"
54#include "llvm/CodeGen/TargetLowering.h"
55#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
56#include "llvm/CodeGen/TargetRegisterInfo.h"
57#include "llvm/CodeGen/ValueTypes.h"
58#include "llvm/IR/CallingConv.h"
59#include "llvm/IR/Constant.h"
60#include "llvm/IR/Constants.h"
61#include "llvm/IR/DataLayout.h"
62#include "llvm/IR/DebugLoc.h"
63#include "llvm/IR/DerivedTypes.h"
64#include "llvm/IR/Function.h"
65#include "llvm/IR/GlobalValue.h"
66#include "llvm/IR/IRBuilder.h"
67#include "llvm/IR/Instructions.h"
68#include "llvm/IR/Intrinsics.h"
69#include "llvm/IR/IntrinsicsPowerPC.h"
70#include "llvm/IR/Module.h"
71#include "llvm/IR/Type.h"
72#include "llvm/IR/Use.h"
73#include "llvm/IR/Value.h"
74#include "llvm/MC/MCContext.h"
75#include "llvm/MC/MCExpr.h"
76#include "llvm/MC/MCRegisterInfo.h"
77#include "llvm/MC/MCSectionXCOFF.h"
78#include "llvm/MC/MCSymbolXCOFF.h"
79#include "llvm/Support/AtomicOrdering.h"
80#include "llvm/Support/BranchProbability.h"
81#include "llvm/Support/Casting.h"
82#include "llvm/Support/CodeGen.h"
83#include "llvm/Support/CommandLine.h"
84#include "llvm/Support/Compiler.h"
85#include "llvm/Support/Debug.h"
86#include "llvm/Support/ErrorHandling.h"
87#include "llvm/Support/Format.h"
88#include "llvm/Support/KnownBits.h"
89#include "llvm/Support/MachineValueType.h"
90#include "llvm/Support/MathExtras.h"
91#include "llvm/Support/raw_ostream.h"
92#include "llvm/Target/TargetMachine.h"
93#include "llvm/Target/TargetOptions.h"
94#include <algorithm>
95#include <cassert>
96#include <cstdint>
97#include <iterator>
98#include <list>
99#include <utility>
100#include <vector>
101
102using namespace llvm;
103
104#define DEBUG_TYPE "ppc-lowering"
105
106static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
107cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
108
109static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
110cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
111
112static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
113cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
114
115static cl::opt<bool> DisableSCO("disable-ppc-sco",
116cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
117
118static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
119cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
120
121static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
122cl::desc("use absolute jump tables on ppc"), cl::Hidden);
123
124static cl::opt<bool> EnablePPCPCRelTLS(
125 "enable-ppc-pcrel-tls",
126 cl::desc("enable the use of PC relative memops in TLS instructions on PPC"),
127 cl::Hidden);
128
129STATISTIC(NumTailCalls, "Number of tail calls");
130STATISTIC(NumSiblingCalls, "Number of sibling calls");
131STATISTIC(ShufflesHandledWithVPERM, "Number of shuffles lowered to a VPERM");
132STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
133
134static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
135
136static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
137
138// FIXME: Remove this once the bug has been fixed!
139extern cl::opt<bool> ANDIGlueBug;
140
141PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
142 const PPCSubtarget &STI)
143 : TargetLowering(TM), Subtarget(STI) {
144 // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
145 // arguments are at least 4/8 bytes aligned.
146 bool isPPC64 = Subtarget.isPPC64();
147 setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
148
149 // Set up the register classes.
150 addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
151 if (!useSoftFloat()) {
152 if (hasSPE()) {
153 addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
154 addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
155 } else {
156 addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
157 addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
158 }
159 }
160
161 // Match BITREVERSE to customized fast code sequence in the td file.
162 setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
163 setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
164
165 // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
166 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
167
168 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
169 for (MVT VT : MVT::integer_valuetypes()) {
170 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
171 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
172 }
173
174 if (Subtarget.isISA3_0()) {
175 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
176 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
177 setTruncStoreAction(MVT::f64, MVT::f16, Legal);
178 setTruncStoreAction(MVT::f32, MVT::f16, Legal);
179 } else {
180 // No extending loads from f16 or HW conversions back and forth.
181 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
182 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
183 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
184 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
185 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
186 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
187 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
188 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
189 }
190
191 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
192
193 // PowerPC has pre-inc load and store's.
194 setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
195 setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
196 setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
197 setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
198 setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
199 setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
200 setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
201 setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
202 setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
203 setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
204 if (!Subtarget.hasSPE()) {
205 setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
206 setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
207 setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
208 setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
209 }
210
211 // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
212 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
213 for (MVT VT : ScalarIntVTs) {
214 setOperationAction(ISD::ADDC, VT, Legal);
215 setOperationAction(ISD::ADDE, VT, Legal);
216 setOperationAction(ISD::SUBC, VT, Legal);
217 setOperationAction(ISD::SUBE, VT, Legal);
218 }
219
220 if (Subtarget.useCRBits()) {
221 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
222
223 if (isPPC64 || Subtarget.hasFPCVT()) {
224 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Promote);
225 AddPromotedToType(ISD::STRICT_SINT_TO_FP, MVT::i1,
226 isPPC64 ? MVT::i64 : MVT::i32);
227 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Promote);
228 AddPromotedToType(ISD::STRICT_UINT_TO_FP, MVT::i1,
229 isPPC64 ? MVT::i64 : MVT::i32);
230
231 setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
232 AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
233 isPPC64 ? MVT::i64 : MVT::i32);
234 setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
235 AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
236 isPPC64 ? MVT::i64 : MVT::i32);
237 } else {
238 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Custom);
239 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Custom);
240 setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
241 setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
242 }
243
244 // PowerPC does not support direct load/store of condition registers.
245 setOperationAction(ISD::LOAD, MVT::i1, Custom);
246 setOperationAction(ISD::STORE, MVT::i1, Custom);
247
248 // FIXME: Remove this once the ANDI glue bug is fixed:
249 if (ANDIGlueBug)
250 setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
251
252 for (MVT VT : MVT::integer_valuetypes()) {
253 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
254 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
255 setTruncStoreAction(VT, MVT::i1, Expand);
256 }
257
258 addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
259 }
260
261 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
262 // PPC (the libcall is not available).
263 setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom);
264 setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom);
265 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::ppcf128, Custom);
266 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::ppcf128, Custom);
267
268 // We do not currently implement these libm ops for PowerPC.
269 setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
270 setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
271 setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
272 setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
273 setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
274 setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
275
276 // PowerPC has no SREM/UREM instructions unless we are on P9
277 // On P9 we may use a hardware instruction to compute the remainder.
278 // When the result of both the remainder and the division is required it is
279 // more efficient to compute the remainder from the result of the division
280 // rather than use the remainder instruction. The instructions are legalized
281 // directly because the DivRemPairsPass performs the transformation at the IR
282 // level.
283 if (Subtarget.isISA3_0()) {
284 setOperationAction(ISD::SREM, MVT::i32, Legal);
285 setOperationAction(ISD::UREM, MVT::i32, Legal);
286 setOperationAction(ISD::SREM, MVT::i64, Legal);
287 setOperationAction(ISD::UREM, MVT::i64, Legal);
288 } else {
289 setOperationAction(ISD::SREM, MVT::i32, Expand);
290 setOperationAction(ISD::UREM, MVT::i32, Expand);
291 setOperationAction(ISD::SREM, MVT::i64, Expand);
292 setOperationAction(ISD::UREM, MVT::i64, Expand);
293 }
294
295 // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
296 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
297 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
298 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
299 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
300 setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
301 setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
302 setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
303 setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
304
305 // Handle constrained floating-point operations of scalar.
306 // TODO: Handle SPE specific operation.
307 setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
308 setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
309 setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
310 setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
311 setOperationAction(ISD::STRICT_FMA, MVT::f32, Legal);
312 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
313
314 setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
315 setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
316 setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
317 setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
318 setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal);
319 if (Subtarget.hasVSX()) {
320 setOperationAction(ISD::STRICT_FRINT, MVT::f32, Legal);
321 setOperationAction(ISD::STRICT_FRINT, MVT::f64, Legal);
322 }
323
324 if (Subtarget.hasFSQRT()) {
325 setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
326 setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
327 }
328
329 if (Subtarget.hasFPRND()) {
330 setOperationAction(ISD::STRICT_FFLOOR, MVT::f32, Legal);
331 setOperationAction(ISD::STRICT_FCEIL, MVT::f32, Legal);
332 setOperationAction(ISD::STRICT_FTRUNC, MVT::f32, Legal);
333 setOperationAction(ISD::STRICT_FROUND, MVT::f32, Legal);
334
335 setOperationAction(ISD::STRICT_FFLOOR, MVT::f64, Legal);
336 setOperationAction(ISD::STRICT_FCEIL, MVT::f64, Legal);
337 setOperationAction(ISD::STRICT_FTRUNC, MVT::f64, Legal);
338 setOperationAction(ISD::STRICT_FROUND, MVT::f64, Legal);
339 }
340
341 // We don't support sin/cos/sqrt/fmod/pow
342 setOperationAction(ISD::FSIN , MVT::f64, Expand);
343 setOperationAction(ISD::FCOS , MVT::f64, Expand);
344 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
345 setOperationAction(ISD::FREM , MVT::f64, Expand);
346 setOperationAction(ISD::FPOW , MVT::f64, Expand);
347 setOperationAction(ISD::FSIN , MVT::f32, Expand);
348 setOperationAction(ISD::FCOS , MVT::f32, Expand);
349 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
350 setOperationAction(ISD::FREM , MVT::f32, Expand);
351 setOperationAction(ISD::FPOW , MVT::f32, Expand);
352 if (Subtarget.hasSPE()) {
353 setOperationAction(ISD::FMA , MVT::f64, Expand);
354 setOperationAction(ISD::FMA , MVT::f32, Expand);
355 } else {
356 setOperationAction(ISD::FMA , MVT::f64, Legal);
357 setOperationAction(ISD::FMA , MVT::f32, Legal);
358 }
359
360 if (Subtarget.hasSPE())
361 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
362
363 setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
364
365 // If we're enabling GP optimizations, use hardware square root
366 if (!Subtarget.hasFSQRT() &&
367 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
368 Subtarget.hasFRE()))
369 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
370
371 if (!Subtarget.hasFSQRT() &&
372 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
373 Subtarget.hasFRES()))
374 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
375
376 if (Subtarget.hasFCPSGN()) {
377 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
378 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
379 } else {
380 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
381 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
382 }
383
384 if (Subtarget.hasFPRND()) {
385 setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
386 setOperationAction(ISD::FCEIL, MVT::f64, Legal);
387 setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
388 setOperationAction(ISD::FROUND, MVT::f64, Legal);
389
390 setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
391 setOperationAction(ISD::FCEIL, MVT::f32, Legal);
392 setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
393 setOperationAction(ISD::FROUND, MVT::f32, Legal);
394 }
395
396 // PowerPC does not have BSWAP, but we can use vector BSWAP instruction xxbrd
397 // to speed up scalar BSWAP64.
398 // CTPOP or CTTZ were introduced in P8/P9 respectively
399 setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
400 if (Subtarget.hasP9Vector())
401 setOperationAction(ISD::BSWAP, MVT::i64 , Custom);
402 else
403 setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
404 if (Subtarget.isISA3_0()) {
405 setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
406 setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
407 } else {
408 setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
409 setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
410 }
411
412 if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
413 setOperationAction(ISD::CTPOP, MVT::i32 , Legal);
414 setOperationAction(ISD::CTPOP, MVT::i64 , Legal);
415 } else {
416 setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
417 setOperationAction(ISD::CTPOP, MVT::i64 , Expand);
418 }
419
420 // PowerPC does not have ROTR
421 setOperationAction(ISD::ROTR, MVT::i32 , Expand);
422 setOperationAction(ISD::ROTR, MVT::i64 , Expand);
423
424 if (!Subtarget.useCRBits()) {
425 // PowerPC does not have Select
426 setOperationAction(ISD::SELECT, MVT::i32, Expand);
427 setOperationAction(ISD::SELECT, MVT::i64, Expand);
428 setOperationAction(ISD::SELECT, MVT::f32, Expand);
429 setOperationAction(ISD::SELECT, MVT::f64, Expand);
430 }
431
432 // PowerPC wants to turn select_cc of FP into fsel when possible.
433 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
434 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
435
436 // PowerPC wants to optimize integer setcc a bit
437 if (!Subtarget.useCRBits())
438 setOperationAction(ISD::SETCC, MVT::i32, Custom);
439
440 if (Subtarget.hasFPU()) {
441 setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
442 setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
443 setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Legal);
444
445 setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
446 setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
447 setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Legal);
448 }
449
450 // PowerPC does not have BRCOND which requires SetCC
451 if (!Subtarget.useCRBits())
452 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
453
454 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
455
456 if (Subtarget.hasSPE()) {
457 // SPE has built-in conversions
458 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Legal);
459 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Legal);
460 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Legal);
461 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
462 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
463 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
464 } else {
465 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
466 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
467 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
468
469 // PowerPC does not have [U|S]INT_TO_FP
470 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Expand);
471 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Expand);
472 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
473 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
474 }
475
476 if (Subtarget.hasDirectMove() && isPPC64) {
477 setOperationAction(ISD::BITCAST, MVT::f32, Legal);
478 setOperationAction(ISD::BITCAST, MVT::i32, Legal);
479 setOperationAction(ISD::BITCAST, MVT::i64, Legal);
480 setOperationAction(ISD::BITCAST, MVT::f64, Legal);
481 if (TM.Options.UnsafeFPMath) {
482 setOperationAction(ISD::LRINT, MVT::f64, Legal);
483 setOperationAction(ISD::LRINT, MVT::f32, Legal);
484 setOperationAction(ISD::LLRINT, MVT::f64, Legal);
485 setOperationAction(ISD::LLRINT, MVT::f32, Legal);
486 setOperationAction(ISD::LROUND, MVT::f64, Legal);
487 setOperationAction(ISD::LROUND, MVT::f32, Legal);
488 setOperationAction(ISD::LLROUND, MVT::f64, Legal);
489 setOperationAction(ISD::LLROUND, MVT::f32, Legal);
490 }
491 } else {
492 setOperationAction(ISD::BITCAST, MVT::f32, Expand);
493 setOperationAction(ISD::BITCAST, MVT::i32, Expand);
494 setOperationAction(ISD::BITCAST, MVT::i64, Expand);
495 setOperationAction(ISD::BITCAST, MVT::f64, Expand);
496 }
497
498 // We cannot sextinreg(i1). Expand to shifts.
499 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
500
501 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
502 // SjLj exception handling but a light-weight setjmp/longjmp replacement to
503 // support continuation, user-level threading, and etc.. As a result, no
504 // other SjLj exception interfaces are implemented and please don't build
505 // your own exception handling based on them.
506 // LLVM/Clang supports zero-cost DWARF exception handling.
507 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
508 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
509
510 // We want to legalize GlobalAddress and ConstantPool nodes into the
511 // appropriate instructions to materialize the address.
512 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
513 setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
514 setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
515 setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
516 setOperationAction(ISD::JumpTable, MVT::i32, Custom);
517 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
518 setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
519 setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
520 setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
521 setOperationAction(ISD::JumpTable, MVT::i64, Custom);
522
523 // TRAP is legal.
524 setOperationAction(ISD::TRAP, MVT::Other, Legal);
525
526 // TRAMPOLINE is custom lowered.
527 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
528 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
529
530 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
531 setOperationAction(ISD::VASTART , MVT::Other, Custom);
532
533 if (Subtarget.is64BitELFABI()) {
534 // VAARG always uses double-word chunks, so promote anything smaller.
535 setOperationAction(ISD::VAARG, MVT::i1, Promote);
536 AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
537 setOperationAction(ISD::VAARG, MVT::i8, Promote);
538 AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
539 setOperationAction(ISD::VAARG, MVT::i16, Promote);
540 AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
541 setOperationAction(ISD::VAARG, MVT::i32, Promote);
542 AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
543 setOperationAction(ISD::VAARG, MVT::Other, Expand);
544 } else if (Subtarget.is32BitELFABI()) {
545 // VAARG is custom lowered with the 32-bit SVR4 ABI.
546 setOperationAction(ISD::VAARG, MVT::Other, Custom);
547 setOperationAction(ISD::VAARG, MVT::i64, Custom);
548 } else
549 setOperationAction(ISD::VAARG, MVT::Other, Expand);
550
551 // VACOPY is custom lowered with the 32-bit SVR4 ABI.
552 if (Subtarget.is32BitELFABI())
553 setOperationAction(ISD::VACOPY , MVT::Other, Custom);
554 else
555 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
556
557 // Use the default implementation.
558 setOperationAction(ISD::VAEND , MVT::Other, Expand);
559 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
560 setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom);
561 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
562 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom);
563 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
564 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
565 setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
566 setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
567
568 // We want to custom lower some of our intrinsics.
569 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
570
571 // To handle counter-based loop conditions.
572 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
573
574 setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
575 setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
576 setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
577 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
578
579 // Comparisons that require checking two conditions.
580 if (Subtarget.hasSPE()) {
581 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
582 setCondCodeAction(ISD::SETO, MVT::f64, Expand);
583 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
584 setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
585 }
586 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
587 setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
588 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
589 setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
590 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
591 setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
592 setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
593 setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
594 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
595 setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
596 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
597 setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
598
599 if (Subtarget.has64BitSupport()) {
600 // They also have instructions for converting between i64 and fp.
601 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
602 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Expand);
603 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
604 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand);
605 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
606 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
607 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
608 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
609 // This is just the low 32 bits of a (signed) fp->i64 conversion.
610 // We cannot do this with Promote because i64 is not a legal type.
611 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
612 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
613
614 if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) {
615 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
616 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
617 }
618 } else {
619 // PowerPC does not have FP_TO_UINT on 32-bit implementations.
620 if (Subtarget.hasSPE()) {
621 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Legal);
622 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
623 } else {
624 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Expand);
625 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
626 }
627 }
628
629 // With the instructions enabled under FPCVT, we can do everything.
630 if (Subtarget.hasFPCVT()) {
631 if (Subtarget.has64BitSupport()) {
632 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
633 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
634 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
635 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
636 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
637 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
638 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
639 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
640 }
641
642 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
643 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
644 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
645 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
646 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
647 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
648 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
649 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
650 }
651
652 if (Subtarget.use64BitRegs()) {
653 // 64-bit PowerPC implementations can support i64 types directly
654 addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
655 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
656 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
657 // 64-bit PowerPC wants to expand i128 shifts itself.
658 setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
659 setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
660 setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
661 } else {
662 // 32-bit PowerPC wants to expand i64 shifts itself.
663 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
664 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
665 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
666 }
667
668 // PowerPC has better expansions for funnel shifts than the generic
669 // TargetLowering::expandFunnelShift.
670 if (Subtarget.has64BitSupport()) {
671 setOperationAction(ISD::FSHL, MVT::i64, Custom);
672 setOperationAction(ISD::FSHR, MVT::i64, Custom);
673 }
674 setOperationAction(ISD::FSHL, MVT::i32, Custom);
675 setOperationAction(ISD::FSHR, MVT::i32, Custom);
676
677 if (Subtarget.hasVSX()) {
678 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
679 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
680 setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
681 setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
682 }
683
684 if (Subtarget.hasAltivec()) {
685 for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
686 setOperationAction(ISD::SADDSAT, VT, Legal);
687 setOperationAction(ISD::SSUBSAT, VT, Legal);
688 setOperationAction(ISD::UADDSAT, VT, Legal);
689 setOperationAction(ISD::USUBSAT, VT, Legal);
690 }
691 // First set operation action for all vector types to expand. Then we
692 // will selectively turn on ones that can be effectively codegen'd.
693 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
694 // add/sub are legal for all supported vector VT's.
695 setOperationAction(ISD::ADD, VT, Legal);
696 setOperationAction(ISD::SUB, VT, Legal);
697
698 // For v2i64, these are only valid with P8Vector. This is corrected after
699 // the loop.
700 if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
701 setOperationAction(ISD::SMAX, VT, Legal);
702 setOperationAction(ISD::SMIN, VT, Legal);
703 setOperationAction(ISD::UMAX, VT, Legal);
704 setOperationAction(ISD::UMIN, VT, Legal);
705 }
706 else {
707 setOperationAction(ISD::SMAX, VT, Expand);
708 setOperationAction(ISD::SMIN, VT, Expand);
709 setOperationAction(ISD::UMAX, VT, Expand);
710 setOperationAction(ISD::UMIN, VT, Expand);
711 }
712
713 if (Subtarget.hasVSX()) {
714 setOperationAction(ISD::FMAXNUM, VT, Legal);
715 setOperationAction(ISD::FMINNUM, VT, Legal);
716 }
717
718 // Vector instructions introduced in P8
719 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
720 setOperationAction(ISD::CTPOP, VT, Legal);
721 setOperationAction(ISD::CTLZ, VT, Legal);
722 }
723 else {
724 setOperationAction(ISD::CTPOP, VT, Expand);
725 setOperationAction(ISD::CTLZ, VT, Expand);
726 }
727
728 // Vector instructions introduced in P9
729 if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
730 setOperationAction(ISD::CTTZ, VT, Legal);
731 else
732 setOperationAction(ISD::CTTZ, VT, Expand);
733
734 // We promote all shuffles to v16i8.
735 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
736 AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
737
738 // We promote all non-typed operations to v4i32.
739 setOperationAction(ISD::AND , VT, Promote);
740 AddPromotedToType (ISD::AND , VT, MVT::v4i32);
741 setOperationAction(ISD::OR , VT, Promote);
742 AddPromotedToType (ISD::OR , VT, MVT::v4i32);
743 setOperationAction(ISD::XOR , VT, Promote);
744 AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
745 setOperationAction(ISD::LOAD , VT, Promote);
746 AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
747 setOperationAction(ISD::SELECT, VT, Promote);
748 AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
749 setOperationAction(ISD::VSELECT, VT, Legal);
750 setOperationAction(ISD::SELECT_CC, VT, Promote);
751 AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
752 setOperationAction(ISD::STORE, VT, Promote);
753 AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
754
755 // No other operations are legal.
756 setOperationAction(ISD::MUL , VT, Expand);
757 setOperationAction(ISD::SDIV, VT, Expand);
758 setOperationAction(ISD::SREM, VT, Expand);
759 setOperationAction(ISD::UDIV, VT, Expand);
760 setOperationAction(ISD::UREM, VT, Expand);
761 setOperationAction(ISD::FDIV, VT, Expand);
762 setOperationAction(ISD::FREM, VT, Expand);
763 setOperationAction(ISD::FNEG, VT, Expand);
764 setOperationAction(ISD::FSQRT, VT, Expand);
765 setOperationAction(ISD::FLOG, VT, Expand);
766 setOperationAction(ISD::FLOG10, VT, Expand);
767 setOperationAction(ISD::FLOG2, VT, Expand);
768 setOperationAction(ISD::FEXP, VT, Expand);
769 setOperationAction(ISD::FEXP2, VT, Expand);
770 setOperationAction(ISD::FSIN, VT, Expand);
771 setOperationAction(ISD::FCOS, VT, Expand);
772 setOperationAction(ISD::FABS, VT, Expand);
773 setOperationAction(ISD::FFLOOR, VT, Expand);
774 setOperationAction(ISD::FCEIL, VT, Expand);
775 setOperationAction(ISD::FTRUNC, VT, Expand);
776 setOperationAction(ISD::FRINT, VT, Expand);
777 setOperationAction(ISD::FNEARBYINT, VT, Expand);
778 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
779 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
780 setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
781 setOperationAction(ISD::MULHU, VT, Expand);
782 setOperationAction(ISD::MULHS, VT, Expand);
783 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
784 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
785 setOperationAction(ISD::UDIVREM, VT, Expand);
786 setOperationAction(ISD::SDIVREM, VT, Expand);
787 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
788 setOperationAction(ISD::FPOW, VT, Expand);
789 setOperationAction(ISD::BSWAP, VT, Expand);
790 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
791 setOperationAction(ISD::ROTL, VT, Expand);
792 setOperationAction(ISD::ROTR, VT, Expand);
793
794 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
795 setTruncStoreAction(VT, InnerVT, Expand);
796 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
797 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
798 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
799 }
800 }
801 setOperationAction(ISD::SELECT_CC, MVT::v4i32, Expand);
802 if (!Subtarget.hasP8Vector()) {
803 setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
804 setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
805 setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
806 setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
807 }
808
809 for (auto VT : {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8})
810 setOperationAction(ISD::ABS, VT, Custom);
811
812 // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
813 // with merges, splats, etc.
814 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
815
816 // Vector truncates to sub-word integer that fit in an Altivec/VSX register
817 // are cheap, so handle them before they get expanded to scalar.
818 setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
819 setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
820 setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
821 setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
822 setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
823
824 setOperationAction(ISD::AND , MVT::v4i32, Legal);
825 setOperationAction(ISD::OR , MVT::v4i32, Legal);
826 setOperationAction(ISD::XOR , MVT::v4i32, Legal);
827 setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
828 setOperationAction(ISD::SELECT, MVT::v4i32,
829 Subtarget.useCRBits() ? Legal : Expand);
830 setOperationAction(ISD::STORE , MVT::v4i32, Legal);
831 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
832 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);
833 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
834 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal);
835 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
836 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
837 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
838 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
839 setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
840 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
841 setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
842 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
843
844 // Without hasP8Altivec set, v2i64 SMAX isn't available.
845 // But ABS custom lowering requires SMAX support.
846 if (!Subtarget.hasP8Altivec())
847 setOperationAction(ISD::ABS, MVT::v2i64, Expand);
848
849 // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
850 setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
851 // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
852 if (Subtarget.hasAltivec())
853 for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
854 setOperationAction(ISD::ROTL, VT, Legal);
855 // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
856 if (Subtarget.hasP8Altivec())
857 setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
858
859 addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
860 addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
861 addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
862 addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
863
864 setOperationAction(ISD::MUL, MVT::v4f32, Legal);
865 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
866
867 if (Subtarget.hasVSX()) {
868 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
869 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
870 }
871
872 if (Subtarget.hasP8Altivec())
873 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
874 else
875 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
876
877 if (Subtarget.isISA3_1()) {
878 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
879 setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
880 setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
881 setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
882 setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
883 setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
884 setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
885 setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
886 setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
887 setOperationAction(ISD::UREM, MVT::v2i64, Legal);
888 setOperationAction(ISD::SREM, MVT::v2i64, Legal);
889 setOperationAction(ISD::UREM, MVT::v4i32, Legal);
890 setOperationAction(ISD::SREM, MVT::v4i32, Legal);
891 setOperationAction(ISD::UREM, MVT::v1i128, Legal);
892 setOperationAction(ISD::SREM, MVT::v1i128, Legal);
893 setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
894 setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
895 setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
896 }
897
898 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
899 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
900
901 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
902 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
903
904 setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
905 setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
906 setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
907 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
908
909 // Altivec does not contain unordered floating-point compare instructions
910 setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
911 setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
912 setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
913 setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
914
915 if (Subtarget.hasVSX()) {
916 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
917 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
918 if (Subtarget.hasP8Vector()) {
919 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
920 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
921 }
922 if (Subtarget.hasDirectMove() && isPPC64) {
923 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
924 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
925 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
926 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
927 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
928 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
929 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
930 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
931 }
932 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
933
934 // The nearbyint variants are not allowed to raise the inexact exception
935 // so we can only code-gen them with unsafe math.
936 if (TM.Options.UnsafeFPMath) {
937 setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
938 setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
939 }
940
941 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
942 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
943 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
944 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
945 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
946 setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
947 setOperationAction(ISD::FROUND, MVT::f64, Legal);
948 setOperationAction(ISD::FRINT, MVT::f64, Legal);
949
950 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
951 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
952 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
953 setOperationAction(ISD::FROUND, MVT::f32, Legal);
954 setOperationAction(ISD::FRINT, MVT::f32, Legal);
955
956 setOperationAction(ISD::MUL, MVT::v2f64, Legal);
957 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
958
959 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
960 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
961
962 // Share the Altivec comparison restrictions.
963 setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
964 setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
965 setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
966 setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
967
968 setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
969 setOperationAction(ISD::STORE, MVT::v2f64, Legal);
970
971 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);
972
973 if (Subtarget.hasP8Vector())
974 addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
975
976 addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
977
978 addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
979 addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
980 addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
981
982 if (Subtarget.hasP8Altivec()) {
983 setOperationAction(ISD::SHL, MVT::v2i64, Legal);
984 setOperationAction(ISD::SRA, MVT::v2i64, Legal);
985 setOperationAction(ISD::SRL, MVT::v2i64, Legal);
986
987 // 128 bit shifts can be accomplished via 3 instructions for SHL and
988 // SRL, but not for SRA because of the instructions available:
989 // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth
990 // doing
991 setOperationAction(ISD::SHL, MVT::v1i128, Expand);
992 setOperationAction(ISD::SRL, MVT::v1i128, Expand);
993 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
994
995 setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
996 }
997 else {
998 setOperationAction(ISD::SHL, MVT::v2i64, Expand);
999 setOperationAction(ISD::SRA, MVT::v2i64, Expand);
1000 setOperationAction(ISD::SRL, MVT::v2i64, Expand);
1001
1002 setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
1003
1004 // VSX v2i64 only supports non-arithmetic operations.
1005 setOperationAction(ISD::ADD, MVT::v2i64, Expand);
1006 setOperationAction(ISD::SUB, MVT::v2i64, Expand);
1007 }
1008
1009 if (Subtarget.isISA3_1())
1010 setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
1011 else
1012 setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
1013
1014 setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
1015 AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
1016 setOperationAction(ISD::STORE, MVT::v2i64, Promote);
1017 AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
1018
1019 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);
1020
1021 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal);
1022 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal);
1023 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal);
1024 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal);
1025 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
1026 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
1027 setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
1028 setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
1029
1030 // Custom handling for partial vectors of integers converted to
1031 // floating point. We already have optimal handling for v2i32 through
1032 // the DAG combine, so those aren't necessary.
1033 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i8, Custom);
1034 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i8, Custom);
1035 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i16, Custom);
1036 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i16, Custom);
1037 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i8, Custom);
1038 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i8, Custom);
1039 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i16, Custom);
1040 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i16, Custom);
1041 setOperationAction(ISD::UINT_TO_FP, MVT::v2i8, Custom);
1042 setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
1043 setOperationAction(ISD::UINT_TO_FP, MVT::v2i16, Custom);
1044 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
1045 setOperationAction(ISD::SINT_TO_FP, MVT::v2i8, Custom);
1046 setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Custom);
1047 setOperationAction(ISD::SINT_TO_FP, MVT::v2i16, Custom);
1048 setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
1049
1050 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
1051 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
1052 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
1053 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
1054 setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
1055 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Legal);
1056
1057 if (Subtarget.hasDirectMove())
1058 setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
1059 setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
1060
1061 // Handle constrained floating-point operations of vector.
1062 // The predictor is `hasVSX` because altivec instruction has
1063 // no exception but VSX vector instruction has.
1064 setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
1065 setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
1066 setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
1067 setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
1068 setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
1069 setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
1070 setOperationAction(ISD::STRICT_FMAXNUM, MVT::v4f32, Legal);
1071 setOperationAction(ISD::STRICT_FMINNUM, MVT::v4f32, Legal);
1072 setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
1073 setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
1074 setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal);
1075 setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
1076 setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);
1077
1078 setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
1079 setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
1080 setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
1081 setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
1082 setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);
1083 setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
1084 setOperationAction(ISD::STRICT_FMAXNUM, MVT::v2f64, Legal);
1085 setOperationAction(ISD::STRICT_FMINNUM, MVT::v2f64, Legal);
1086 setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal);
1087 setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
1088 setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal);
1089 setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
1090 setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal);
1091
1092 addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1093 }
1094
1095 if (Subtarget.hasP8Altivec()) {
1096 addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1097 addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1098 }
1099
1100 if (Subtarget.hasP9Vector()) {
1101 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
1102 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
1103
1104 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1105 // SRL, but not for SRA because of the instructions available:
1106 // VS{RL} and VS{RL}O.
1107 setOperationAction(ISD::SHL, MVT::v1i128, Legal);
1108 setOperationAction(ISD::SRL, MVT::v1i128, Legal);
1109 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1110
1111 addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1112 setOperationAction(ISD::FADD, MVT::f128, Legal);
1113 setOperationAction(ISD::FSUB, MVT::f128, Legal);
1114 setOperationAction(ISD::FDIV, MVT::f128, Legal);
1115 setOperationAction(ISD::FMUL, MVT::f128, Legal);
1116 setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
1117 // No extending loads to f128 on PPC.
1118 for (MVT FPT : MVT::fp_valuetypes())
1119 setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
1120 setOperationAction(ISD::FMA, MVT::f128, Legal);
1121 setCondCodeAction(ISD::SETULT, MVT::f128, Expand);
1122 setCondCodeAction(ISD::SETUGT, MVT::f128, Expand);
1123 setCondCodeAction(ISD::SETUEQ, MVT::f128, Expand);
1124 setCondCodeAction(ISD::SETOGE, MVT::f128, Expand);
1125 setCondCodeAction(ISD::SETOLE, MVT::f128, Expand);
1126 setCondCodeAction(ISD::SETONE, MVT::f128, Expand);
1127
1128 setOperationAction(ISD::FTRUNC, MVT::f128, Legal);
1129 setOperationAction(ISD::FRINT, MVT::f128, Legal);
1130 setOperationAction(ISD::FFLOOR, MVT::f128, Legal);
1131 setOperationAction(ISD::FCEIL, MVT::f128, Legal);
1132 setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal);
1133 setOperationAction(ISD::FROUND, MVT::f128, Legal);
1134
1135 setOperationAction(ISD::SELECT, MVT::f128, Expand);
1136 setOperationAction(ISD::FP_ROUND, MVT::f64, Legal);
1137 setOperationAction(ISD::FP_ROUND, MVT::f32, Legal);
1138 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
1139 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
1140 setOperationAction(ISD::BITCAST, MVT::i128, Custom);
1141 // No implementation for these ops for PowerPC.
1142 setOperationAction(ISD::FSIN, MVT::f128, Expand);
1143 setOperationAction(ISD::FCOS, MVT::f128, Expand);
1144 setOperationAction(ISD::FPOW, MVT::f128, Expand);
1145 setOperationAction(ISD::FPOWI, MVT::f128, Expand);
1146 setOperationAction(ISD::FREM, MVT::f128, Expand);
1147
1148 // Handle constrained floating-point operations of fp128
1149 setOperationAction(ISD::STRICT_FADD, MVT::f128, Legal);
1150 setOperationAction(ISD::STRICT_FSUB, MVT::f128, Legal);
1151 setOperationAction(ISD::STRICT_FMUL, MVT::f128, Legal);
1152 setOperationAction(ISD::STRICT_FDIV, MVT::f128, Legal);
1153 setOperationAction(ISD::STRICT_FMA, MVT::f128, Legal);
1154 setOperationAction(ISD::STRICT_FSQRT, MVT::f128, Legal);
1155 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Legal);
1156 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);
1157 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
1158 setOperationAction(ISD::STRICT_FRINT, MVT::f128, Legal);
1159 setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f128, Legal);
1160 setOperationAction(ISD::STRICT_FFLOOR, MVT::f128, Legal);
1161 setOperationAction(ISD::STRICT_FCEIL, MVT::f128, Legal);
1162 setOperationAction(ISD::STRICT_FTRUNC, MVT::f128, Legal);
1163 setOperationAction(ISD::STRICT_FROUND, MVT::f128, Legal);
1164 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
1165 setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
1166 setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
1167 setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
1168 setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
1169 }
1170
1171 if (Subtarget.hasP9Altivec()) {
1172 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
1173 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
1174
1175 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Legal);
1176 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
1177 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
1178 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Legal);
1179 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Legal);
1180 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
1181 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
1182 }
1183 }
1184
1185 if (Subtarget.pairedVectorMemops()) {
1186 addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
1187 setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
1188 setOperationAction(ISD::STORE, MVT::v256i1, Custom);
1189 }
1190 if (Subtarget.hasMMA()) {
1191 addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1192 setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
1193 setOperationAction(ISD::STORE, MVT::v512i1, Custom);
1194 setOperationAction(ISD::BUILD_VECTOR, MVT::v512i1, Custom);
1195 }
1196
1197 if (Subtarget.has64BitSupport())
1198 setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
1199
1200 if (Subtarget.isISA3_1())
1201 setOperationAction(ISD::SRA, MVT::v1i128, Legal);
1202
1203 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1204
1205 if (!isPPC64) {
1206 setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
1207 setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
1208 }
1209
1210 setBooleanContents(ZeroOrOneBooleanContent);
1211
1212 if (Subtarget.hasAltivec()) {
1213 // Altivec instructions set fields to all zeros or all ones.
1214 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
1215 }
1216
1217 if (!isPPC64) {
1218 // These libcalls are not available in 32-bit.
1219 setLibcallName(RTLIB::SHL_I128, nullptr);
1220 setLibcallName(RTLIB::SRL_I128, nullptr);
1221 setLibcallName(RTLIB::SRA_I128, nullptr);
1222 }
1223
1224 if (!isPPC64)
1225 setMaxAtomicSizeInBitsSupported(32);
1226
1227 setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1228
1229 // We have target-specific dag combine patterns for the following nodes:
1230 setTargetDAGCombine(ISD::ADD);
1231 setTargetDAGCombine(ISD::SHL);
1232 setTargetDAGCombine(ISD::SRA);
1233 setTargetDAGCombine(ISD::SRL);
1234 setTargetDAGCombine(ISD::MUL);
1235 setTargetDAGCombine(ISD::FMA);
1236 setTargetDAGCombine(ISD::SINT_TO_FP);
1237 setTargetDAGCombine(ISD::BUILD_VECTOR);
1238 if (Subtarget.hasFPCVT())
1239 setTargetDAGCombine(ISD::UINT_TO_FP);
1240 setTargetDAGCombine(ISD::LOAD);
1241 setTargetDAGCombine(ISD::STORE);
1242 setTargetDAGCombine(ISD::BR_CC);
1243 if (Subtarget.useCRBits())
1244 setTargetDAGCombine(ISD::BRCOND);
1245 setTargetDAGCombine(ISD::BSWAP);
1246 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
1247 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
1248 setTargetDAGCombine(ISD::INTRINSIC_VOID);
1249
1250 setTargetDAGCombine(ISD::SIGN_EXTEND);
1251 setTargetDAGCombine(ISD::ZERO_EXTEND);
1252 setTargetDAGCombine(ISD::ANY_EXTEND);
1253
1254 setTargetDAGCombine(ISD::TRUNCATE);
1255 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
1256
1257
1258 if (Subtarget.useCRBits()) {
1259 setTargetDAGCombine(ISD::TRUNCATE);
1260 setTargetDAGCombine(ISD::SETCC);
1261 setTargetDAGCombine(ISD::SELECT_CC);
1262 }
1263
1264 if (Subtarget.hasP9Altivec()) {
1265 setTargetDAGCombine(ISD::ABS);
1266 setTargetDAGCombine(ISD::VSELECT);
1267 }
1268
1269 setLibcallName(RTLIB::LOG_F128, "logf128");
1270 setLibcallName(RTLIB::LOG2_F128, "log2f128");
1271 setLibcallName(RTLIB::LOG10_F128, "log10f128");
1272 setLibcallName(RTLIB::EXP_F128, "expf128");
1273 setLibcallName(RTLIB::EXP2_F128, "exp2f128");
1274 setLibcallName(RTLIB::SIN_F128, "sinf128");
1275 setLibcallName(RTLIB::COS_F128, "cosf128");
1276 setLibcallName(RTLIB::POW_F128, "powf128");
1277 setLibcallName(RTLIB::FMIN_F128, "fminf128");
1278 setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
1279 setLibcallName(RTLIB::POWI_F128, "__powikf2");
1280 setLibcallName(RTLIB::REM_F128, "fmodf128");
1281
1282 // With 32 condition bits, we don't need to sink (and duplicate) compares
1283 // aggressively in CodeGenPrep.
1284 if (Subtarget.useCRBits()) {
1285 setHasMultipleConditionRegisters();
1286 setJumpIsExpensive();
1287 }
1288
1289 setMinFunctionAlignment(Align(4));
1290
1291 switch (Subtarget.getCPUDirective()) {
1292 default: break;
1293 case PPC::DIR_970:
1294 case PPC::DIR_A2:
1295 case PPC::DIR_E500:
1296 case PPC::DIR_E500mc:
1297 case PPC::DIR_E5500:
1298 case PPC::DIR_PWR4:
1299 case PPC::DIR_PWR5:
1300 case PPC::DIR_PWR5X:
1301 case PPC::DIR_PWR6:
1302 case PPC::DIR_PWR6X:
1303 case PPC::DIR_PWR7:
1304 case PPC::DIR_PWR8:
1305 case PPC::DIR_PWR9:
1306 case PPC::DIR_PWR10:
1307 case PPC::DIR_PWR_FUTURE:
1308 setPrefLoopAlignment(Align(16));
1309 setPrefFunctionAlignment(Align(16));
1310 break;
1311 }
1312
1313 if (Subtarget.enableMachineScheduler())
1314 setSchedulingPreference(Sched::Source);
1315 else
1316 setSchedulingPreference(Sched::Hybrid);
1317
1318 computeRegisterProperties(STI.getRegisterInfo());
1319
1320 // The Freescale cores do better with aggressive inlining of memcpy and
1321 // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
1322 if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
1323 Subtarget.getCPUDirective() == PPC::DIR_E5500) {
1324 MaxStoresPerMemset = 32;
1325 MaxStoresPerMemsetOptSize = 16;
1326 MaxStoresPerMemcpy = 32;
1327 MaxStoresPerMemcpyOptSize = 8;
1328 MaxStoresPerMemmove = 32;
1329 MaxStoresPerMemmoveOptSize = 8;
1330 } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
1331 // The A2 also benefits from (very) aggressive inlining of memcpy and
1332 // friends. The overhead of the function call, even when warm, can be
1333 // over one hundred cycles.
1334 MaxStoresPerMemset = 128;
1335 MaxStoresPerMemcpy = 128;
1336 MaxStoresPerMemmove = 128;
1337 MaxLoadsPerMemcmp = 128;
1338 } else {
1339 MaxLoadsPerMemcmp = 8;
1340 MaxLoadsPerMemcmpOptSize = 4;
1341 }
1342
1343 IsStrictFPEnabled = true;
1344
1345 // Let the subtarget (CPU) decide if a predictable select is more expensive
1346 // than the corresponding branch. This information is used in CGP to decide
1347 // when to convert selects into branches.
1348 PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();
1349}
1350
1351/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1352/// the desired ByVal argument alignment.
1353static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1354 if (MaxAlign == MaxMaxAlign)
1355 return;
1356 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1357 if (MaxMaxAlign >= 32 &&
1358 VTy->getPrimitiveSizeInBits().getFixedSize() >= 256)
1359 MaxAlign = Align(32);
1360 else if (VTy->getPrimitiveSizeInBits().getFixedSize() >= 128 &&
1361 MaxAlign < 16)
1362 MaxAlign = Align(16);
1363 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1364 Align EltAlign;
1365 getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1366 if (EltAlign > MaxAlign)
1367 MaxAlign = EltAlign;
1368 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1369 for (auto *EltTy : STy->elements()) {
1370 Align EltAlign;
1371 getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1372 if (EltAlign > MaxAlign)
1373 MaxAlign = EltAlign;
1374 if (MaxAlign == MaxMaxAlign)
1375 break;
1376 }
1377 }
1378}
1379
1380/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1381/// function arguments in the caller parameter area.
1382unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
1383 const DataLayout &DL) const {
1384 // 16-byte and wider vectors are passed on a 16-byte boundary.
1385 // The rest are passed on an 8-byte boundary on PPC64 and a 4-byte boundary on PPC32.
1386 Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1387 if (Subtarget.hasAltivec())
1388 getMaxByValAlign(Ty, Alignment, Align(16));
1389 return Alignment.value();
1390}
1391
1392bool PPCTargetLowering::useSoftFloat() const {
1393 return Subtarget.useSoftFloat();
1394}
1395
1396bool PPCTargetLowering::hasSPE() const {
1397 return Subtarget.hasSPE();
1398}
1399
1400bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
1401 return VT.isScalarInteger();
1402}
1403
1404const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1405 switch ((PPCISD::NodeType)Opcode) {
1406 case PPCISD::FIRST_NUMBER: break;
1407 case PPCISD::FSEL: return "PPCISD::FSEL";
1408 case PPCISD::XSMAXCDP: return "PPCISD::XSMAXCDP";
1409 case PPCISD::XSMINCDP: return "PPCISD::XSMINCDP";
1410 case PPCISD::FCFID: return "PPCISD::FCFID";
1411 case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
1412 case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
1413 case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
1414 case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
1415 case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
1416 case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
1417 case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
1418 case PPCISD::FP_TO_UINT_IN_VSR:
1419 return "PPCISD::FP_TO_UINT_IN_VSR,";
1420 case PPCISD::FP_TO_SINT_IN_VSR:
1421 return "PPCISD::FP_TO_SINT_IN_VSR";
1422 case PPCISD::FRE: return "PPCISD::FRE";
1423 case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
1424 case PPCISD::STFIWX: return "PPCISD::STFIWX";
1425 case PPCISD::VPERM: return "PPCISD::VPERM";
1426 case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
1427 case PPCISD::XXSPLTI_SP_TO_DP:
1428 return "PPCISD::XXSPLTI_SP_TO_DP";
1429 case PPCISD::XXSPLTI32DX:
1430 return "PPCISD::XXSPLTI32DX";
1431 case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
1432 case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
1433 case PPCISD::VECSHL: return "PPCISD::VECSHL";
1434 case PPCISD::CMPB: return "PPCISD::CMPB";
1435 case PPCISD::Hi: return "PPCISD::Hi";
1436 case PPCISD::Lo: return "PPCISD::Lo";
1437 case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
1438 case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1439 case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
1440 case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
1441 case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
1442 case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA";
1443 case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
1444 case PPCISD::SRL: return "PPCISD::SRL";
1445 case PPCISD::SRA: return "PPCISD::SRA";
1446 case PPCISD::SHL: return "PPCISD::SHL";
1447 case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
1448 case PPCISD::CALL: return "PPCISD::CALL";
1449 case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
1450 case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC";
1451 case PPCISD::MTCTR: return "PPCISD::MTCTR";
1452 case PPCISD::BCTRL: return "PPCISD::BCTRL";
1453 case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
1454 case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
1455 case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
1456 case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
1457 case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1458 case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
1459 case PPCISD::MFVSR: return "PPCISD::MFVSR";
1460 case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
1461 case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
1462 case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
1463 case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
1464 case PPCISD::SCALAR_TO_VECTOR_PERMUTED:
1465 return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
1466 case PPCISD::ANDI_rec_1_EQ_BIT:
1467 return "PPCISD::ANDI_rec_1_EQ_BIT";
1468 case PPCISD::ANDI_rec_1_GT_BIT:
1469 return "PPCISD::ANDI_rec_1_GT_BIT";
1470 case PPCISD::VCMP: return "PPCISD::VCMP";
1471 case PPCISD::VCMPo: return "PPCISD::VCMPo";
1472 case PPCISD::LBRX: return "PPCISD::LBRX";
1473 case PPCISD::STBRX: return "PPCISD::STBRX";
1474 case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
1475 case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
1476 case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
1477 case PPCISD::STXSIX: return "PPCISD::STXSIX";
1478 case PPCISD::VEXTS: return "PPCISD::VEXTS";
1479 case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
1480 case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
1481 case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE";
1482 case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE";
1483 case PPCISD::ST_VSR_SCAL_INT:
1484 return "PPCISD::ST_VSR_SCAL_INT";
1485 case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
1486 case PPCISD::BDNZ: return "PPCISD::BDNZ";
1487 case PPCISD::BDZ: return "PPCISD::BDZ";
1488 case PPCISD::MFFS: return "PPCISD::MFFS";
1489 case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
1490 case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
1491 case PPCISD::CR6SET: return "PPCISD::CR6SET";
1492 case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
1493 case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
1494 case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
1495 case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1496 case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
1497 case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
1498 case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
1499 case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
1500 case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
1501 case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1502 case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
1503 case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
1504 case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
1505 case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1506 case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1507 case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
1508 case PPCISD::PADDI_DTPREL:
1509 return "PPCISD::PADDI_DTPREL";
1510 case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
1511 case PPCISD::SC: return "PPCISD::SC";
1512 case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";
1513 case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";
1514 case PPCISD::RFEBB: return "PPCISD::RFEBB";
1515 case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
1516 case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
1517 case PPCISD::VABSD: return "PPCISD::VABSD";
1518 case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
1519 case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
1520 case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
1521 case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
1522 case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
1523 case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
1524 case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR";
1525 case PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR:
1526 return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
1527 case PPCISD::TLS_LOCAL_EXEC_MAT_ADDR:
1528 return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
1529 case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD";
1530 case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD";
1531 case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
1532 case PPCISD::XXMFACC: return "PPCISD::XXMFACC";
1533 case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
1534 case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
1535 case PPCISD::STRICT_FADDRTZ:
1536 return "PPCISD::STRICT_FADDRTZ";
1537 case PPCISD::STRICT_FCTIDZ:
1538 return "PPCISD::STRICT_FCTIDZ";
1539 case PPCISD::STRICT_FCTIWZ:
1540 return "PPCISD::STRICT_FCTIWZ";
1541 case PPCISD::STRICT_FCTIDUZ:
1542 return "PPCISD::STRICT_FCTIDUZ";
1543 case PPCISD::STRICT_FCTIWUZ:
1544 return "PPCISD::STRICT_FCTIWUZ";
1545 case PPCISD::STRICT_FCFID:
1546 return "PPCISD::STRICT_FCFID";
1547 case PPCISD::STRICT_FCFIDU:
1548 return "PPCISD::STRICT_FCFIDU";
1549 case PPCISD::STRICT_FCFIDS:
1550 return "PPCISD::STRICT_FCFIDS";
1551 case PPCISD::STRICT_FCFIDUS:
1552 return "PPCISD::STRICT_FCFIDUS";
1553 case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
1554 }
1555 return nullptr;
1556}
1557
1558EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
1559 EVT VT) const {
1560 if (!VT.isVector())
1561 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1562
1563 return VT.changeVectorElementTypeToInteger();
1564}
1565
1566bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
1567 assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1568 return true;
1569}
1570
1571//===----------------------------------------------------------------------===//
1572// Node matching predicates, for use by the tblgen matching code.
1573//===----------------------------------------------------------------------===//
1574
1575/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1576static bool isFloatingPointZero(SDValue Op) {
1577 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1578 return CFP->getValueAPF().isZero();
1579 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1580 // Maybe this has already been legalized into the constant pool?
1581 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1582 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1583 return CFP->getValueAPF().isZero();
1584 }
1585 return false;
1586}
1587
1588/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
1589/// true if Op is undef or if it matches the specified value.
1590static bool isConstantOrUndef(int Op, int Val) {
1591 return Op < 0 || Op == Val;
1592}
1593
1594/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1595/// VPKUHUM instruction.
1596/// The ShuffleKind distinguishes between big-endian operations with
1597/// two different inputs (0), either-endian operations with two identical
1598/// inputs (1), and little-endian operations with two different inputs (2).
1599/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1600bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1601 SelectionDAG &DAG) {
1602 bool IsLE = DAG.getDataLayout().isLittleEndian();
1603 if (ShuffleKind == 0) {
1604 if (IsLE)
1605 return false;
1606 for (unsigned i = 0; i != 16; ++i)
1607 if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1608 return false;
1609 } else if (ShuffleKind == 2) {
1610 if (!IsLE)
1611 return false;
1612 for (unsigned i = 0; i != 16; ++i)
1613 if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1614 return false;
1615 } else if (ShuffleKind == 1) {
1616 unsigned j = IsLE ? 0 : 1;
1617 for (unsigned i = 0; i != 8; ++i)
1618 if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
1619 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
1620 return false;
1621 }
1622 return true;
1623}
1624
1625/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1626/// VPKUWUM instruction.
1627/// The ShuffleKind distinguishes between big-endian operations with
1628/// two different inputs (0), either-endian operations with two identical
1629/// inputs (1), and little-endian operations with two different inputs (2).
1630/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1631bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1632 SelectionDAG &DAG) {
1633 bool IsLE = DAG.getDataLayout().isLittleEndian();
1634 if (ShuffleKind == 0) {
1635 if (IsLE)
1636 return false;
1637 for (unsigned i = 0; i != 16; i += 2)
1638 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
1639 !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
1640 return false;
1641 } else if (ShuffleKind == 2) {
1642 if (!IsLE)
1643 return false;
1644 for (unsigned i = 0; i != 16; i += 2)
1645 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1646 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
1647 return false;
1648 } else if (ShuffleKind == 1) {
1649 unsigned j = IsLE ? 0 : 2;
1650 for (unsigned i = 0; i != 8; i += 2)
1651 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1652 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1653 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1654 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
1655 return false;
1656 }
1657 return true;
1658}
1659
1660/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1661/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1662/// current subtarget.
1663///
1664/// The ShuffleKind distinguishes between big-endian operations with
1665/// two different inputs (0), either-endian operations with two identical
1666/// inputs (1), and little-endian operations with two different inputs (2).
1667/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1668bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1669 SelectionDAG &DAG) {
1670 const PPCSubtarget& Subtarget =
1671 static_cast<const PPCSubtarget&>(DAG.getSubtarget());
1672 if (!Subtarget.hasP8Vector())
1673 return false;
1674
1675 bool IsLE = DAG.getDataLayout().isLittleEndian();
1676 if (ShuffleKind == 0) {
1677 if (IsLE)
1678 return false;
1679 for (unsigned i = 0; i != 16; i += 4)
1680 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||
1681 !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
1682 !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
1683 !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
1684 return false;
1685 } else if (ShuffleKind == 2) {
1686 if (!IsLE)
1687 return false;
1688 for (unsigned i = 0; i != 16; i += 4)
1689 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1690 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
1691 !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
1692 !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
1693 return false;
1694 } else if (ShuffleKind == 1) {
1695 unsigned j = IsLE ? 0 : 4;
1696 for (unsigned i = 0; i != 8; i += 4)
1697 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1698 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1699 !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
1700 !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
1701 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1702 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
1703 !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1704 !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1705 return false;
1706 }
1707 return true;
1708}
1709
1710/// isVMerge - Common function, used to match vmrg* shuffles.
1711///
1712static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
1713 unsigned LHSStart, unsigned RHSStart) {
1714 if (N->getValueType(0) != MVT::v16i8)
1715 return false;
1716 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
1717 "Unsupported merge size!");
1718
1719 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
1720 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
1721 if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
1722 LHSStart+j+i*UnitSize) ||
1723 !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
1724 RHSStart+j+i*UnitSize))
1725 return false;
1726 }
1727 return true;
1728}
1729
1730/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
1731/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
1732/// The ShuffleKind distinguishes between big-endian merges with two
1733/// different inputs (0), either-endian merges with two identical inputs (1),
1734/// and little-endian merges with two different inputs (2). For the latter,
1735/// the input operands are swapped (see PPCInstrAltivec.td).
1736bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1737 unsigned ShuffleKind, SelectionDAG &DAG) {
1738 if (DAG.getDataLayout().isLittleEndian()) {
1739 if (ShuffleKind == 1) // unary
1740 return isVMerge(N, UnitSize, 0, 0);
1741 else if (ShuffleKind == 2) // swapped
1742 return isVMerge(N, UnitSize, 0, 16);
1743 else
1744 return false;
1745 } else {
1746 if (ShuffleKind == 1) // unary
1747 return isVMerge(N, UnitSize, 8, 8);
1748 else if (ShuffleKind == 0) // normal
1749 return isVMerge(N, UnitSize, 8, 24);
1750 else
1751 return false;
1752 }
1753}
1754
1755/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
1756/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
1757/// The ShuffleKind distinguishes between big-endian merges with two
1758/// different inputs (0), either-endian merges with two identical inputs (1),
1759/// and little-endian merges with two different inputs (2). For the latter,
1760/// the input operands are swapped (see PPCInstrAltivec.td).
1761bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1762 unsigned ShuffleKind, SelectionDAG &DAG) {
1763 if (DAG.getDataLayout().isLittleEndian()) {
1764 if (ShuffleKind == 1) // unary
1765 return isVMerge(N, UnitSize, 8, 8);
1766 else if (ShuffleKind == 2) // swapped
1767 return isVMerge(N, UnitSize, 8, 24);
1768 else
1769 return false;
1770 } else {
1771 if (ShuffleKind == 1) // unary
1772 return isVMerge(N, UnitSize, 0, 0);
1773 else if (ShuffleKind == 0) // normal
1774 return isVMerge(N, UnitSize, 0, 16);
1775 else
1776 return false;
1777 }
1778}
1779
1780/**
1781 * Common function used to match vmrgew and vmrgow shuffles
1782 *
1783 * The indexOffset determines whether to look for even or odd words in
1784 * the shuffle mask. This is based on the endianness of the target
1785 * machine.
1786 * - Little Endian:
1787 * - Use offset of 0 to check for odd elements
1788 * - Use offset of 4 to check for even elements
1789 * - Big Endian:
1790 * - Use offset of 0 to check for even elements
1791 * - Use offset of 4 to check for odd elements
1792 * A detailed description of the vector element ordering for little endian and
1793 * big endian can be found at
1794 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
1795 * Targeting your applications - what little endian and big endian IBM XL C/C++
1796 * compiler differences mean to you
1797 *
1798 * The mask to the shuffle vector instruction specifies the indices of the
1799 * elements from the two input vectors to place in the result. The elements are
1800 * numbered in array-access order, starting with the first vector. These vectors
1801 * are always of type v16i8, thus each vector will contain 16 elements that
1802 * are 8 bits wide. More info on the shuffle vector can be found in the
1803 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
1804 * Language Reference.
1805 *
1806 * The RHSStartValue indicates whether the same input vectors are used (unary)
1807 * or two different input vectors are used, based on the following:
1808 * - If the instruction uses the same vector for both inputs, the range of the
1809 * indices will be 0 to 15. In this case, the RHSStart value passed should
1810 * be 0.
1811 * - If the instruction has two different vectors then the range of the
1812 * indices will be 0 to 31. In this case, the RHSStart value passed should
1813 * be 16 (indices 0-15 specify elements in the first vector while indices 16
1814 * to 31 specify elements in the second vector).
1815 *
1816 * \param[in] N The shuffle vector SD Node to analyze
1817 * \param[in] IndexOffset Specifies whether to look for even or odd elements
1818 * \param[in] RHSStartValue Specifies the starting index for the righthand input
1819 * vector to the shuffle_vector instruction
1820 * \return true iff this shuffle vector represents an even or odd word merge
1821 */
1822static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
1823 unsigned RHSStartValue) {
1824 if (N->getValueType(0) != MVT::v16i8)
1825 return false;
1826
1827 for (unsigned i = 0; i < 2; ++i)
1828 for (unsigned j = 0; j < 4; ++j)
1829 if (!isConstantOrUndef(N->getMaskElt(i*4+j),
1830 i*RHSStartValue+j+IndexOffset) ||
1831 !isConstantOrUndef(N->getMaskElt(i*4+j+8),
1832 i*RHSStartValue+j+IndexOffset+8))
1833 return false;
1834 return true;
1835}
1836
1837/**
1838 * Determine if the specified shuffle mask is suitable for the vmrgew or
1839 * vmrgow instructions.
1840 *
1841 * \param[in] N The shuffle vector SD Node to analyze
1842 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
1843 * \param[in] ShuffleKind Identify the type of merge:
1844 * - 0 = big-endian merge with two different inputs;
1845 * - 1 = either-endian merge with two identical inputs;
1846 * - 2 = little-endian merge with two different inputs (inputs are swapped for
1847 * little-endian merges).
1848 * \param[in] DAG The current SelectionDAG
1849 * \return true iff this shuffle mask is suitable for the vmrgew or vmrgow instructions
1850 */
1851bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
1852 unsigned ShuffleKind, SelectionDAG &DAG) {
1853 if (DAG.getDataLayout().isLittleEndian()) {
1854 unsigned indexOffset = CheckEven ? 4 : 0;
1855 if (ShuffleKind == 1) // Unary
1856 return isVMerge(N, indexOffset, 0);
1857 else if (ShuffleKind == 2) // swapped
1858 return isVMerge(N, indexOffset, 16);
1859 else
1860 return false;
1861 }
1862 else {
1863 unsigned indexOffset = CheckEven ? 0 : 4;
1864 if (ShuffleKind == 1) // Unary
1865 return isVMerge(N, indexOffset, 0);
1866 else if (ShuffleKind == 0) // Normal
1867 return isVMerge(N, indexOffset, 16);
1868 else
1869 return false;
1870 }
1871 return false;
1872}
1873
1874/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
1875/// amount, otherwise return -1.
1876/// The ShuffleKind distinguishes between big-endian operations with two
1877/// different inputs (0), either-endian operations with two identical inputs
1878/// (1), and little-endian operations with two different inputs (2). For the
1879/// latter, the input operands are swapped (see PPCInstrAltivec.td).
1880int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
1881 SelectionDAG &DAG) {
1882 if (N->getValueType(0) != MVT::v16i8)
1883 return -1;
1884
1885 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
1886
1887 // Find the first non-undef value in the shuffle mask.
1888 unsigned i;
1889 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
1890 /*search*/;
1891
1892 if (i == 16) return -1; // all undef.
1893
1894 // Otherwise, check to see if the rest of the elements are consecutively
1895 // numbered from this value.
1896 unsigned ShiftAmt = SVOp->getMaskElt(i);
1897 if (ShiftAmt < i) return -1;
1898
1899 ShiftAmt -= i;
1900 bool isLE = DAG.getDataLayout().isLittleEndian();
1901
1902 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
1903 // Check the rest of the elements to see if they are consecutive.
1904 for (++i; i != 16; ++i)
1905 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
1906 return -1;
1907 } else if (ShuffleKind == 1) {
1908 // Check the rest of the elements to see if they are consecutive.
1909 for (++i; i != 16; ++i)
1910 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
1911 return -1;
1912 } else
1913 return -1;
1914
1915 if (isLE)
1916 ShiftAmt = 16 - ShiftAmt;
1917
1918 return ShiftAmt;
1919}
1920
1921/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
1922/// specifies a splat of a single element that is suitable for input to
1923/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
1924bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
1925 assert(N->getValueType(0) == MVT::v16i8 && isPowerOf2_32(EltSize) &&
1926 EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
1927
1928 // The consecutive indices need to specify an element, not part of two
1929 // different elements. So abandon ship early if this isn't the case.
1930 if (N->getMaskElt(0) % EltSize != 0)
1931 return false;
1932
1933 // This is a splat operation if each element of the permute is the same, and
1934 // if the value doesn't reference the second vector.
1935 unsigned ElementBase = N->getMaskElt(0);
1936
1937 // FIXME: Handle UNDEF elements too!
1938 if (ElementBase >= 16)
1939 return false;
1940
1941 // Check that the indices are consecutive, in the case of a multi-byte element
1942 // splatted with a v16i8 mask.
1943 for (unsigned i = 1; i != EltSize; ++i)
1944 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
1945 return false;
1946
1947 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
1948 if (N->getMaskElt(i) < 0) continue;
1949 for (unsigned j = 0; j != EltSize; ++j)
1950 if (N->getMaskElt(i+j) != N->getMaskElt(j))
1951 return false;
1952 }
1953 return true;
1954}
1955
1956/// Check that the mask is shuffling N byte elements. Within each N byte
1957/// element of the mask, the indices could be either in increasing or
1958/// decreasing order as long as they are consecutive.
1959/// \param[in] N the shuffle vector SD Node to analyze
1960/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
1961/// Word/DoubleWord/QuadWord).
1962/// \param[in] StepLen the delta indices number among the N byte element, if
1963/// the mask is in increasing/decreasing order then it is 1/-1.
1964/// \return true iff the mask is shuffling N byte elements.
1965static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
1966 int StepLen) {
1967 assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
1968 "Unexpected element width.");
1969 assert((StepLen == 1 || StepLen == -1) && "Unexpected element width.");
1970
1971 unsigned NumOfElem = 16 / Width;
1972 unsigned MaskVal[16]; // Width is never greater than 16
1973 for (unsigned i = 0; i < NumOfElem; ++i) {
1974 MaskVal[0] = N->getMaskElt(i * Width);
1975 if ((StepLen == 1) && (MaskVal[0] % Width)) {
1976 return false;
1977 } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
1978 return false;
1979 }
1980
1981 for (unsigned int j = 1; j < Width; ++j) {
1982 MaskVal[j] = N->getMaskElt(i * Width + j);
1983 if (MaskVal[j] != MaskVal[j-1] + StepLen) {
1984 return false;
1985 }
1986 }
1987 }
1988
1989 return true;
1990}
1991
1992bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
1993 unsigned &InsertAtByte, bool &Swap, bool IsLE) {
1994 if (!isNByteElemShuffleMask(N, 4, 1))
1995 return false;
1996
1997 // Now we look at mask elements 0,4,8,12
1998 unsigned M0 = N->getMaskElt(0) / 4;
1999 unsigned M1 = N->getMaskElt(4) / 4;
2000 unsigned M2 = N->getMaskElt(8) / 4;
2001 unsigned M3 = N->getMaskElt(12) / 4;
2002 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
2003 unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
2004
2005 // Below, let H and L be arbitrary elements of the shuffle mask
2006 // where H is in the range [4,7] and L is in the range [0,3].
2007 // H, 1, 2, 3 or L, 5, 6, 7
2008 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
2009 (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
2010 ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
2011 InsertAtByte = IsLE ? 12 : 0;
2012 Swap = M0 < 4;
2013 return true;
2014 }
2015 // 0, H, 2, 3 or 4, L, 6, 7
2016 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
2017 (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
2018 ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
2019 InsertAtByte = IsLE ? 8 : 4;
2020 Swap = M1 < 4;
2021 return true;
2022 }
2023 // 0, 1, H, 3 or 4, 5, L, 7
2024 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
2025 (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
2026 ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
2027 InsertAtByte = IsLE ? 4 : 8;
2028 Swap = M2 < 4;
2029 return true;
2030 }
2031 // 0, 1, 2, H or 4, 5, 6, L
2032 if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
2033 (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
2034 ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
2035 InsertAtByte = IsLE ? 0 : 12;
2036 Swap = M3 < 4;
2037 return true;
2038 }
2039
2040 // If both vector operands for the shuffle are the same vector, the mask will
2041 // contain only elements from the first one and the second one will be undef.
2042 if (N->getOperand(1).isUndef()) {
2043 ShiftElts = 0;
2044 Swap = true;
2045 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2046 if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
2047 InsertAtByte = IsLE ? 12 : 0;
2048 return true;
2049 }
2050 if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2051 InsertAtByte = IsLE ? 8 : 4;
2052 return true;
2053 }
2054 if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2055 InsertAtByte = IsLE ? 4 : 8;
2056 return true;
2057 }
2058 if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2059 InsertAtByte = IsLE ? 0 : 12;
2060 return true;
2061 }
2062 }
2063
2064 return false;
2065}
2066
2067bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2068 bool &Swap, bool IsLE) {
2069 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8")((N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8"
) ? static_cast<void> (0) : __assert_fail ("N->getValueType(0) == MVT::v16i8 && \"Shuffle vector expects v16i8\""
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 2069, __PRETTY_FUNCTION__))
;
25. '?' condition is true
2070 // Ensure each byte index of the word is consecutive.
2071 if (!isNByteElemShuffleMask(N, 4, 1))
26. Assuming the condition is false
27. Taking false branch
2072 return false;
2073
2074 // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2075 unsigned M0 = N->getMaskElt(0) / 4;
2076 unsigned M1 = N->getMaskElt(4) / 4;
2077 unsigned M2 = N->getMaskElt(8) / 4;
2078 unsigned M3 = N->getMaskElt(12) / 4;
2079
2080 // If both vector operands for the shuffle are the same vector, the mask will
2081 // contain only elements from the first one and the second one will be undef.
2082 if (N->getOperand(1).isUndef()) {
28. Calling 'SDValue::isUndef'
34. Returning from 'SDValue::isUndef'
35. Taking false branch
2083 assert(M0 < 4 && "Indexing into an undef vector?");
2084 if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2085 return false;
2086
2087 ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2088 Swap = false;
2089 return true;
2090 }
2091
2092 // Ensure each word index of the ShuffleVector Mask is consecutive.
2093 if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
36. Assuming the condition is false
37. Assuming the condition is false
38. Assuming the condition is false
39. Taking false branch
2094 return false;
2095
2096 if (IsLE) {
40. Assuming 'IsLE' is false
41. Taking false branch
2097 if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2098 // Input vectors don't need to be swapped if the leading element
2099 // of the result is one of the 3 left elements of the second vector
2100 // (or if there is no shift to be done at all).
2101 Swap = false;
2102 ShiftElts = (8 - M0) % 8;
2103 } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2104 // Input vectors need to be swapped if the leading element
2105 // of the result is one of the 3 left elements of the first vector
2106 // (or if we're shifting by 4 - thereby simply swapping the vectors).
2107 Swap = true;
2108 ShiftElts = (4 - M0) % 4;
2109 }
2110
2111 return true;
2112 } else { // BE
2113 if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
42. Assuming 'M0' is not equal to 0
43. Assuming 'M0' is not equal to 1
44. Assuming 'M0' is not equal to 2
45. Assuming 'M0' is not equal to 3
46. Taking false branch
2114 // Input vectors don't need to be swapped if the leading element
2115 // of the result is one of the 4 elements of the first vector.
2116 Swap = false;
2117 ShiftElts = M0;
2118 } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
47. Assuming 'M0' is not equal to 4
48. Assuming 'M0' is not equal to 5
49. Assuming 'M0' is not equal to 6
50. Assuming 'M0' is not equal to 7
51. Taking false branch
2119 // Input vectors need to be swapped if the leading element
2120 // of the result is one of the 4 elements of the right vector.
2121 Swap = true;
2122 ShiftElts = M0 - 4;
2123 }
2124
2125 return true;
52. Returning without writing to 'ShiftElts'
53. Returning the value 1, which participates in a condition later
2126 }
2127}
2128
2129bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
2130 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2131
2132 if (!isNByteElemShuffleMask(N, Width, -1))
2133 return false;
2134
2135 for (int i = 0; i < 16; i += Width)
2136 if (N->getMaskElt(i) != i + Width - 1)
2137 return false;
2138
2139 return true;
2140}
2141
2142bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
2143 return isXXBRShuffleMaskHelper(N, 2);
2144}
2145
2146bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
2147 return isXXBRShuffleMaskHelper(N, 4);
2148}
2149
2150bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
2151 return isXXBRShuffleMaskHelper(N, 8);
2152}
2153
2154bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
2155 return isXXBRShuffleMaskHelper(N, 16);
2156}
2157
2158/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2159/// if the inputs to the instruction should be swapped and set \p DM to the
2160/// value for the immediate.
2161/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2162/// AND element 0 of the result comes from the first input (LE) or second input
2163/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2164/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2165/// mask.
2166bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
2167 bool &Swap, bool IsLE) {
2168 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2169
2170 // Ensure each byte index of the double word is consecutive.
2171 if (!isNByteElemShuffleMask(N, 8, 1))
2172 return false;
2173
2174 unsigned M0 = N->getMaskElt(0) / 8;
2175 unsigned M1 = N->getMaskElt(8) / 8;
2176 assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2177
2178 // If both vector operands for the shuffle are the same vector, the mask will
2179 // contain only elements from the first one and the second one will be undef.
2180 if (N->getOperand(1).isUndef()) {
2181 if ((M0 | M1) < 2) {
2182 DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2183 Swap = false;
2184 return true;
2185 } else
2186 return false;
2187 }
2188
2189 if (IsLE) {
2190 if (M0 > 1 && M1 < 2) {
2191 Swap = false;
2192 } else if (M0 < 2 && M1 > 1) {
2193 M0 = (M0 + 2) % 4;
2194 M1 = (M1 + 2) % 4;
2195 Swap = true;
2196 } else
2197 return false;
2198
2199 // Note: if control flow comes here that means Swap is already set above
2200 DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2201 return true;
2202 } else { // BE
2203 if (M0 < 2 && M1 > 1) {
2204 Swap = false;
2205 } else if (M0 > 1 && M1 < 2) {
2206 M0 = (M0 + 2) % 4;
2207 M1 = (M1 + 2) % 4;
2208 Swap = true;
2209 } else
2210 return false;
2211
2212 // Note: if control flow comes here that means Swap is already set above
2213 DM = (M0 << 1) + (M1 & 1);
2214 return true;
2215 }
2216}
2217
2218
2219/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2220/// appropriate for PPC mnemonics (which have a big endian bias - namely
2221/// elements are counted from the left of the vector register).
2222unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2223 SelectionDAG &DAG) {
2224 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2225 assert(isSplatShuffleMask(SVOp, EltSize));
2226 if (DAG.getDataLayout().isLittleEndian())
2227 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2228 else
2229 return SVOp->getMaskElt(0) / EltSize;
2230}
2231
2232/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2233/// by using a vspltis[bhw] instruction of the specified element size, return
2234/// the constant being splatted. The ByteSize field indicates the number of
2235/// bytes of each element [124] -> [bhw].
2236SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2237 SDValue OpVal(nullptr, 0);
2238
2239 // If ByteSize of the splat is bigger than the element size of the
2240 // build_vector, then we have a case where we are checking for a splat where
2241 // multiple elements of the buildvector are folded together into a single
2242 // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8).
2243 unsigned EltSize = 16/N->getNumOperands();
2244 if (EltSize < ByteSize) {
2245 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2246 SDValue UniquedVals[4];
2247 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2248
2249 // See if all of the elements in the buildvector agree across.
2250 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2251 if (N->getOperand(i).isUndef()) continue;
2252 // If the element isn't a constant, bail fully out.
2253 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2254
2255 if (!UniquedVals[i&(Multiple-1)].getNode())
2256 UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2257 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2258 return SDValue(); // no match.
2259 }
2260
2261 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2262 // either constant or undef values that are identical for each chunk. See
2263 // if these chunks can form into a larger vspltis*.
2264
2265 // Check to see if all of the leading entries are either 0 or -1. If
2266 // neither, then this won't fit into the immediate field.
2267 bool LeadingZero = true;
2268 bool LeadingOnes = true;
2269 for (unsigned i = 0; i != Multiple-1; ++i) {
2270 if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2271
2272 LeadingZero &= isNullConstant(UniquedVals[i]);
2273 LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2274 }
2275 // Finally, check the least significant entry.
2276 if (LeadingZero) {
2277 if (!UniquedVals[Multiple-1].getNode())
2278 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
2279 int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
2280 if (Val < 16) // 0,0,0,4 -> vspltisw(4)
2281 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2282 }
2283 if (LeadingOnes) {
2284 if (!UniquedVals[Multiple-1].getNode())
2285 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2286 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2287 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2288 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2289 }
2290
2291 return SDValue();
2292 }
2293
2294 // Check to see if this buildvec has a single non-undef value in its elements.
2295 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2296 if (N->getOperand(i).isUndef()) continue;
2297 if (!OpVal.getNode())
2298 OpVal = N->getOperand(i);
2299 else if (OpVal != N->getOperand(i))
2300 return SDValue();
2301 }
2302
2303 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
2304
2305 unsigned ValSizeInBytes = EltSize;
2306 uint64_t Value = 0;
2307 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2308 Value = CN->getZExtValue();
2309 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2310 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2311 Value = FloatToBits(CN->getValueAPF().convertToFloat());
2312 }
2313
2314 // If the splat value is larger than the element value, then we can never do
2315 // this splat. The only case that we could fit the replicated bits into our
2316 // immediate field for would be zero, and we prefer to use vxor for it.
2317 if (ValSizeInBytes < ByteSize) return SDValue();
2318
2319 // If the element value is larger than the splat value, check if it consists
2320 // of a repeated bit pattern of size ByteSize.
2321 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2322 return SDValue();
2323
2324 // Properly sign extend the value.
2325 int MaskVal = SignExtend32(Value, ByteSize * 8);
2326
2327 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2328 if (MaskVal == 0) return SDValue();
2329
2330 // Finally, if this value fits in a 5 bit sext field, return it
2331 if (SignExtend32<5>(MaskVal) == MaskVal)
2332 return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2333 return SDValue();
2334}
2335
2336/// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift
2337/// amount, otherwise return -1.
2338int PPC::isQVALIGNIShuffleMask(SDNode *N) {
2339 EVT VT = N->getValueType(0);
2340 if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1)
2341 return -1;
2342
2343 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2344
2345 // Find the first non-undef value in the shuffle mask.
2346 unsigned i;
2347 for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i)
2348 /*search*/;
2349
2350 if (i == 4) return -1; // all undef.
2351
2352 // Otherwise, check to see if the rest of the elements are consecutively
2353 // numbered from this value.
2354 unsigned ShiftAmt = SVOp->getMaskElt(i);
2355 if (ShiftAmt < i) return -1;
2356 ShiftAmt -= i;
2357
2358 // Check the rest of the elements to see if they are consecutive.
2359 for (++i; i != 4; ++i)
2360 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2361 return -1;
2362
2363 return ShiftAmt;
2364}
2365
2366//===----------------------------------------------------------------------===//
2367// Addressing Mode Selection
2368//===----------------------------------------------------------------------===//
2369
2370/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2371/// or 64-bit immediate, and if the value can be accurately represented as a
2372/// sign extension from a 16-bit value. If so, this returns true and the
2373/// immediate.
2374bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2375 if (!isa<ConstantSDNode>(N))
2376 return false;
2377
2378 Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue();
2379 if (N->getValueType(0) == MVT::i32)
2380 return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
2381 else
2382 return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2383}
2384bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2385 return isIntS16Immediate(Op.getNode(), Imm);
2386}
2387
2388
2389/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2390/// be represented as an indexed [r+r] operation.
2391bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2392 SDValue &Index,
2393 SelectionDAG &DAG) const {
2394 for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
2395 UI != E; ++UI) {
2396 if (MemSDNode *Memop = dyn_cast<MemSDNode>(*UI)) {
2397 if (Memop->getMemoryVT() == MVT::f64) {
2398 Base = N.getOperand(0);
2399 Index = N.getOperand(1);
2400 return true;
2401 }
2402 }
2403 }
2404 return false;
2405}
2406
2407 /// SelectAddressRegReg - Given the specified address, check to see if it
2408/// can be represented as an indexed [r+r] operation. Returns false if it
2409/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2410/// non-zero and N can be represented by a base register plus a signed 16-bit
2411/// displacement, make a more precise judgement by checking (displacement % \p
2412/// EncodingAlignment).
2413bool PPCTargetLowering::SelectAddressRegReg(
2414 SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
2415 MaybeAlign EncodingAlignment) const {
2416 // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2417 // a [pc+imm].
2418 if (SelectAddressPCRel(N, Base))
2419 return false;
2420
2421 int16_t Imm = 0;
2422 if (N.getOpcode() == ISD::ADD) {
2423 // Is this an SPE load/store (f64), which can't handle a 16-bit offset?
2424 // SPE load/store can only handle 8-bit offsets.
2425 if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2426 return true;
2427 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2428 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2429 return false; // r+i
2430 if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2431 return false; // r+i
2432
2433 Base = N.getOperand(0);
2434 Index = N.getOperand(1);
2435 return true;
2436 } else if (N.getOpcode() == ISD::OR) {
2437 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2438 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2439      return false; // fold to r+i if we can.
2440
2441 // If this is an or of disjoint bitfields, we can codegen this as an add
2442 // (for better address arithmetic) if the LHS and RHS of the OR are provably
2443 // disjoint.
2444 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2445
2446 if (LHSKnown.Zero.getBoolValue()) {
2447 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2448 // If all of the bits are known zero on the LHS or RHS, the add won't
2449 // carry.
2450 if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2451 Base = N.getOperand(0);
2452 Index = N.getOperand(1);
2453 return true;
2454 }
2455 }
2456 }
2457
2458 return false;
2459}
2460
2461// If we happen to be doing an i64 load or store into a stack slot that has
2462// less than a 4-byte alignment, then the frame-index elimination may need to
2463// use an indexed load or store instruction (because the offset may not be a
2464// multiple of 4). The extra register needed to hold the offset comes from the
2465// register scavenger, and it is possible that the scavenger will need to use
2466// an emergency spill slot. As a result, we need to make sure that a spill slot
2467// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2468// stack slot.
2469static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2470 // FIXME: This does not handle the LWA case.
2471 if (VT != MVT::i64)
2472 return;
2473
2474 // NOTE: We'll exclude negative FIs here, which come from argument
2475 // lowering, because there are no known test cases triggering this problem
2476 // using packed structures (or similar). We can remove this exclusion if
2477 // we find such a test case. The reason why this is so test-case driven is
2478 // because this entire 'fixup' is only to prevent crashes (from the
2479 // register scavenger) on not-really-valid inputs. For example, if we have:
2480 // %a = alloca i1
2481 // %b = bitcast i1* %a to i64*
2482  //   store i64 0, i64* %b
2483 // then the store should really be marked as 'align 1', but is not. If it
2484 // were marked as 'align 1' then the indexed form would have been
2485 // instruction-selected initially, and the problem this 'fixup' is preventing
2486 // won't happen regardless.
2487 if (FrameIdx < 0)
2488 return;
2489
2490 MachineFunction &MF = DAG.getMachineFunction();
2491 MachineFrameInfo &MFI = MF.getFrameInfo();
2492
2493 if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2494 return;
2495
2496 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2497 FuncInfo->setHasNonRISpills();
2498}
2499
2500/// Returns true if the address N can be represented by a base register plus
2501/// a signed 16-bit displacement [r+imm], and if it is not better
2502/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2503/// displacements that are multiples of that value.
2504bool PPCTargetLowering::SelectAddressRegImm(
2505 SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2506 MaybeAlign EncodingAlignment) const {
2507 // FIXME dl should come from parent load or store, not from address
2508 SDLoc dl(N);
2509
2510 // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2511 // a [pc+imm].
2512 if (SelectAddressPCRel(N, Base))
2513 return false;
2514
2515 // If this can be more profitably realized as r+r, fail.
2516 if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2517 return false;
2518
2519 if (N.getOpcode() == ISD::ADD) {
2520 int16_t imm = 0;
2521 if (isIntS16Immediate(N.getOperand(1), imm) &&
2522 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2523 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2524 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2525 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2526 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2527 } else {
2528 Base = N.getOperand(0);
2529 }
2530 return true; // [r+i]
2531 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2532 // Match LOAD (ADD (X, Lo(G))).
2533      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
2534             && "Cannot handle constant offsets yet!");
2535 Disp = N.getOperand(1).getOperand(0); // The global address.
2536      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2537             Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2538             Disp.getOpcode() == ISD::TargetConstantPool ||
2539             Disp.getOpcode() == ISD::TargetJumpTable);
2540 Base = N.getOperand(0);
2541 return true; // [&g+r]
2542 }
2543 } else if (N.getOpcode() == ISD::OR) {
2544 int16_t imm = 0;
2545 if (isIntS16Immediate(N.getOperand(1), imm) &&
2546 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2547 // If this is an or of disjoint bitfields, we can codegen this as an add
2548 // (for better address arithmetic) if the LHS and RHS of the OR are
2549 // provably disjoint.
2550 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2551
2552 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2553 // If all of the bits are known zero on the LHS or RHS, the add won't
2554 // carry.
2555 if (FrameIndexSDNode *FI =
2556 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2557 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2558 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2559 } else {
2560 Base = N.getOperand(0);
2561 }
2562 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2563 return true;
2564 }
2565 }
2566 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2567 // Loading from a constant address.
2568
2569 // If this address fits entirely in a 16-bit sext immediate field, codegen
2570 // this as "d, 0"
2571 int16_t Imm;
2572 if (isIntS16Immediate(CN, Imm) &&
2573 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
2574 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2575 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2576 CN->getValueType(0));
2577 return true;
2578 }
2579
2580 // Handle 32-bit sext immediates with LIS + addr mode.
2581 if ((CN->getValueType(0) == MVT::i32 ||
2582 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2583 (!EncodingAlignment ||
2584 isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2585 int Addr = (int)CN->getZExtValue();
2586
2587 // Otherwise, break this down into an LIS + disp.
2588 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2589
2590 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2591 MVT::i32);
2592 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2593 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2594 return true;
2595 }
2596 }
2597
2598 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2599 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2600 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2601 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2602 } else
2603 Base = N;
2604 return true; // [r+0]
2605}
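A standalone sketch (plain C++, not LLVM code) of the LIS + displacement split used for constant addresses above: the low 16 bits become a signed displacement, and the high part is computed so that (High << 16) plus the sign-extended low part reconstructs the original address.

#include <cassert>
#include <cstdint>

int main() {
  int32_t Addr = 0x12348765;                 // hypothetical example address
  int16_t Lo = static_cast<int16_t>(Addr);   // sign-extended low 16 bits
  int32_t Hi = (Addr - Lo) >> 16;            // compensates when Lo is negative
  assert((Hi << 16) + Lo == Addr);           // LIS Hi; memory op with disp Lo
  return 0;
}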
2606
2607/// SelectAddressRegRegOnly - Given the specified address, force it to be
2608/// represented as an indexed [r+r] operation.
2609bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2610 SDValue &Index,
2611 SelectionDAG &DAG) const {
2612 // Check to see if we can easily represent this as an [r+r] address. This
2613 // will fail if it thinks that the address is more profitably represented as
2614 // reg+imm, e.g. where imm = 0.
2615 if (SelectAddressRegReg(N, Base, Index, DAG))
2616 return true;
2617
2618 // If the address is the result of an add, we will utilize the fact that the
2619 // address calculation includes an implicit add. However, we can reduce
2620 // register pressure if we do not materialize a constant just for use as the
2621  // index register. We only get rid of the add if it is not an add of a value
2622  // and a 16-bit signed constant where both operands have a single use.
2623 int16_t imm = 0;
2624 if (N.getOpcode() == ISD::ADD &&
2625 (!isIntS16Immediate(N.getOperand(1), imm) ||
2626 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2627 Base = N.getOperand(0);
2628 Index = N.getOperand(1);
2629 return true;
2630 }
2631
2632 // Otherwise, do it the hard way, using R0 as the base register.
2633 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2634 N.getValueType());
2635 Index = N;
2636 return true;
2637}
2638
2639template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2640 Ty *PCRelCand = dyn_cast<Ty>(N);
2641 return PCRelCand && (PCRelCand->getTargetFlags() & PPCII::MO_PCREL_FLAG);
2642}
2643
2644/// Returns true if this address is a PC Relative address.
2645/// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
2646/// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
2647bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
2648 // This is a materialize PC Relative node. Always select this as PC Relative.
2649 Base = N;
2650 if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
2651 return true;
2652 if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
2653 isValidPCRelNode<GlobalAddressSDNode>(N) ||
2654 isValidPCRelNode<JumpTableSDNode>(N) ||
2655 isValidPCRelNode<BlockAddressSDNode>(N))
2656 return true;
2657 return false;
2658}
2659
2660/// Returns true if we should use a direct load into vector instruction
2661/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
2662static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
2663
2664  // If there are any uses other than scalar_to_vector, then we should
2665 // keep it as a scalar load -> direct move pattern to prevent multiple
2666 // loads.
2667 LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
2668 if (!LD)
2669 return false;
2670
2671 EVT MemVT = LD->getMemoryVT();
2672 if (!MemVT.isSimple())
2673 return false;
2674 switch(MemVT.getSimpleVT().SimpleTy) {
2675 case MVT::i64:
2676 break;
2677 case MVT::i32:
2678 if (!ST.hasP8Vector())
2679 return false;
2680 break;
2681 case MVT::i16:
2682 case MVT::i8:
2683 if (!ST.hasP9Vector())
2684 return false;
2685 break;
2686 default:
2687 return false;
2688 }
2689
2690 SDValue LoadedVal(N, 0);
2691 if (!LoadedVal.hasOneUse())
2692 return false;
2693
2694 for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
2695 UI != UE; ++UI)
2696 if (UI.getUse().get().getResNo() == 0 &&
2697 UI->getOpcode() != ISD::SCALAR_TO_VECTOR &&
2698 UI->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
2699 return false;
2700
2701 return true;
2702}
2703
2704/// getPreIndexedAddressParts - Returns true, and sets the base pointer,
2705/// offset pointer, and addressing mode by reference, if the node's address
2706/// can be legally represented as a pre-indexed load/store address.
2707bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
2708 SDValue &Offset,
2709 ISD::MemIndexedMode &AM,
2710 SelectionDAG &DAG) const {
2711 if (DisablePPCPreinc) return false;
2712
2713 bool isLoad = true;
2714 SDValue Ptr;
2715 EVT VT;
2716 unsigned Alignment;
2717 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2718 Ptr = LD->getBasePtr();
2719 VT = LD->getMemoryVT();
2720 Alignment = LD->getAlignment();
2721 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
2722 Ptr = ST->getBasePtr();
2723 VT = ST->getMemoryVT();
2724 Alignment = ST->getAlignment();
2725 isLoad = false;
2726 } else
2727 return false;
2728
2729 // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
2730 // instructions because we can fold these into a more efficient instruction
2731  // instead (such as LXSD).
2732 if (isLoad && usePartialVectorLoads(N, Subtarget)) {
2733 return false;
2734 }
2735
2736 // PowerPC doesn't have preinc load/store instructions for vectors
2737 if (VT.isVector())
2738 return false;
2739
2740 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
2741 // Common code will reject creating a pre-inc form if the base pointer
2742 // is a frame index, or if N is a store and the base pointer is either
2743 // the same as or a predecessor of the value being stored. Check for
2744 // those situations here, and try with swapped Base/Offset instead.
2745 bool Swap = false;
2746
2747 if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
2748 Swap = true;
2749 else if (!isLoad) {
2750 SDValue Val = cast<StoreSDNode>(N)->getValue();
2751 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
2752 Swap = true;
2753 }
2754
2755 if (Swap)
2756 std::swap(Base, Offset);
2757
2758 AM = ISD::PRE_INC;
2759 return true;
2760 }
2761
2762 // LDU/STU can only handle immediates that are a multiple of 4.
2763 if (VT != MVT::i64) {
2764 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, None))
2765 return false;
2766 } else {
2767 // LDU/STU need an address with at least 4-byte alignment.
2768 if (Alignment < 4)
2769 return false;
2770
2771 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
2772 return false;
2773 }
2774
2775 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2776 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
2777 // sext i32 to i64 when addr mode is r+i.
2778 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
2779 LD->getExtensionType() == ISD::SEXTLOAD &&
2780 isa<ConstantSDNode>(Offset))
2781 return false;
2782 }
2783
2784 AM = ISD::PRE_INC;
2785 return true;
2786}
2787
2788//===----------------------------------------------------------------------===//
2789// LowerOperation implementation
2790//===----------------------------------------------------------------------===//
2791
2792/// Set HiOpFlags and LoOpFlags to the target MO flags used when referencing
2793/// labels, adding the PIC flag if labels should be referenced via a PICBase.
2794static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
2795 unsigned &HiOpFlags, unsigned &LoOpFlags,
2796 const GlobalValue *GV = nullptr) {
2797 HiOpFlags = PPCII::MO_HA;
2798 LoOpFlags = PPCII::MO_LO;
2799
2800 // Don't use the pic base if not in PIC relocation model.
2801 if (IsPIC) {
2802 HiOpFlags |= PPCII::MO_PIC_FLAG;
2803 LoOpFlags |= PPCII::MO_PIC_FLAG;
2804 }
2805}
2806
2807static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
2808 SelectionDAG &DAG) {
2809 SDLoc DL(HiPart);
2810 EVT PtrVT = HiPart.getValueType();
2811 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
2812
2813 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
2814 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
2815
2816 // With PIC, the first instruction is actually "GR+hi(&G)".
2817 if (isPIC)
2818 Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
2819 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
2820
2821 // Generate non-pic code that has direct accesses to the constant pool.
2822 // The address of the global is just (hi(&g)+lo(&g)).
2823 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
2824}
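A standalone sketch (plain C++, not LLVM code) of the high/low split that PPCISD::Hi/Lo and the MO_HA/MO_LO flags model: the high half is adjusted for the sign of the low half, so an addis of the high part followed by an add of the sign-extended low part reconstructs the full 32-bit address.

#include <cassert>
#include <cstdint>

static int32_t haPart(uint32_t Addr) {   // @ha: high 16 bits, sign-adjusted
  return static_cast<int32_t>((Addr + 0x8000u) >> 16);
}
static int32_t loPart(uint32_t Addr) {   // @l: sign-extended low 16 bits
  return static_cast<int16_t>(Addr & 0xFFFFu);
}

int main() {
  uint32_t Addr = 0xDEADBEEFu;           // hypothetical example address
  uint32_t Rebuilt = (static_cast<uint32_t>(haPart(Addr)) << 16) +
                     static_cast<uint32_t>(loPart(Addr));
  assert(Rebuilt == Addr);
  return 0;
}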
2825
2826static void setUsesTOCBasePtr(MachineFunction &MF) {
2827 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2828 FuncInfo->setUsesTOCBasePtr();
2829}
2830
2831static void setUsesTOCBasePtr(SelectionDAG &DAG) {
2832 setUsesTOCBasePtr(DAG.getMachineFunction());
2833}
2834
2835SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
2836 SDValue GA) const {
2837 const bool Is64Bit = Subtarget.isPPC64();
2838 EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
2839 SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT)
2840 : Subtarget.isAIXABI()
2841 ? DAG.getRegister(PPC::R2, VT)
2842 : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
2843 SDValue Ops[] = { GA, Reg };
2844 return DAG.getMemIntrinsicNode(
2845 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
2846 MachinePointerInfo::getGOT(DAG.getMachineFunction()), None,
2847 MachineMemOperand::MOLoad);
2848}
2849
2850SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
2851 SelectionDAG &DAG) const {
2852 EVT PtrVT = Op.getValueType();
2853 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2854 const Constant *C = CP->getConstVal();
2855
2856 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
2857  // The actual address of the constant pool entry is stored in the TOC.
2858 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
2859 if (Subtarget.isUsingPCRelativeCalls()) {
2860 SDLoc DL(CP);
2861 EVT Ty = getPointerTy(DAG.getDataLayout());
2862 SDValue ConstPool = DAG.getTargetConstantPool(
2863 C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
2864 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
2865 }
2866 setUsesTOCBasePtr(DAG);
2867 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
2868 return getTOCEntry(DAG, SDLoc(CP), GA);
2869 }
2870
2871 unsigned MOHiFlag, MOLoFlag;
2872 bool IsPIC = isPositionIndependent();
2873 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2874
2875 if (IsPIC && Subtarget.isSVR4ABI()) {
2876 SDValue GA =
2877 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
2878 return getTOCEntry(DAG, SDLoc(CP), GA);
2879 }
2880
2881 SDValue CPIHi =
2882 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
2883 SDValue CPILo =
2884 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
2885 return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
2886}
2887
2888// For 64-bit PowerPC, prefer the more compact relative encodings.
2889// This trades 32 bits per jump table entry for one or two instructions
2890// at the jump site.
2891unsigned PPCTargetLowering::getJumpTableEncoding() const {
2892 if (isJumpTableRelative())
2893 return MachineJumpTableInfo::EK_LabelDifference32;
2894
2895 return TargetLowering::getJumpTableEncoding();
2896}
2897
2898bool PPCTargetLowering::isJumpTableRelative() const {
2899 if (UseAbsoluteJumpTables)
2900 return false;
2901 if (Subtarget.isPPC64() || Subtarget.isAIXABI())
2902 return true;
2903 return TargetLowering::isJumpTableRelative();
2904}
2905
2906SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
2907 SelectionDAG &DAG) const {
2908 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
2909 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2910
2911 switch (getTargetMachine().getCodeModel()) {
2912 case CodeModel::Small:
2913 case CodeModel::Medium:
2914 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2915 default:
2916 return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
2917 getPointerTy(DAG.getDataLayout()));
2918 }
2919}
2920
2921const MCExpr *
2922PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
2923 unsigned JTI,
2924 MCContext &Ctx) const {
2925 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
2926 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2927
2928 switch (getTargetMachine().getCodeModel()) {
2929 case CodeModel::Small:
2930 case CodeModel::Medium:
2931 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2932 default:
2933 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2934 }
2935}
2936
2937SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
2938 EVT PtrVT = Op.getValueType();
2939 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2940
2941 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
2942 if (Subtarget.isUsingPCRelativeCalls()) {
2943 SDLoc DL(JT);
2944 EVT Ty = getPointerTy(DAG.getDataLayout());
2945 SDValue GA =
2946 DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
2947 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
2948 return MatAddr;
2949 }
2950
2951 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
2952  // The actual address of the jump table is stored in the TOC.
2953 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
2954 setUsesTOCBasePtr(DAG);
2955 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
2956 return getTOCEntry(DAG, SDLoc(JT), GA);
2957 }
2958
2959 unsigned MOHiFlag, MOLoFlag;
2960 bool IsPIC = isPositionIndependent();
2961 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2962
2963 if (IsPIC && Subtarget.isSVR4ABI()) {
2964 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
2965 PPCII::MO_PIC_FLAG);
2966 return getTOCEntry(DAG, SDLoc(GA), GA);
2967 }
2968
2969 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
2970 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
2971 return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
2972}
2973
2974SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
2975 SelectionDAG &DAG) const {
2976 EVT PtrVT = Op.getValueType();
2977 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
2978 const BlockAddress *BA = BASDN->getBlockAddress();
2979
2980 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
2981 if (Subtarget.isUsingPCRelativeCalls()) {
2982 SDLoc DL(BASDN);
2983 EVT Ty = getPointerTy(DAG.getDataLayout());
2984 SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
2985 PPCII::MO_PCREL_FLAG);
2986 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
2987 return MatAddr;
2988 }
2989
2990 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
2991 // The actual BlockAddress is stored in the TOC.
2992 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
2993 setUsesTOCBasePtr(DAG);
2994 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
2995 return getTOCEntry(DAG, SDLoc(BASDN), GA);
2996 }
2997
2998 // 32-bit position-independent ELF stores the BlockAddress in the .got.
2999 if (Subtarget.is32BitELFABI() && isPositionIndependent())
3000 return getTOCEntry(
3001 DAG, SDLoc(BASDN),
3002 DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
3003
3004 unsigned MOHiFlag, MOLoFlag;
3005 bool IsPIC = isPositionIndependent();
3006 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3007 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
3008 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
3009 return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3010}
3011
3012SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3013 SelectionDAG &DAG) const {
3014 // FIXME: TLS addresses currently use medium model code sequences,
3015 // which is the most useful form. Eventually support for small and
3016 // large models could be added if users need it, at the cost of
3017 // additional complexity.
3018 if (Subtarget.isUsingPCRelativeCalls() && !EnablePPCPCRelTLS)
3019 report_fatal_error("Thread local storage is not supported with pc-relative"
3020 " addressing - please compile with -mno-pcrel");
3021 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3022 if (DAG.getTarget().useEmulatedTLS())
3023 return LowerToTLSEmulatedModel(GA, DAG);
3024
3025 SDLoc dl(GA);
3026 const GlobalValue *GV = GA->getGlobal();
3027 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3028 bool is64bit = Subtarget.isPPC64();
3029 const Module *M = DAG.getMachineFunction().getFunction().getParent();
3030 PICLevel::Level picLevel = M->getPICLevel();
3031
3032 const TargetMachine &TM = getTargetMachine();
3033 TLSModel::Model Model = TM.getTLSModel(GV);
3034
3035 if (Model == TLSModel::LocalExec) {
3036 if (Subtarget.isUsingPCRelativeCalls()) {
3037 SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3038 SDValue TGA = DAG.getTargetGlobalAddress(
3039 GV, dl, PtrVT, 0, (PPCII::MO_PCREL_FLAG | PPCII::MO_TPREL_FLAG));
3040 SDValue MatAddr =
3041 DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
3042 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3043 }
3044
3045 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3046 PPCII::MO_TPREL_HA);
3047 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3048 PPCII::MO_TPREL_LO);
3049 SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3050 : DAG.getRegister(PPC::R2, MVT::i32);
3051
3052 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3053 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3054 }
3055
3056 if (Model == TLSModel::InitialExec) {
3057 bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3058 SDValue TGA = DAG.getTargetGlobalAddress(
3059 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3060 SDValue TGATLS = DAG.getTargetGlobalAddress(
3061 GV, dl, PtrVT, 0,
3062 IsPCRel ? (PPCII::MO_TLS | PPCII::MO_PCREL_FLAG) : PPCII::MO_TLS);
3063 SDValue TPOffset;
3064 if (IsPCRel) {
3065 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
3066 TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3067 MachinePointerInfo());
3068 } else {
3069 SDValue GOTPtr;
3070 if (is64bit) {
3071 setUsesTOCBasePtr(DAG);
3072 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3073 GOTPtr =
3074 DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
3075 } else {
3076 if (!TM.isPositionIndependent())
3077 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3078 else if (picLevel == PICLevel::SmallPIC)
3079 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3080 else
3081 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3082 }
3083 TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3084 }
3085 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3086 }
3087
3088 if (Model == TLSModel::GeneralDynamic) {
3089 if (Subtarget.isUsingPCRelativeCalls()) {
3090 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3091 PPCII::MO_GOT_TLSGD_PCREL_FLAG);
3092 return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3093 }
3094
3095 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3096 SDValue GOTPtr;
3097 if (is64bit) {
3098 setUsesTOCBasePtr(DAG);
3099 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3100 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3101 GOTReg, TGA);
3102 } else {
3103 if (picLevel == PICLevel::SmallPIC)
3104 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3105 else
3106 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3107 }
3108 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3109 GOTPtr, TGA, TGA);
3110 }
3111
3112 if (Model == TLSModel::LocalDynamic) {
3113 if (Subtarget.isUsingPCRelativeCalls()) {
3114 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3115 PPCII::MO_GOT_TLSLD_PCREL_FLAG);
3116 SDValue MatPCRel =
3117 DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3118 return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3119 }
3120
3121 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3122 SDValue GOTPtr;
3123 if (is64bit) {
3124 setUsesTOCBasePtr(DAG);
3125 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3126 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3127 GOTReg, TGA);
3128 } else {
3129 if (picLevel == PICLevel::SmallPIC)
3130 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3131 else
3132 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3133 }
3134 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3135 PtrVT, GOTPtr, TGA, TGA);
3136 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3137 PtrVT, TLSAddr, TGA);
3138 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3139 }
3140
3141  llvm_unreachable("Unknown TLS model!");
3142}
3143
3144SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3145 SelectionDAG &DAG) const {
3146 EVT PtrVT = Op.getValueType();
3147 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3148 SDLoc DL(GSDN);
3149 const GlobalValue *GV = GSDN->getGlobal();
3150
3151 // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3152 // The actual address of the GlobalValue is stored in the TOC.
3153 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3154 if (Subtarget.isUsingPCRelativeCalls()) {
3155 EVT Ty = getPointerTy(DAG.getDataLayout());
3156 if (isAccessedAsGotIndirect(Op)) {
3157 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3158 PPCII::MO_PCREL_FLAG |
3159 PPCII::MO_GOT_FLAG);
3160 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3161 SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3162 MachinePointerInfo());
3163 return Load;
3164 } else {
3165 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3166 PPCII::MO_PCREL_FLAG);
3167 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3168 }
3169 }
3170 setUsesTOCBasePtr(DAG);
3171 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3172 return getTOCEntry(DAG, DL, GA);
3173 }
3174
3175 unsigned MOHiFlag, MOLoFlag;
3176 bool IsPIC = isPositionIndependent();
3177 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3178
3179 if (IsPIC && Subtarget.isSVR4ABI()) {
3180 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3181 GSDN->getOffset(),
3182 PPCII::MO_PIC_FLAG);
3183 return getTOCEntry(DAG, DL, GA);
3184 }
3185
3186 SDValue GAHi =
3187 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3188 SDValue GALo =
3189 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3190
3191 return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3192}
3193
3194SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3195 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3196 SDLoc dl(Op);
3197
3198 if (Op.getValueType() == MVT::v2i64) {
3199 // When the operands themselves are v2i64 values, we need to do something
3200 // special because VSX has no underlying comparison operations for these.
3201 if (Op.getOperand(0).getValueType() == MVT::v2i64) {
3202 // Equality can be handled by casting to the legal type for Altivec
3203 // comparisons, everything else needs to be expanded.
3204 if (CC == ISD::SETEQ || CC == ISD::SETNE) {
3205 return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
3206 DAG.getSetCC(dl, MVT::v4i32,
3207 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
3208 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
3209 CC));
3210 }
3211
3212 return SDValue();
3213 }
3214
3215 // We handle most of these in the usual way.
3216 return Op;
3217 }
3218
3219 // If we're comparing for equality to zero, expose the fact that this is
3220 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3221 // fold the new nodes.
3222 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3223 return V;
3224
3225 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
3226 // Leave comparisons against 0 and -1 alone for now, since they're usually
3227 // optimized. FIXME: revisit this when we can custom lower all setcc
3228 // optimizations.
3229 if (C->isAllOnesValue() || C->isNullValue())
3230 return SDValue();
3231 }
3232
3233 // If we have an integer seteq/setne, turn it into a compare against zero
3234 // by xor'ing the rhs with the lhs, which is faster than setting a
3235 // condition register, reading it back out, and masking the correct bit. The
3236 // normal approach here uses sub to do this instead of xor. Using xor exposes
3237 // the result to other bit-twiddling opportunities.
3238 EVT LHSVT = Op.getOperand(0).getValueType();
3239 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3240 EVT VT = Op.getValueType();
3241 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
3242 Op.getOperand(1));
3243 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3244 }
3245 return SDValue();
3246}
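A standalone sketch (plain C++, independent of the DAG) of the equivalence LowerSETCC relies on: an integer equality test is the same as testing (a ^ b) against zero, which exposes the result to further bit-twiddling combines.

#include <cassert>
#include <cstdint>

int main() {
  for (int32_t A : {-5, 0, 7})
    for (int32_t B : {-5, 0, 7}) {
      assert((A == B) == ((A ^ B) == 0));   // seteq lowered via xor
      assert((A != B) == ((A ^ B) != 0));   // setne lowered via xor
    }
  return 0;
}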
3247
3248SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3249 SDNode *Node = Op.getNode();
3250 EVT VT = Node->getValueType(0);
3251 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3252 SDValue InChain = Node->getOperand(0);
3253 SDValue VAListPtr = Node->getOperand(1);
3254 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3255 SDLoc dl(Node);
3256
3257  assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3258
3259 // gpr_index
3260 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3261 VAListPtr, MachinePointerInfo(SV), MVT::i8);
3262 InChain = GprIndex.getValue(1);
3263
3264 if (VT == MVT::i64) {
3265 // Check if GprIndex is even
3266 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3267 DAG.getConstant(1, dl, MVT::i32));
3268 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3269 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3270 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3271 DAG.getConstant(1, dl, MVT::i32));
3272 // Align GprIndex to be even if it isn't
3273 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3274 GprIndex);
3275 }
3276
3277 // fpr index is 1 byte after gpr
3278 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3279 DAG.getConstant(1, dl, MVT::i32));
3280
3281 // fpr
3282 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3283 FprPtr, MachinePointerInfo(SV), MVT::i8);
3284 InChain = FprIndex.getValue(1);
3285
3286 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3287 DAG.getConstant(8, dl, MVT::i32));
3288
3289 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3290 DAG.getConstant(4, dl, MVT::i32));
3291
3292 // areas
3293 SDValue OverflowArea =
3294 DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3295 InChain = OverflowArea.getValue(1);
3296
3297 SDValue RegSaveArea =
3298 DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3299 InChain = RegSaveArea.getValue(1);
3300
3301 // select overflow_area if index > 8
3302 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3303 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3304
3305 // adjustment constant gpr_index * 4/8
3306 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3307 VT.isInteger() ? GprIndex : FprIndex,
3308 DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3309 MVT::i32));
3310
3311 // OurReg = RegSaveArea + RegConstant
3312 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3313 RegConstant);
3314
3315 // Floating types are 32 bytes into RegSaveArea
3316 if (VT.isFloatingPoint())
3317 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3318 DAG.getConstant(32, dl, MVT::i32));
3319
3320 // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3321 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3322 VT.isInteger() ? GprIndex : FprIndex,
3323 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3324 MVT::i32));
3325
3326 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3327 VT.isInteger() ? VAListPtr : FprPtr,
3328 MachinePointerInfo(SV), MVT::i8);
3329
3330 // determine if we should load from reg_save_area or overflow_area
3331 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3332
3333  // increase overflow_area by 4/8 if gpr/fpr >= 8
3334 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3335 DAG.getConstant(VT.isInteger() ? 4 : 8,
3336 dl, MVT::i32));
3337
3338 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3339 OverflowAreaPlusN);
3340
3341 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3342 MachinePointerInfo(), MVT::i32);
3343
3344 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3345}
3346
3347SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3348  assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3349
3350 // We have to copy the entire va_list struct:
3351  // 2*sizeof(char) + 2 bytes of alignment padding + 2*sizeof(char*) = 12 bytes
3352 return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3353 DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3354 false, true, false, MachinePointerInfo(),
3355 MachinePointerInfo());
3356}
3357
3358SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3359 SelectionDAG &DAG) const {
3360 if (Subtarget.isAIXABI())
3361 report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");
3362
3363 return Op.getOperand(0);
3364}
3365
3366SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3367 SelectionDAG &DAG) const {
3368 if (Subtarget.isAIXABI())
3369 report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");
3370
3371 SDValue Chain = Op.getOperand(0);
3372 SDValue Trmp = Op.getOperand(1); // trampoline
3373 SDValue FPtr = Op.getOperand(2); // nested function
3374 SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3375 SDLoc dl(Op);
3376
3377 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3378 bool isPPC64 = (PtrVT == MVT::i64);
3379 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3380
3381 TargetLowering::ArgListTy Args;
3382 TargetLowering::ArgListEntry Entry;
3383
3384 Entry.Ty = IntPtrTy;
3385 Entry.Node = Trmp; Args.push_back(Entry);
3386
3387 // TrampSize == (isPPC64 ? 48 : 40);
3388 Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
3389 isPPC64 ? MVT::i64 : MVT::i32);
3390 Args.push_back(Entry);
3391
3392 Entry.Node = FPtr; Args.push_back(Entry);
3393 Entry.Node = Nest; Args.push_back(Entry);
3394
3395 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
3396 TargetLowering::CallLoweringInfo CLI(DAG);
3397 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3398 CallingConv::C, Type::getVoidTy(*DAG.getContext()),
3399 DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
3400
3401 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3402 return CallResult.second;
3403}
3404
3405SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3406 MachineFunction &MF = DAG.getMachineFunction();
3407 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3408 EVT PtrVT = getPointerTy(MF.getDataLayout());
3409
3410 SDLoc dl(Op);
3411
3412 if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
3413 // vastart just stores the address of the VarArgsFrameIndex slot into the
3414 // memory location argument.
3415 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3416 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3417 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3418 MachinePointerInfo(SV));
3419 }
3420
3421 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
3422 // We suppose the given va_list is already allocated.
3423 //
3424 // typedef struct {
3425 // char gpr; /* index into the array of 8 GPRs
3426 // * stored in the register save area
3427 // * gpr=0 corresponds to r3,
3428 // * gpr=1 to r4, etc.
3429 // */
3430 // char fpr; /* index into the array of 8 FPRs
3431 // * stored in the register save area
3432 // * fpr=0 corresponds to f1,
3433 // * fpr=1 to f2, etc.
3434 // */
3435 // char *overflow_arg_area;
3436 // /* location on stack that holds
3437 // * the next overflow argument
3438 // */
3439 // char *reg_save_area;
3440 // /* where r3:r10 and f1:f8 (if saved)
3441 // * are stored
3442 // */
3443 // } va_list[1];
3444
3445 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
3446 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
3447 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
3448 PtrVT);
3449 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3450 PtrVT);
3451
3452 uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
3453 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
3454
3455 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
3456 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
3457
3458 uint64_t FPROffset = 1;
3459 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
3460
3461 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3462
3463 // Store first byte : number of int regs
3464 SDValue firstStore =
3465 DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
3466 MachinePointerInfo(SV), MVT::i8);
3467 uint64_t nextOffset = FPROffset;
3468 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
3469 ConstFPROffset);
3470
3471 // Store second byte : number of float regs
3472 SDValue secondStore =
3473 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
3474 MachinePointerInfo(SV, nextOffset), MVT::i8);
3475 nextOffset += StackOffset;
3476 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
3477
3478 // Store second word : arguments given on stack
3479 SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
3480 MachinePointerInfo(SV, nextOffset));
3481 nextOffset += FrameOffset;
3482 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
3483
3484 // Store third word : arguments given in registers
3485 return DAG.getStore(thirdStore, dl, FR, nextPtr,
3486 MachinePointerInfo(SV, nextOffset));
3487}
3488
3489/// FPR - The set of FP registers that should be allocated for arguments
3490/// on Darwin and AIX.
3491static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
3492 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
3493 PPC::F11, PPC::F12, PPC::F13};
3494
3495/// CalculateStackSlotSize - Calculates the size reserved for this argument on
3496/// the stack.
3497static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
3498 unsigned PtrByteSize) {
3499 unsigned ArgSize = ArgVT.getStoreSize();
3500 if (Flags.isByVal())
3501 ArgSize = Flags.getByValSize();
3502
3503 // Round up to multiples of the pointer size, except for array members,
3504 // which are always packed.
3505 if (!Flags.isInConsecutiveRegs())
3506 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3507
3508 return ArgSize;
3509}
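A standalone sketch (plain C++, not LLVM code) of the round-up performed by CalculateStackSlotSize: argument sizes are padded to the next multiple of the pointer size unless the argument is a packed array member.

#include <cassert>

static unsigned roundUpToPtrSize(unsigned Size, unsigned PtrByteSize) {
  return ((Size + PtrByteSize - 1) / PtrByteSize) * PtrByteSize;
}

int main() {
  assert(roundUpToPtrSize(1, 8) == 8);     // a char still occupies a full slot
  assert(roundUpToPtrSize(8, 8) == 8);     // already a multiple
  assert(roundUpToPtrSize(12, 8) == 16);   // padded up to the next slot
  return 0;
}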
3510
3511/// CalculateStackSlotAlignment - Calculates the alignment of this argument
3512/// on the stack.
3513static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
3514 ISD::ArgFlagsTy Flags,
3515 unsigned PtrByteSize) {
3516 Align Alignment(PtrByteSize);
3517
3518 // Altivec parameters are padded to a 16 byte boundary.
3519 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3520 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3521 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3522 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3523 Alignment = Align(16);
3524
3525 // ByVal parameters are aligned as requested.
3526 if (Flags.isByVal()) {
3527 auto BVAlign = Flags.getNonZeroByValAlign();
3528 if (BVAlign > PtrByteSize) {
3529 if (BVAlign.value() % PtrByteSize != 0)
3530        llvm_unreachable(
3531            "ByVal alignment is not a multiple of the pointer size");
3532
3533 Alignment = BVAlign;
3534 }
3535 }
3536
3537 // Array members are always packed to their original alignment.
3538 if (Flags.isInConsecutiveRegs()) {
3539 // If the array member was split into multiple registers, the first
3540 // needs to be aligned to the size of the full type. (Except for
3541 // ppcf128, which is only aligned as its f64 components.)
3542 if (Flags.isSplit() && OrigVT != MVT::ppcf128)
3543 Alignment = Align(OrigVT.getStoreSize());
3544 else
3545 Alignment = Align(ArgVT.getStoreSize());
3546 }
3547
3548 return Alignment;
3549}
3550
3551/// CalculateStackSlotUsed - Return whether this argument will use its
3552/// stack slot (instead of being passed in registers). ArgOffset,
3553/// AvailableFPRs, and AvailableVRs must hold the current argument
3554/// position, and will be updated to account for this argument.
3555static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
3556 unsigned PtrByteSize, unsigned LinkageSize,
3557 unsigned ParamAreaSize, unsigned &ArgOffset,
3558 unsigned &AvailableFPRs,
3559 unsigned &AvailableVRs) {
3560 bool UseMemory = false;
3561
3562 // Respect alignment of argument on the stack.
3563 Align Alignment =
3564 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
3565 ArgOffset = alignTo(ArgOffset, Alignment);
3566 // If there's no space left in the argument save area, we must
3567 // use memory (this check also catches zero-sized arguments).
3568 if (ArgOffset >= LinkageSize + ParamAreaSize)
3569 UseMemory = true;
3570
3571 // Allocate argument on the stack.
3572 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
3573 if (Flags.isInConsecutiveRegsLast())
3574 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3575 // If we overran the argument save area, we must use memory
3576 // (this check catches arguments passed partially in memory)
3577 if (ArgOffset > LinkageSize + ParamAreaSize)
3578 UseMemory = true;
3579
3580 // However, if the argument is actually passed in an FPR or a VR,
3581 // we don't use memory after all.
3582 if (!Flags.isByVal()) {
3583 if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
3584 if (AvailableFPRs > 0) {
3585 --AvailableFPRs;
3586 return false;
3587 }
3588 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3589 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3590 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3591 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3592 if (AvailableVRs > 0) {
3593 --AvailableVRs;
3594 return false;
3595 }
3596 }
3597
3598 return UseMemory;
3599}
3600
3601/// EnsureStackAlignment - Round stack frame size up from NumBytes to
3602/// ensure minimum alignment required for target.
3603static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
3604 unsigned NumBytes) {
3605 return alignTo(NumBytes, Lowering->getStackAlign());
3606}
3607
3608SDValue PPCTargetLowering::LowerFormalArguments(
3609 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3610 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3611 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3612 if (Subtarget.isAIXABI())
3613 return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
3614 InVals);
3615 if (Subtarget.is64BitELFABI())
3616 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
3617 InVals);
3618 if (Subtarget.is32BitELFABI())
3619 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
3620 InVals);
3621
3622 return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, dl, DAG,
3623 InVals);
3624}
3625
3626SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
3627 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3628 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3629 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3630
3631 // 32-bit SVR4 ABI Stack Frame Layout:
3632 // +-----------------------------------+
3633 // +--> | Back chain |
3634 // | +-----------------------------------+
3635 // | | Floating-point register save area |
3636 // | +-----------------------------------+
3637 // | | General register save area |
3638 // | +-----------------------------------+
3639 // | | CR save word |
3640 // | +-----------------------------------+
3641 // | | VRSAVE save word |
3642 // | +-----------------------------------+
3643 // | | Alignment padding |
3644 // | +-----------------------------------+
3645 // | | Vector register save area |
3646 // | +-----------------------------------+
3647 // | | Local variable space |
3648 // | +-----------------------------------+
3649 // | | Parameter list area |
3650 // | +-----------------------------------+
3651 // | | LR save word |
3652 // | +-----------------------------------+
3653 // SP--> +--- | Back chain |
3654 // +-----------------------------------+
3655 //
3656 // Specifications:
3657 // System V Application Binary Interface PowerPC Processor Supplement
3658 // AltiVec Technology Programming Interface Manual
3659
3660 MachineFunction &MF = DAG.getMachineFunction();
3661 MachineFrameInfo &MFI = MF.getFrameInfo();
3662 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3663
3664 EVT PtrVT = getPointerTy(MF.getDataLayout());
3665 // Potential tail calls could cause overwriting of argument stack slots.
3666 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3667 (CallConv == CallingConv::Fast));
3668 const Align PtrAlign(4);
3669
3670 // Assign locations to all of the incoming arguments.
3671 SmallVector<CCValAssign, 16> ArgLocs;
3672 PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3673 *DAG.getContext());
3674
3675 // Reserve space for the linkage area on the stack.
3676 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3677 CCInfo.AllocateStack(LinkageSize, PtrAlign);
3678 if (useSoftFloat())
3679 CCInfo.PreAnalyzeFormalArguments(Ins);
3680
3681 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
3682 CCInfo.clearWasPPCF128();
3683
3684 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3685 CCValAssign &VA = ArgLocs[i];
3686
3687 // Arguments stored in registers.
3688 if (VA.isRegLoc()) {
3689 const TargetRegisterClass *RC;
3690 EVT ValVT = VA.getValVT();
3691
3692 switch (ValVT.getSimpleVT().SimpleTy) {
3693 default:
3694      llvm_unreachable("ValVT not supported by formal arguments Lowering");
3695 case MVT::i1:
3696 case MVT::i32:
3697 RC = &PPC::GPRCRegClass;
3698 break;
3699 case MVT::f32:
3700 if (Subtarget.hasP8Vector())
3701 RC = &PPC::VSSRCRegClass;
3702 else if (Subtarget.hasSPE())
3703 RC = &PPC::GPRCRegClass;
3704 else
3705 RC = &PPC::F4RCRegClass;
3706 break;
3707 case MVT::f64:
3708 if (Subtarget.hasVSX())
3709 RC = &PPC::VSFRCRegClass;
3710 else if (Subtarget.hasSPE())
3711 // SPE passes doubles in GPR pairs.
3712 RC = &PPC::GPRCRegClass;
3713 else
3714 RC = &PPC::F8RCRegClass;
3715 break;
3716 case MVT::v16i8:
3717 case MVT::v8i16:
3718 case MVT::v4i32:
3719 RC = &PPC::VRRCRegClass;
3720 break;
3721 case MVT::v4f32:
3722 RC = &PPC::VRRCRegClass;
3723 break;
3724 case MVT::v2f64:
3725 case MVT::v2i64:
3726 RC = &PPC::VRRCRegClass;
3727 break;
3728 }
3729
3730 SDValue ArgValue;
3731 // Transform the arguments stored in physical registers into
3732 // virtual ones.
3733 if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
3734        assert(i + 1 < e && "No second half of double precision argument");
3735 unsigned RegLo = MF.addLiveIn(VA.getLocReg(), RC);
3736 unsigned RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
3737 SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
3738 SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
3739 if (!Subtarget.isLittleEndian())
3740 std::swap (ArgValueLo, ArgValueHi);
3741 ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
3742 ArgValueHi);
3743 } else {
3744 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3745 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
3746 ValVT == MVT::i1 ? MVT::i32 : ValVT);
3747 if (ValVT == MVT::i1)
3748 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
3749 }
3750
3751 InVals.push_back(ArgValue);
3752 } else {
3753 // Argument stored in memory.
3754      assert(VA.isMemLoc());
3755
3756      // Get the extended size of the argument type on the stack
3757 unsigned ArgSize = VA.getLocVT().getStoreSize();
3758 // Get the actual size of the argument type
3759 unsigned ObjSize = VA.getValVT().getStoreSize();
3760 unsigned ArgOffset = VA.getLocMemOffset();
3761 // Stack objects in PPC32 are right justified.
3762 ArgOffset += ArgSize - ObjSize;
3763 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
3764
3765 // Create load nodes to retrieve arguments from the stack.
3766 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3767 InVals.push_back(
3768 DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
3769 }
3770 }
3771
3772 // Assign locations to all of the incoming aggregate by value arguments.
3773 // Aggregates passed by value are stored in the local variable space of the
3774 // caller's stack frame, right above the parameter list area.
3775 SmallVector<CCValAssign, 16> ByValArgLocs;
3776 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
3777 ByValArgLocs, *DAG.getContext());
3778
3779 // Reserve stack space for the allocations in CCInfo.
3780 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);
3781
3782 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
3783
3784 // Area that is at least reserved in the caller of this function.
3785 unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
3786 MinReservedArea = std::max(MinReservedArea, LinkageSize);
3787
3788 // Set the size that is at least reserved in caller of this function. Tail
3789 // call optimized function's reserved stack space needs to be aligned so that
3790 // taking the difference between two stack areas will result in an aligned
3791 // stack.
3792 MinReservedArea =
3793 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
3794 FuncInfo->setMinReservedArea(MinReservedArea);
3795
3796 SmallVector<SDValue, 8> MemOps;
3797
3798 // If the function takes variable number of arguments, make a frame index for
3799 // the start of the first vararg value... for expansion of llvm.va_start.
3800 if (isVarArg) {
3801 static const MCPhysReg GPArgRegs[] = {
3802 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
3803 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
3804 };
3805 const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
3806
3807 static const MCPhysReg FPArgRegs[] = {
3808 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
3809 PPC::F8
3810 };
3811 unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
3812
3813 if (useSoftFloat() || hasSPE())
3814 NumFPArgRegs = 0;
3815
3816 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
3817 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
3818
3819 // Make room for NumGPArgRegs and NumFPArgRegs.
3820 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
3821 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
3822
3823 FuncInfo->setVarArgsStackOffset(
3824 MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
3825 CCInfo.getNextStackOffset(), true));
3826
3827 FuncInfo->setVarArgsFrameIndex(
3828 MFI.CreateStackObject(Depth, Align(8), false));
3829 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3830
3831 // The fixed integer arguments of a variadic function are stored to the
3832 // VarArgsFrameIndex on the stack so that they may be loaded by
3833 // dereferencing the result of va_next.
3834 for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
3835 // Get an existing live-in vreg, or add a new one.
3836 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
3837 if (!VReg)
3838 VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
3839
3840 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3841 SDValue Store =
3842 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3843 MemOps.push_back(Store);
3844 // Increment the address by four for the next argument to store
3845 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
3846 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3847 }
3848
3849 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
3850 // is set.
3851 // The double arguments are stored to the VarArgsFrameIndex
3852 // on the stack.
3853 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
3854 // Get an existing live-in vreg, or add a new one.
3855 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
3856 if (!VReg)
3857 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
3858
3859 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
3860 SDValue Store =
3861 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3862 MemOps.push_back(Store);
3863 // Increment the address by eight for the next argument to store
3864 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
3865 PtrVT);
3866 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3867 }
3868 }
3869
3870 if (!MemOps.empty())
3871 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3872
3873 return Chain;
3874}
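// Illustrative aside (not part of PPCISelLowering.cpp): the SPE f64 handling
// above rebuilds a double from two 32-bit halves held in a GPR pair, swapping
// the halves on big-endian targets before PPCISD::BUILD_SPE64. Assuming that
// node simply concatenates the two halves, a standalone C++ sketch of the same
// reconstruction is:
//
//   #include <cstdint>
//   #include <cstring>
//   #include <utility>
//
//   double buildSPE64(uint32_t Lo, uint32_t Hi, bool IsLittleEndian) {
//     if (!IsLittleEndian)
//       std::swap(Lo, Hi);                 // mirrors the std::swap above
//     uint64_t Bits = (uint64_t(Hi) << 32) | Lo;
//     double D;
//     std::memcpy(&D, &Bits, sizeof(D));   // reinterpret the bit pattern
//     return D;
//   }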
3875
3876// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3877// value to MVT::i64 and then truncate to the correct register size.
3878SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
3879 EVT ObjectVT, SelectionDAG &DAG,
3880 SDValue ArgVal,
3881 const SDLoc &dl) const {
3882 if (Flags.isSExt())
3883 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
3884 DAG.getValueType(ObjectVT));
3885 else if (Flags.isZExt())
3886 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
3887 DAG.getValueType(ObjectVT));
3888
3889 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
3890}
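// Illustrative aside (a sketch, not this file's API): extendArgForPPC64 tags
// the incoming i64 with AssertSext/AssertZext to record which extension the
// ABI guarantees, then truncates back to the narrow type. In plain C++ terms,
// recovering an i32 that was passed sign-extended in a 64-bit register is just
// a truncation, because the upper 32 bits already replicate bit 31:
//
//   #include <cstdint>
//
//   int32_t recoverSExtI32(uint64_t Reg64) {
//     // Value-preserving under the ABI's sign-extension guarantee.
//     return static_cast<int32_t>(Reg64);
//   }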
3891
3892SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
3893 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3894 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3895 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3896 // TODO: add description of PPC stack frame format, or at least some docs.
3897 //
3898 bool isELFv2ABI = Subtarget.isELFv2ABI();
3899 bool isLittleEndian = Subtarget.isLittleEndian();
3900 MachineFunction &MF = DAG.getMachineFunction();
3901 MachineFrameInfo &MFI = MF.getFrameInfo();
3902 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3903
3904 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
3905 "fastcc not supported on varargs functions");
3906
3907 EVT PtrVT = getPointerTy(MF.getDataLayout());
3908 // Potential tail calls could cause overwriting of argument stack slots.
3909 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3910 (CallConv == CallingConv::Fast));
3911 unsigned PtrByteSize = 8;
3912 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3913
3914 static const MCPhysReg GPR[] = {
3915 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
3916 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
3917 };
3918 static const MCPhysReg VR[] = {
3919 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
3920 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
3921 };
3922
3923 const unsigned Num_GPR_Regs = array_lengthof(GPR);
3924 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
3925 const unsigned Num_VR_Regs = array_lengthof(VR);
3926
3927 // Do a first pass over the arguments to determine whether the ABI
3928 // guarantees that our caller has allocated the parameter save area
3929 // on its stack frame. In the ELFv1 ABI, this is always the case;
3930 // in the ELFv2 ABI, it is true if this is a vararg function or if
3931 // any parameter is located in a stack slot.
3932
3933 bool HasParameterArea = !isELFv2ABI || isVarArg;
3934 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
3935 unsigned NumBytes = LinkageSize;
3936 unsigned AvailableFPRs = Num_FPR_Regs;
3937 unsigned AvailableVRs = Num_VR_Regs;
3938 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
3939 if (Ins[i].Flags.isNest())
3940 continue;
3941
3942 if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
3943 PtrByteSize, LinkageSize, ParamAreaSize,
3944 NumBytes, AvailableFPRs, AvailableVRs))
3945 HasParameterArea = true;
3946 }
3947
3948 // Add DAG nodes to load the arguments or copy them out of registers. On
3949 // entry to a function on PPC, the arguments start after the linkage area,
3950 // although the first ones are often in registers.
3951
3952 unsigned ArgOffset = LinkageSize;
3953 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
3954 SmallVector<SDValue, 8> MemOps;
3955 Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
3956 unsigned CurArgIdx = 0;
3957 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
3958 SDValue ArgVal;
3959 bool needsLoad = false;
3960 EVT ObjectVT = Ins[ArgNo].VT;
3961 EVT OrigVT = Ins[ArgNo].ArgVT;
3962 unsigned ObjSize = ObjectVT.getStoreSize();
3963 unsigned ArgSize = ObjSize;
3964 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
3965 if (Ins[ArgNo].isOrigArg()) {
3966 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
3967 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
3968 }
3969 // We re-align the argument offset for each argument, except under the
3970 // fast calling convention, where we make sure to do so only when the
3971 // argument will actually use a stack slot.
3972 unsigned CurArgOffset;
3973 Align Alignment;
3974 auto ComputeArgOffset = [&]() {
3975 /* Respect alignment of argument on the stack. */
3976 Alignment =
3977 CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
3978 ArgOffset = alignTo(ArgOffset, Alignment);
3979 CurArgOffset = ArgOffset;
3980 };
3981
3982 if (CallConv != CallingConv::Fast) {
3983 ComputeArgOffset();
3984
3985 /* Compute GPR index associated with argument offset. */
3986 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
3987 GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
3988 }
3989
3990 // FIXME the codegen can be much improved in some cases.
3991 // We do not have to keep everything in memory.
3992 if (Flags.isByVal()) {
3993 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
3994
3995 if (CallConv == CallingConv::Fast)
3996 ComputeArgOffset();
3997
3998 // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of registers.
3999 ObjSize = Flags.getByValSize();
4000 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4001 // Empty aggregate parameters do not take up registers. Examples:
4002 // struct { } a;
4003 // union { } b;
4004 // int c[0];
4005 // etc. However, we have to provide a place-holder in InVals, so
4006 // pretend we have an 8-byte item at the current address for that
4007 // purpose.
4008 if (!ObjSize) {
4009 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4010 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4011 InVals.push_back(FIN);
4012 continue;
4013 }
4014
4015 // Create a stack object covering all stack doublewords occupied
4016 // by the argument. If the argument is (fully or partially) on
4017 // the stack, or if the argument is fully in registers but the
4018 // caller has allocated the parameter save anyway, we can refer
4019 // directly to the caller's stack frame. Otherwise, create a
4020 // local copy in our own frame.
4021 int FI;
4022 if (HasParameterArea ||
4023 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4024 FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4025 else
4026 FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4027 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4028
4029 // Handle aggregates smaller than 8 bytes.
4030 if (ObjSize < PtrByteSize) {
4031 // The value of the object is its address, which differs from the
4032 // address of the enclosing doubleword on big-endian systems.
4033 SDValue Arg = FIN;
4034 if (!isLittleEndian) {
4035 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4036 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4037 }
4038 InVals.push_back(Arg);
4039
4040 if (GPR_idx != Num_GPR_Regs) {
4041 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4042 FuncInfo->addLiveInAttr(VReg, Flags);
4043 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4044 SDValue Store;
4045
4046 if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
4047 EVT ObjType = (ObjSize == 1 ? MVT::i8 :
4048 (ObjSize == 2 ? MVT::i16 : MVT::i32));
4049 Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4050 MachinePointerInfo(&*FuncArg), ObjType);
4051 } else {
4052 // For sizes that don't fit a truncating store (3, 5, 6, 7),
4053 // store the whole register as-is to the parameter save area
4054 // slot.
4055 Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4056 MachinePointerInfo(&*FuncArg));
4057 }
4058
4059 MemOps.push_back(Store);
4060 }
4061 // Whether we copied from a register or not, advance the offset
4062 // into the parameter save area by a full doubleword.
4063 ArgOffset += PtrByteSize;
4064 continue;
4065 }
4066
4067 // The value of the object is its address, which is the address of
4068 // its first stack doubleword.
4069 InVals.push_back(FIN);
4070
4071 // Store whatever pieces of the object are in registers to memory.
4072 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4073 if (GPR_idx == Num_GPR_Regs)
4074 break;
4075
4076 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4077 FuncInfo->addLiveInAttr(VReg, Flags);
4078 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4079 SDValue Addr = FIN;
4080 if (j) {
4081 SDValue Off = DAG.getConstant(j, dl, PtrVT);
4082 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4083 }
4084 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
4085 MachinePointerInfo(&*FuncArg, j));
4086 MemOps.push_back(Store);
4087 ++GPR_idx;
4088 }
4089 ArgOffset += ArgSize;
4090 continue;
4091 }
4092
4093 switch (ObjectVT.getSimpleVT().SimpleTy) {
4094 default: llvm_unreachable("Unhandled argument type!");
4095 case MVT::i1:
4096 case MVT::i32:
4097 case MVT::i64:
4098 if (Flags.isNest()) {
4099 // The 'nest' parameter, if any, is passed in R11.
4100 unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4101 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4102
4103 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4104 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4105
4106 break;
4107 }
4108
4109 // These can be scalar arguments or elements of an integer array type
4110 // passed directly. Clang may use those instead of "byval" aggregate
4111 // types to avoid forcing arguments to memory unnecessarily.
4112 if (GPR_idx != Num_GPR_Regs) {
4113 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4114 FuncInfo->addLiveInAttr(VReg, Flags);
4115 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4116
4117 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4118 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4119 // value to MVT::i64 and then truncate to the correct register size.
4120 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4121 } else {
4122 if (CallConv == CallingConv::Fast)
4123 ComputeArgOffset();
4124
4125 needsLoad = true;
4126 ArgSize = PtrByteSize;
4127 }
4128 if (CallConv != CallingConv::Fast || needsLoad)
4129 ArgOffset += 8;
4130 break;
4131
4132 case MVT::f32:
4133 case MVT::f64:
4134 // These can be scalar arguments or elements of a float array type
4135 // passed directly. The latter are used to implement ELFv2 homogeneous
4136 // float aggregates.
4137 if (FPR_idx != Num_FPR_Regs) {
4138 unsigned VReg;
4139
4140 if (ObjectVT == MVT::f32)
4141 VReg = MF.addLiveIn(FPR[FPR_idx],
4142 Subtarget.hasP8Vector()
4143 ? &PPC::VSSRCRegClass
4144 : &PPC::F4RCRegClass);
4145 else
4146 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4147 ? &PPC::VSFRCRegClass
4148 : &PPC::F8RCRegClass);
4149
4150 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4151 ++FPR_idx;
4152 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4153 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4154 // once we support fp <-> gpr moves.
4155
4156 // This can only ever happen in the presence of f32 array types,
4157 // since otherwise we never run out of FPRs before running out
4158 // of GPRs.
4159 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4160 FuncInfo->addLiveInAttr(VReg, Flags);
4161 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4162
4163 if (ObjectVT == MVT::f32) {
4164 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4165 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4166 DAG.getConstant(32, dl, MVT::i32));
4167 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4168 }
4169
4170 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4171 } else {
4172 if (CallConv == CallingConv::Fast)
4173 ComputeArgOffset();
4174
4175 needsLoad = true;
4176 }
4177
4178 // When passing an array of floats, the array occupies consecutive
4179 // space in the argument area; only round up to the next doubleword
4180 // at the end of the array. Otherwise, each float takes 8 bytes.
4181 if (CallConv != CallingConv::Fast || needsLoad) {
4182 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4183 ArgOffset += ArgSize;
4184 if (Flags.isInConsecutiveRegsLast())
4185 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4186 }
4187 break;
4188 case MVT::v4f32:
4189 case MVT::v4i32:
4190 case MVT::v8i16:
4191 case MVT::v16i8:
4192 case MVT::v2f64:
4193 case MVT::v2i64:
4194 case MVT::v1i128:
4195 case MVT::f128:
4196 // These can be scalar arguments or elements of a vector array type
4197 // passed directly. The latter are used to implement ELFv2 homogeneous
4198 // vector aggregates.
4199 if (VR_idx != Num_VR_Regs) {
4200 unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4201 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4202 ++VR_idx;
4203 } else {
4204 if (CallConv == CallingConv::Fast)
4205 ComputeArgOffset();
4206 needsLoad = true;
4207 }
4208 if (CallConv != CallingConv::Fast || needsLoad)
4209 ArgOffset += 16;
4210 break;
4211 }
4212
4213 // We need to load the argument to a virtual register if we determined
4214 // above that we ran out of physical registers of the appropriate type.
4215 if (needsLoad) {
4216 if (ObjSize < ArgSize && !isLittleEndian)
4217 CurArgOffset += ArgSize - ObjSize;
4218 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4219 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4220 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4221 }
4222
4223 InVals.push_back(ArgVal);
4224 }
4225
4226 // Area that is at least reserved in the caller of this function.
4227 unsigned MinReservedArea;
4228 if (HasParameterArea)
4229 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4230 else
4231 MinReservedArea = LinkageSize;
4232
4233 // Set the size that is at least reserved in caller of this function. Tail
4234 // call optimized functions' reserved stack space needs to be aligned so that
4235 // taking the difference between two stack areas will result in an aligned
4236 // stack.
4237 MinReservedArea =
4238 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4239 FuncInfo->setMinReservedArea(MinReservedArea);
4240
4241 // If the function takes variable number of arguments, make a frame index for
4242 // the start of the first vararg value... for expansion of llvm.va_start.
4243 // The ELFv2 ABI spec states:
4244 // C programs that are intended to be *portable* across different compilers
4245 // and architectures must use the header file <stdarg.h> to deal with variable
4246 // argument lists.
4247 if (isVarArg && MFI.hasVAStart()) {
4248 int Depth = ArgOffset;
4249
4250 FuncInfo->setVarArgsFrameIndex(
4251 MFI.CreateFixedObject(PtrByteSize, Depth, true));
4252 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4253
4254 // If this function is vararg, store any remaining integer argument regs
4255 // to their spots on the stack so that they may be loaded by dereferencing
4256 // the result of va_next.
4257 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4258 GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4259 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4260 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4261 SDValue Store =
4262 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4263 MemOps.push_back(Store);
4264 // Increment the address by four for the next argument to store
4265 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4266 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4267 }
4268 }
4269
4270 if (!MemOps.empty())
4271 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4272
4273 return Chain;
4274}
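// Illustrative aside (a sketch, not part of this file): for byval aggregates
// smaller than a doubleword, the code above right-justifies the object within
// its 8-byte slot on big-endian targets, so the reported address is the slot
// base plus (8 - size). Standalone:
//
//   #include <cstdint>
//
//   uint64_t byvalAddrInSlot(uint64_t SlotAddr, unsigned ObjSize,
//                            bool IsLittleEndian, unsigned PtrByteSize = 8) {
//     // Little-endian: the object starts at the slot base.
//     // Big-endian: it sits at the high end of the doubleword.
//     return IsLittleEndian ? SlotAddr : SlotAddr + (PtrByteSize - ObjSize);
//   }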
4275
4276SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
4277 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4278 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4279 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4280 // TODO: add description of PPC stack frame format, or at least some docs.
4281 //
4282 MachineFunction &MF = DAG.getMachineFunction();
4283 MachineFrameInfo &MFI = MF.getFrameInfo();
4284 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4285
4286 EVT PtrVT = getPointerTy(MF.getDataLayout());
4287 bool isPPC64 = PtrVT == MVT::i64;
4288 // Potential tail calls could cause overwriting of argument stack slots.
4289 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4290 (CallConv == CallingConv::Fast));
4291 unsigned PtrByteSize = isPPC64 ? 8 : 4;
4292 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4293 unsigned ArgOffset = LinkageSize;
4294 // Area that is at least reserved in caller of this function.
4295 unsigned MinReservedArea = ArgOffset;
4296
4297 static const MCPhysReg GPR_32[] = { // 32-bit registers.
4298 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4299 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4300 };
4301 static const MCPhysReg GPR_64[] = { // 64-bit registers.
4302 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4303 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4304 };
4305 static const MCPhysReg VR[] = {
4306 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4307 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4308 };
4309
4310 const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
4311 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4312 const unsigned Num_VR_Regs = array_lengthof(VR);
4313
4314 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4315
4316 const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
4317
4318 // In 32-bit non-varargs functions, the stack space for vectors is after the
4319 // stack space for non-vectors. We do not use this space unless we have
4320 // too many vectors to fit in registers, something that only occurs in
4321 // constructed examples, but we have to walk the argument list to figure
4322 // that out. For the pathological case, compute VecArgOffset as the
4323 // start of the vector parameter area. Computing VecArgOffset is the
4324 // entire point of the following loop.
4325 unsigned VecArgOffset = ArgOffset;
4326 if (!isVarArg && !isPPC64) {
4327 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
4328 ++ArgNo) {
4329 EVT ObjectVT = Ins[ArgNo].VT;
4330 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4331
4332 if (Flags.isByVal()) {
4333 // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of regs.
4334 unsigned ObjSize = Flags.getByValSize();
4335 unsigned ArgSize =
4336 ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4337 VecArgOffset += ArgSize;
4338 continue;
4339 }
4340
4341 switch(ObjectVT.getSimpleVT().SimpleTy) {
4342 default: llvm_unreachable("Unhandled argument type!");
4343 case MVT::i1:
4344 case MVT::i32:
4345 case MVT::f32:
4346 VecArgOffset += 4;
4347 break;
4348 case MVT::i64: // PPC64
4349 case MVT::f64:
4350 // FIXME: We are guaranteed to be !isPPC64 at this point.
4351 // Does MVT::i64 apply?
4352 VecArgOffset += 8;
4353 break;
4354 case MVT::v4f32:
4355 case MVT::v4i32:
4356 case MVT::v8i16:
4357 case MVT::v16i8:
4358 // Nothing to do, we're only looking at Nonvector args here.
4359 break;
4360 }
4361 }
4362 }
4363 // We've found where the vector parameter area in memory is. Skip the
4364 // first 12 parameters; these don't use that memory.
4365 VecArgOffset = ((VecArgOffset+15)/16)*16;
4366 VecArgOffset += 12*16;
4367
4368 // Add DAG nodes to load the arguments or copy them out of registers. On
4369 // entry to a function on PPC, the arguments start after the linkage area,
4370 // although the first ones are often in registers.
4371
4372 SmallVector<SDValue, 8> MemOps;
4373 unsigned nAltivecParamsAtEnd = 0;
4374 Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4375 unsigned CurArgIdx = 0;
4376 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4377 SDValue ArgVal;
4378 bool needsLoad = false;
4379 EVT ObjectVT = Ins[ArgNo].VT;
4380 unsigned ObjSize = ObjectVT.getSizeInBits()/8;
4381 unsigned ArgSize = ObjSize;
4382 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4383 if (Ins[ArgNo].isOrigArg()) {
4384 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4385 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4386 }
4387 unsigned CurArgOffset = ArgOffset;
4388
4389 // Varargs or 64-bit Altivec parameters are padded to a 16-byte boundary.
4390 if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
4391 ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
4392 if (isVarArg || isPPC64) {
4393 MinReservedArea = ((MinReservedArea+15)/16)*16;
4394 MinReservedArea += CalculateStackSlotSize(ObjectVT,
4395 Flags,
4396 PtrByteSize);
4397 } else nAltivecParamsAtEnd++;
4398 } else
4399 // Calculate min reserved area.
4400 MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
4401 Flags,
4402 PtrByteSize);
4403
4404 // FIXME the codegen can be much improved in some cases.
4405 // We do not have to keep everything in memory.
4406 if (Flags.isByVal()) {
4407 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4408
4409 // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of registers.
4410 ObjSize = Flags.getByValSize();
4411 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4412 // Objects of size 1 and 2 are right justified, everything else is
4413 // left justified. This means the memory address is adjusted forwards.
4414 if (ObjSize==1 || ObjSize==2) {
4415 CurArgOffset = CurArgOffset + (4 - ObjSize);
4416 }
4417 // The value of the object is its address.
4418 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, false, true);
4419 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4420 InVals.push_back(FIN);
4421 if (ObjSize==1 || ObjSize==2) {
4422 if (GPR_idx != Num_GPR_Regs) {
4423 unsigned VReg;
4424 if (isPPC64)
4425 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4426 else
4427 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4428 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4429 EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
4430 SDValue Store =
4431 DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
4432 MachinePointerInfo(&*FuncArg), ObjType);
4433 MemOps.push_back(Store);
4434 ++GPR_idx;
4435 }
4436
4437 ArgOffset += PtrByteSize;
4438
4439 continue;
4440 }
4441 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4442 // Store whatever pieces of the object are in registers
4443 // to memory. ArgOffset will be the address of the beginning
4444 // of the object.
4445 if (GPR_idx != Num_GPR_Regs) {
4446 unsigned VReg;
4447 if (isPPC64)
4448 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4449 else
4450 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4451 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4452 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4453 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4454 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4455 MachinePointerInfo(&*FuncArg, j));
4456 MemOps.push_back(Store);
4457 ++GPR_idx;
4458 ArgOffset += PtrByteSize;
4459 } else {
4460 ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
4461 break;
4462 }
4463 }
4464 continue;
4465 }
4466
4467 switch (ObjectVT.getSimpleVT().SimpleTy) {
4468 default: llvm_unreachable("Unhandled argument type!");
4469 case MVT::i1:
4470 case MVT::i32:
4471 if (!isPPC64) {
4472 if (GPR_idx != Num_GPR_Regs) {
4473 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4474 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
4475
4476 if (ObjectVT == MVT::i1)
4477 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);
4478
4479 ++GPR_idx;
4480 } else {
4481 needsLoad = true;
4482 ArgSize = PtrByteSize;
4483 }
4484 // All int arguments reserve stack space in the Darwin ABI.
4485 ArgOffset += PtrByteSize;
4486 break;
4487 }
4488 LLVM_FALLTHROUGH;
4489 case MVT::i64: // PPC64
4490 if (GPR_idx != Num_GPR_Regs) {
4491 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4492 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4493
4494 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4495 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4496 // value to MVT::i64 and then truncate to the correct register size.
4497 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4498
4499 ++GPR_idx;
4500 } else {
4501 needsLoad = true;
4502 ArgSize = PtrByteSize;
4503 }
4504 // All int arguments reserve stack space in the Darwin ABI.
4505 ArgOffset += 8;
4506 break;
4507
4508 case MVT::f32:
4509 case MVT::f64:
4510 // Every 4 bytes of argument space consumes one of the GPRs available for
4511 // argument passing.
4512 if (GPR_idx != Num_GPR_Regs) {
4513 ++GPR_idx;
4514 if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
4515 ++GPR_idx;
4516 }
4517 if (FPR_idx != Num_FPR_Regs) {
4518 unsigned VReg;
4519
4520 if (ObjectVT == MVT::f32)
4521 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
4522 else
4523 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
4524
4525 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4526 ++FPR_idx;
4527 } else {
4528 needsLoad = true;
4529 }
4530
4531 // All FP arguments reserve stack space in the Darwin ABI.
4532 ArgOffset += isPPC64 ? 8 : ObjSize;
4533 break;
4534 case MVT::v4f32:
4535 case MVT::v4i32:
4536 case MVT::v8i16:
4537 case MVT::v16i8:
4538 // Note that vector arguments in registers don't reserve stack space,
4539 // except in varargs functions.
4540 if (VR_idx != Num_VR_Regs) {
4541 unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4542 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4543 if (isVarArg) {
4544 while ((ArgOffset % 16) != 0) {
4545 ArgOffset += PtrByteSize;
4546 if (GPR_idx != Num_GPR_Regs)
4547 GPR_idx++;
4548 }
4549 ArgOffset += 16;
4550 GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
4551 }
4552 ++VR_idx;
4553 } else {
4554 if (!isVarArg && !isPPC64) {
4555 // Vectors go after all the nonvectors.
4556 CurArgOffset = VecArgOffset;
4557 VecArgOffset += 16;
4558 } else {
4559 // Vectors are aligned.
4560 ArgOffset = ((ArgOffset+15)/16)*16;
4561 CurArgOffset = ArgOffset;
4562 ArgOffset += 16;
4563 }
4564 needsLoad = true;
4565 }
4566 break;
4567 }
4568
4569 // We need to load the argument to a virtual register if we determined above
4570 // that we ran out of physical registers of the appropriate type.
4571 if (needsLoad) {
4572 int FI = MFI.CreateFixedObject(ObjSize,
4573 CurArgOffset + (ArgSize - ObjSize),
4574 isImmutable);
4575 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4576 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4577 }
4578
4579 InVals.push_back(ArgVal);
4580 }
4581
4582 // Allow for Altivec parameters at the end, if needed.
4583 if (nAltivecParamsAtEnd) {
4584 MinReservedArea = ((MinReservedArea+15)/16)*16;
4585 MinReservedArea += 16*nAltivecParamsAtEnd;
4586 }
4587
4588 // Area that is at least reserved in the caller of this function.
4589 MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);
4590
4591 // Set the size that is at least reserved in caller of this function. Tail
4592 // call optimized functions' reserved stack space needs to be aligned so that
4593 // taking the difference between two stack areas will result in an aligned
4594 // stack.
4595 MinReservedArea =
4596 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4597 FuncInfo->setMinReservedArea(MinReservedArea);
4598
4599 // If the function takes variable number of arguments, make a frame index for
4600 // the start of the first vararg value... for expansion of llvm.va_start.
4601 if (isVarArg) {
4602 int Depth = ArgOffset;
4603
4604 FuncInfo->setVarArgsFrameIndex(
4605 MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
4606 Depth, true));
4607 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4608
4609 // If this function is vararg, store any remaining integer argument regs
4610 // to their spots on the stack so that they may be loaded by dereferencing
4611 // the result of va_next.
4612 for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
4613 unsigned VReg;
4614
4615 if (isPPC64)
4616 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4617 else
4618 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4619
4620 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4621 SDValue Store =
4622 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4623 MemOps.push_back(Store);
4624 // Increment the address by four for the next argument to store
4625 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4626 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4627 }
4628 }
4629
4630 if (!MemOps.empty())
4631 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4632
4633 return Chain;
4634}
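// Illustrative aside: the 16-byte rounding used repeatedly above,
// ((X + 15) / 16) * 16, is the usual align-up idiom; for example offsets
// 1 through 16 all round to 16, and 17 through 32 round to 32.
//
//   constexpr unsigned alignTo16(unsigned X) { return (X + 15) / 16 * 16; }
//   static_assert(alignTo16(1) == 16 && alignTo16(16) == 16 && alignTo16(17) == 32,
//                 "align-up behaves as described");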
4635
4636/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4637/// adjusted to accommodate the arguments for the tailcall.
4638static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4639 unsigned ParamSize) {
4640
4641 if (!isTailCall) return 0;
4642
4643 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4644 unsigned CallerMinReservedArea = FI->getMinReservedArea();
4645 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4646 // Remember only if the new adjustment is bigger.
4647 if (SPDiff < FI->getTailCallSPDelta())
4648 FI->setTailCallSPDelta(SPDiff);
4649
4650 return SPDiff;
4651}
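// Worked example (illustrative numbers, not taken from this file): if the
// caller's minimum reserved area is 112 bytes and the tail callee's parameter
// area needs 144, then SPDiff = 112 - 144 = -32; the function records -32 only
// if it is smaller (more negative) than the previously stored tail-call SP
// delta.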
4652
4653static bool isFunctionGlobalAddress(SDValue Callee);
4654
4655static bool callsShareTOCBase(const Function *Caller, SDValue Callee,
4656 const TargetMachine &TM) {
4657 // It does not make sense to call callsShareTOCBase() with a caller that
4658 // is PC Relative since PC Relative callers do not have a TOC.
4659#ifndef NDEBUG
4660 const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4661 assert(!STICaller->isUsingPCRelativeCalls() &&
4662 "PC Relative callers do not have a TOC and cannot share a TOC Base");
4663#endif
4664
4665 // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4666 // don't have enough information to determine if the caller and callee share
4667 // the same TOC base, so we have to pessimistically assume they don't for
4668 // correctness.
4669 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4670 if (!G)
4671 return false;
4672
4673 const GlobalValue *GV = G->getGlobal();
4674
4675 // If the callee is preemptable, then the static linker will use a plt-stub
4676 // which saves the toc to the stack, and needs a nop after the call
4677 // instruction to convert to a toc-restore.
4678 if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
4679 return false;
4680
4681 // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4682 // We may need a TOC restore in the situation where the caller requires a
4683 // valid TOC but the callee is PC Relative and does not.
4684 const Function *F = dyn_cast<Function>(GV);
4685 const GlobalAlias *Alias = dyn_cast<GlobalAlias>(GV);
4686
4687 // If we have an Alias we can try to get the function from there.
4688 if (Alias) {
4689 const GlobalObject *GlobalObj = Alias->getBaseObject();
4690 F = dyn_cast<Function>(GlobalObj);
4691 }
4692
4693 // If we still have no valid function pointer we do not have enough
4694 // information to determine if the callee uses PC Relative calls so we must
4695 // assume that it does.
4696 if (!F)
4697 return false;
4698
4699 // If the callee uses PC Relative we cannot guarantee that the callee won't
4700 // clobber the TOC of the caller and so we must assume that the two
4701 // functions do not share a TOC base.
4702 const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4703 if (STICallee->isUsingPCRelativeCalls())
4704 return false;
4705
4706 // The medium and large code models are expected to provide a sufficiently
4707 // large TOC to provide all data addressing needs of a module with a
4708 // single TOC.
4709 if (CodeModel::Medium == TM.getCodeModel() ||
4710 CodeModel::Large == TM.getCodeModel())
4711 return true;
4712
4713 // Otherwise we need to ensure callee and caller are in the same section,
4714 // since the linker may allocate multiple TOCs, and we don't know which
4715 // sections will belong to the same TOC base.
4716 if (!GV->isStrongDefinitionForLinker())
4717 return false;
4718
4719 // Any explicitly-specified sections and section prefixes must also match.
4720 // Also, if we're using -ffunction-sections, then each function is always in
4721 // a different section (the same is true for COMDAT functions).
4722 if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
4723 GV->getSection() != Caller->getSection())
4724 return false;
4725 if (const auto *F = dyn_cast<Function>(GV)) {
4726 if (F->getSectionPrefix() != Caller->getSectionPrefix())
4727 return false;
4728 }
4729
4730 return true;
4731}
4732
4733static bool
4734needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4735 const SmallVectorImpl<ISD::OutputArg> &Outs) {
4736 assert(Subtarget.is64BitELFABI());
4737
4738 const unsigned PtrByteSize = 8;
4739 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4740
4741 static const MCPhysReg GPR[] = {
4742 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4743 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4744 };
4745 static const MCPhysReg VR[] = {
4746 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4747 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4748 };
4749
4750 const unsigned NumGPRs = array_lengthof(GPR);
4751 const unsigned NumFPRs = 13;
4752 const unsigned NumVRs = array_lengthof(VR);
4753 const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4754
4755 unsigned NumBytes = LinkageSize;
4756 unsigned AvailableFPRs = NumFPRs;
4757 unsigned AvailableVRs = NumVRs;
4758
4759 for (const ISD::OutputArg& Param : Outs) {
4760 if (Param.Flags.isNest()) continue;
4761
4762 if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
4763 LinkageSize, ParamAreaSize, NumBytes,
4764 AvailableFPRs, AvailableVRs))
4765 return true;
4766 }
4767 return false;
4768}
4769
4770static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
4771 if (CB.arg_size() != CallerFn->arg_size())
4772 return false;
4773
4774 auto CalleeArgIter = CB.arg_begin();
4775 auto CalleeArgEnd = CB.arg_end();
4776 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4777
4778 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4779 const Value* CalleeArg = *CalleeArgIter;
4780 const Value* CallerArg = &(*CallerArgIter);
4781 if (CalleeArg == CallerArg)
4782 continue;
4783
4784 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4785 // tail call @callee([4 x i64] undef, [4 x i64] %b)
4786 // }
4787 // 1st argument of callee is undef and has the same type as caller.
4788 if (CalleeArg->getType() == CallerArg->getType() &&
4789 isa<UndefValue>(CalleeArg))
4790 continue;
4791
4792 return false;
4793 }
4794
4795 return true;
4796}
4797
4798 // Returns true if TCO is possible between the caller's and callee's
4799// calling conventions.
4800static bool
4801areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
4802 CallingConv::ID CalleeCC) {
4803 // Tail calls are possible with fastcc and ccc.
4804 auto isTailCallableCC = [] (CallingConv::ID CC){
4805 return CC == CallingConv::C || CC == CallingConv::Fast;
4806 };
4807 if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
4808 return false;
4809
4810 // We can safely tail call both fastcc and ccc callees from a c calling
4811 // convention caller. If the caller is fastcc, we may have less stack space
4812 // than a non-fastcc caller with the same signature so disable tail-calls in
4813 // that case.
4814 return CallerCC == CallingConv::C || CallerCC == CalleeCC;
4815}
4816
4817bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
4818 SDValue Callee, CallingConv::ID CalleeCC, const CallBase *CB, bool isVarArg,
4819 const SmallVectorImpl<ISD::OutputArg> &Outs,
4820 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
4821 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
4822
4823 if (DisableSCO && !TailCallOpt) return false;
4824
4825 // Variadic argument functions are not supported.
4826 if (isVarArg) return false;
4827
4828 auto &Caller = DAG.getMachineFunction().getFunction();
4829 // Check that the calling conventions are compatible for tco.
4830 if (!areCallingConvEligibleForTCO_64SVR4(Caller.getCallingConv(), CalleeCC))
4831 return false;
4832
4833 // A caller that contains any byval parameter is not supported.
4834 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
4835 return false;
4836
4837 // A callee that contains any byval parameter is not supported either.
4838 // Note: This is a quick workaround, because in some cases, e.g.
4839 // caller's stack size > callee's stack size, we are still able to apply
4840 // sibling call optimization. For example, gcc is able to do SCO for caller1
4841 // in the following example, but not for caller2.
4842 // struct test {
4843 // long int a;
4844 // char ary[56];
4845 // } gTest;
4846 // __attribute__((noinline)) int callee(struct test v, struct test *b) {
4847 // b->a = v.a;
4848 // return 0;
4849 // }
4850 // void caller1(struct test a, struct test c, struct test *b) {
4851 // callee(gTest, b); }
4852 // void caller2(struct test *b) { callee(gTest, b); }
4853 if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
4854 return false;
4855
4856 // If callee and caller use different calling conventions, we cannot pass
4857 // parameters on stack since offsets for the parameter area may be different.
4858 if (Caller.getCallingConv() != CalleeCC &&
4859 needStackSlotPassParameters(Subtarget, Outs))
4860 return false;
4861
4862 // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
4863 // the caller and callee share the same TOC for TCO/SCO. If the caller and
4864 // callee potentially have different TOC bases then we cannot tail call since
4865 // we need to restore the TOC pointer after the call.
4866 // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
4867 // We cannot guarantee this for indirect calls or calls to external functions.
4868 // When PC-Relative addressing is used, the concept of the TOC is no longer
4869 // applicable so this check is not required.
4870 // Check first for indirect calls.
4871 if (!Subtarget.isUsingPCRelativeCalls() &&
4872 !isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee))
4873 return false;
4874
4875 // Check if we share the TOC base.
4876 if (!Subtarget.isUsingPCRelativeCalls() &&
4877 !callsShareTOCBase(&Caller, Callee, getTargetMachine()))
4878 return false;
4879
4880 // TCO allows altering callee ABI, so we don't have to check further.
4881 if (CalleeCC == CallingConv::Fast && TailCallOpt)
4882 return true;
4883
4884 if (DisableSCO) return false;
4885
4886 // If the callee uses the same argument list as the caller, then we can
4887 // apply SCO in this case. If not, we need to check whether the callee
4888 // needs stack slots for passing arguments.
4889 // PC Relative tail calls may not have a CallBase.
4890 // If there is no CallBase we cannot verify if we have the same argument
4891 // list so assume that we don't have the same argument list.
4892 if (CB && !hasSameArgumentList(&Caller, *CB) &&
4893 needStackSlotPassParameters(Subtarget, Outs))
4894 return false;
4895 else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
4896 return false;
4897
4898 return true;
4899}
4900
4901/// IsEligibleForTailCallOptimization - Check whether the call is eligible
4902/// for tail call optimization. Targets which want to do tail call
4903/// optimization should implement this function.
4904bool
4905PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
4906 CallingConv::ID CalleeCC,
4907 bool isVarArg,
4908 const SmallVectorImpl<ISD::InputArg> &Ins,
4909 SelectionDAG& DAG) const {
4910 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
4911 return false;
4912
4913 // Variable argument functions are not supported.
4914 if (isVarArg)
4915 return false;
4916
4917 MachineFunction &MF = DAG.getMachineFunction();
4918 CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
4919 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
4920 // Functions containing by val parameters are not supported.
4921 for (unsigned i = 0; i != Ins.size(); i++) {
4922 ISD::ArgFlagsTy Flags = Ins[i].Flags;
4923 if (Flags.isByVal()) return false;
4924 }
4925
4926 // Non-PIC/GOT tail calls are supported.
4927 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
4928 return true;
4929
4930 // At the moment we can only do local tail calls (in same module, hidden
4931 // or protected) if we are generating PIC.
4932 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
4933 return G->getGlobal()->hasHiddenVisibility()
4934 || G->getGlobal()->hasProtectedVisibility();
4935 }
4936
4937 return false;
4938}
4939
4940 /// isBLACompatibleAddress - Return the immediate to use if the specified
4941/// 32-bit value is representable in the immediate field of a BxA instruction.
4942static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
4943 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
4944 if (!C) return nullptr;
4945
4946 int Addr = C->getZExtValue();
4947 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
4948 SignExtend32<26>(Addr) != Addr)
4949 return nullptr; // Top 6 bits have to be sext of immediate.
4950
4951 return DAG
4952 .getConstant(
4953 (int)C->getZExtValue() >> 2, SDLoc(Op),
4954 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
4955 .getNode();
4956}
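// Illustrative aside (standalone sketch of the check above, not the DAG
// code): an address is BLA-encodable when its low two bits are zero and it
// fits in a sign-extended 26-bit field; the encoded immediate is the address
// shifted right by two.
//
//   #include <cstdint>
//
//   bool blaImmediate(int32_t Addr, int32_t &Imm) {
//     const int32_t Lo = -(1 << 25), Hi = 1 << 25;   // signed 26-bit range
//     if ((Addr & 3) != 0 || Addr < Lo || Addr >= Hi)
//       return false;                                // not representable
//     Imm = Addr >> 2;                               // word offset for the branch
//     return true;
//   }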
4957
4958namespace {
4959
4960struct TailCallArgumentInfo {
4961 SDValue Arg;
4962 SDValue FrameIdxOp;
4963 int FrameIdx = 0;
4964
4965 TailCallArgumentInfo() = default;
4966};
4967
4968} // end anonymous namespace
4969
4970/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
4971static void StoreTailCallArgumentsToStackSlot(
4972 SelectionDAG &DAG, SDValue Chain,
4973 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
4974 SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
4975 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
4976 SDValue Arg = TailCallArgs[i].Arg;
4977 SDValue FIN = TailCallArgs[i].FrameIdxOp;
4978 int FI = TailCallArgs[i].FrameIdx;
4979 // Store relative to framepointer.
4980 MemOpChains.push_back(DAG.getStore(
4981 Chain, dl, Arg, FIN,
4982 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
4983 }
4984}
4985
4986/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
4987/// the appropriate stack slot for the tail call optimized function call.
4988static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
4989 SDValue OldRetAddr, SDValue OldFP,
4990 int SPDiff, const SDLoc &dl) {
4991 if (SPDiff) {
4992 // Calculate the new stack slot for the return address.
4993 MachineFunction &MF = DAG.getMachineFunction();
4994 const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
4995 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
4996 bool isPPC64 = Subtarget.isPPC64();
4997 int SlotSize = isPPC64 ? 8 : 4;
4998 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
4999 int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
5000 NewRetAddrLoc, true);
5001 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5002 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
5003 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
5004 MachinePointerInfo::getFixedStack(MF, NewRetAddr));
5005 }
5006 return Chain;
5007}
5008
5009 /// CalculateTailCallArgDest - Remember the argument for later processing.
5010 /// Calculate the position of the argument.
5011static void
5012CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
5013 SDValue Arg, int SPDiff, unsigned ArgOffset,
5014 SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
5015 int Offset = ArgOffset + SPDiff;
5016 uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
5017 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
5018 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5019 SDValue FIN = DAG.getFrameIndex(FI, VT);
5020 TailCallArgumentInfo Info;
5021 Info.Arg = Arg;
5022 Info.FrameIdxOp = FIN;
5023 Info.FrameIdx = FI;
5024 TailCallArguments.push_back(Info);
5025}
5026
5027 /// EmitTailCallLoadFPAndRetAddr - Emit loads from the frame pointer and return
5028 /// address stack slots. Returns the chain as result and the loaded frame pointers in
5029 /// LROpOut/FPOpOut. Used when tail calling.
5030SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5031 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5032 SDValue &FPOpOut, const SDLoc &dl) const {
5033 if (SPDiff) {
5034 // Load the LR and FP stack slot for later adjusting.
5035 EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5036 LROpOut = getReturnAddrFrameIndex(DAG);
5037 LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
5038 Chain = SDValue(LROpOut.getNode(), 1);
5039 }
5040 return Chain;
5041}
5042
5043 /// CreateCopyOfByValArgument - Make a copy of an aggregate at the address specified
5044 /// by "Src" to the address "Dst" of size "Size". Alignment information is
5045/// specified by the specific parameter attribute. The copy will be passed as
5046/// a byval function parameter.
5047/// Sometimes what we are copying is the end of a larger object, the part that
5048/// does not fit in registers.
5049static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
5050 SDValue Chain, ISD::ArgFlagsTy Flags,
5051 SelectionDAG &DAG, const SDLoc &dl) {
5052 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
5053 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
5054 Flags.getNonZeroByValAlign(), false, false, false,
5055 MachinePointerInfo(), MachinePointerInfo());
5056}
5057
5058/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5059/// tail calls.
5060static void LowerMemOpCallTo(
5061 SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5062 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5063 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5064 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5065 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5066 if (!isTailCall) {
5067 if (isVector) {
5068 SDValue StackPtr;
5069 if (isPPC64)
5070 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5071 else
5072 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5073 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5074 DAG.getConstant(ArgOffset, dl, PtrVT));
5075 }
5076 MemOpChains.push_back(
5077 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5078 // Calculate and remember argument location.
5079 } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
5080 TailCallArguments);
5081}
5082
5083static void
5084PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
5085 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
5086 SDValue FPOp,
5087 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5088 // Emit a sequence of copyto/copyfrom virtual registers for arguments that
5089 // might overwrite each other in case of tail call optimization.
5090 SmallVector<SDValue, 8> MemOpChains2;
5091 // Do not flag preceding copytoreg stuff together with the following stuff.
5092 InFlag = SDValue();
5093 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
5094 MemOpChains2, dl);
5095 if (!MemOpChains2.empty())
5096 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
5097
5098 // Store the return address to the appropriate stack slot.
5099 Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
5100
5101 // Emit callseq_end just before tailcall node.
5102 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
5103 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
5104 InFlag = Chain.getValue(1);
5105}
5106
5107// Is this global address that of a function that can be called by name? (as
5108// opposed to something that must hold a descriptor for an indirect call).
5109static bool isFunctionGlobalAddress(SDValue Callee) {
5110 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
5111 if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
5112 Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
5113 return false;
5114
5115 return G->getGlobal()->getValueType()->isFunctionTy();
5116 }
5117
5118 return false;
5119}
5120
5121SDValue PPCTargetLowering::LowerCallResult(
5122 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
5123 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5124 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5125 SmallVector<CCValAssign, 16> RVLocs;
5126 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5127 *DAG.getContext());
5128
5129 CCRetInfo.AnalyzeCallResult(
5130 Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5131 ? RetCC_PPC_Cold
5132 : RetCC_PPC);
5133
5134 // Copy all of the result registers out of their specified physreg.
5135 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5136 CCValAssign &VA = RVLocs[i];
5137    assert(VA.isRegLoc() && "Can only return in registers!");
5138
5139 SDValue Val;
5140
5141 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5142 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5143 InFlag);
5144 Chain = Lo.getValue(1);
5145 InFlag = Lo.getValue(2);
5146 VA = RVLocs[++i]; // skip ahead to next loc
5147 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5148 InFlag);
5149 Chain = Hi.getValue(1);
5150 InFlag = Hi.getValue(2);
5151 if (!Subtarget.isLittleEndian())
5152 std::swap (Lo, Hi);
5153 Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
5154 } else {
5155 Val = DAG.getCopyFromReg(Chain, dl,
5156 VA.getLocReg(), VA.getLocVT(), InFlag);
5157 Chain = Val.getValue(1);
5158 InFlag = Val.getValue(2);
5159 }
5160
5161 switch (VA.getLocInfo()) {
5162    default: llvm_unreachable("Unknown loc info!");
5163 case CCValAssign::Full: break;
5164 case CCValAssign::AExt:
5165 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5166 break;
5167 case CCValAssign::ZExt:
5168 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5169 DAG.getValueType(VA.getValVT()));
5170 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5171 break;
5172 case CCValAssign::SExt:
5173 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5174 DAG.getValueType(VA.getValVT()));
5175 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5176 break;
5177 }
5178
5179 InVals.push_back(Val);
5180 }
5181
5182 return Chain;
5183}
5184
5185static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5186 const PPCSubtarget &Subtarget, bool isPatchPoint) {
5187 // PatchPoint calls are not indirect.
5188 if (isPatchPoint)
5189 return false;
5190
5191 if (isFunctionGlobalAddress(Callee) || dyn_cast<ExternalSymbolSDNode>(Callee))
5192 return false;
5193
5194  // Darwin and 32-bit ELF can use a BLA. The descriptor-based ABIs cannot
5195  // because the immediate function pointer points to a descriptor instead of
5196  // a function entry point. The ELFv2 ABI cannot use a BLA because the function
5197  // pointer immediate points to the global entry point, while the BLA would
5198  // need to jump to the local entry point (see rL211174).
5199 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5200 isBLACompatibleAddress(Callee, DAG))
5201 return false;
5202
5203 return true;
5204}
5205
5206// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5207static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5208 return Subtarget.isAIXABI() ||
5209 (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5210}
5211
5212static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
5213 const Function &Caller,
5214 const SDValue &Callee,
5215 const PPCSubtarget &Subtarget,
5216 const TargetMachine &TM) {
5217 if (CFlags.IsTailCall)
5218 return PPCISD::TC_RETURN;
5219
5220 // This is a call through a function pointer.
5221 if (CFlags.IsIndirect) {
5222    // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
5223 // indirect calls. The save of the caller's TOC pointer to the stack will be
5224 // inserted into the DAG as part of call lowering. The restore of the TOC
5225 // pointer is modeled by using a pseudo instruction for the call opcode that
5226 // represents the 2 instruction sequence of an indirect branch and link,
5227    // immediately followed by a load of the TOC pointer from the stack save
5228 // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
5229 // as it is not saved or used.
5230 return isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
5231 : PPCISD::BCTRL;
5232 }
5233
5234 if (Subtarget.isUsingPCRelativeCalls()) {
5235    assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5236 return PPCISD::CALL_NOTOC;
5237 }
5238
5239  // The ABIs that maintain a TOC pointer across calls need to have a nop
5240  // immediately following the call instruction if the caller and callee may
5241  // have different TOC bases. At link time if the linker determines the calls
5242  // may not share a TOC base, the call is redirected to a trampoline inserted
5243  // by the linker. The trampoline will (among other things) save the caller's
5244  // TOC pointer at an ABI-designated offset in the linkage area and the linker
5245  // will rewrite the nop to be a load of the TOC pointer from the linkage area
5246  // into gpr2.
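  // A rough illustration of the sequence described above (a sketch only; the
  // actual TOC save offset is ABI-dependent and comes from the frame lowering):
  //   bl callee          ; emitted for PPCISD::CALL_NOP
  //   nop                ; linker may rewrite to: ld r2, <TOC save offset>(r1)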
5247 if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())
5248 return callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL
5249 : PPCISD::CALL_NOP;
5250
5251 return PPCISD::CALL;
5252}
5253
5254static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
5255 const SDLoc &dl, const PPCSubtarget &Subtarget) {
5256 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
5257 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
5258 return SDValue(Dest, 0);
5259
5260 // Returns true if the callee is local, and false otherwise.
5261 auto isLocalCallee = [&]() {
5262 const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
5263 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5264 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5265
5266 return DAG.getTarget().shouldAssumeDSOLocal(*Mod, GV) &&
5267 !dyn_cast_or_null<GlobalIFunc>(GV);
5268 };
5269
5270 // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
5271 // a static relocation model causes some versions of GNU LD (2.17.50, at
5272 // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5273 // built with secure-PLT.
5274 bool UsePlt =
5275 Subtarget.is32BitELFABI() && !isLocalCallee() &&
5276 Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;
5277
5278 const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5279 const TargetMachine &TM = Subtarget.getTargetMachine();
5280 const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
5281 MCSymbolXCOFF *S =
5282 cast<MCSymbolXCOFF>(TLOF->getFunctionEntryPointSymbol(GV, TM));
5283
5284 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5285 return DAG.getMCSymbol(S, PtrVT);
5286 };
5287
5288 if (isFunctionGlobalAddress(Callee)) {
5289 const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5290
5291 if (Subtarget.isAIXABI()) {
5292      assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
5293 return getAIXFuncEntryPointSymbolSDNode(GV);
5294 }
5295 return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
5296 UsePlt ? PPCII::MO_PLT : 0);
5297 }
5298
5299 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5300 const char *SymName = S->getSymbol();
5301 if (Subtarget.isAIXABI()) {
5302 // If there exists a user-declared function whose name is the same as the
5303 // ExternalSymbol's, then we pick up the user-declared version.
5304 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5305 if (const Function *F =
5306 dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
5307 return getAIXFuncEntryPointSymbolSDNode(F);
5308
5309 // On AIX, direct function calls reference the symbol for the function's
5310 // entry point, which is named by prepending a "." before the function's
5311 // C-linkage name. A Qualname is returned here because an external
5312 // function entry point is a csect with XTY_ER property.
5313 const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5314 auto &Context = DAG.getMachineFunction().getMMI().getContext();
5315 MCSectionXCOFF *Sec = Context.getXCOFFSection(
5316 (Twine(".") + Twine(SymName)).str(), XCOFF::XMC_PR, XCOFF::XTY_ER,
5317 SectionKind::getMetadata());
5318 return Sec->getQualNameSymbol();
5319 };
5320
5321 SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
5322 }
5323 return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
5324 UsePlt ? PPCII::MO_PLT : 0);
5325 }
5326
5327 // No transformation needed.
5328  assert(Callee.getNode() && "What no callee?");
5329 return Callee;
5330}
5331
5332static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
5333  assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5334         "Expected a CALLSEQ_STARTSDNode.");
5335
5336 // The last operand is the chain, except when the node has glue. If the node
5337 // has glue, then the last operand is the glue, and the chain is the second
5338 // last operand.
5339 SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5340 if (LastValue.getValueType() != MVT::Glue)
5341 return LastValue;
5342
5343 return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5344}
5345
5346// Creates the node that moves a function's address into the count register
5347// to prepare for an indirect call instruction.
5348static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5349 SDValue &Glue, SDValue &Chain,
5350 const SDLoc &dl) {
5351 SDValue MTCTROps[] = {Chain, Callee, Glue};
5352 EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5353 Chain = DAG.getNode(PPCISD::MTCTR, dl, makeArrayRef(ReturnTypes, 2),
5354 makeArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5355 // The glue is the second value produced.
5356 Glue = Chain.getValue(1);
5357}
5358
5359static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5360 SDValue &Glue, SDValue &Chain,
5361 SDValue CallSeqStart,
5362 const CallBase *CB, const SDLoc &dl,
5363 bool hasNest,
5364 const PPCSubtarget &Subtarget) {
5365 // Function pointers in the 64-bit SVR4 ABI do not point to the function
5366 // entry point, but to the function descriptor (the function entry point
5367 // address is part of the function descriptor though).
5368 // The function descriptor is a three doubleword structure with the
5369 // following fields: function entry point, TOC base address and
5370 // environment pointer.
5371 // Thus for a call through a function pointer, the following actions need
5372 // to be performed:
5373 // 1. Save the TOC of the caller in the TOC save area of its stack
5374 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
5375 // 2. Load the address of the function entry point from the function
5376 // descriptor.
5377 // 3. Load the TOC of the callee from the function descriptor into r2.
5378 // 4. Load the environment pointer from the function descriptor into
5379 // r11.
5380 // 5. Branch to the function entry point address.
5381 // 6. On return of the callee, the TOC of the caller needs to be
5382 // restored (this is done in FinishCall()).
5383 //
5384 // The loads are scheduled at the beginning of the call sequence, and the
5385 // register copies are flagged together to ensure that no other
5386 // operations can be scheduled in between. E.g. without flagging the
5387 // copies together, a TOC access in the caller could be scheduled between
5388 // the assignment of the callee TOC and the branch to the callee, which leads
5389 // to incorrect code.
5390
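  // A minimal sketch of the descriptor layout described above (the offsets are
  // assumptions for the 64-bit case; the code below queries the actual values
  // from the subtarget):
  //   struct FunctionDescriptor {
  //     uintptr_t EntryPoint;  // offset 0
  //     uintptr_t TOCBase;     // descriptorTOCAnchorOffset()
  //     uintptr_t EnvPointer;  // descriptorEnvironmentPointerOffset()
  //   };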
5391 // Start by loading the function address from the descriptor.
5392 SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
5393 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5394 ? (MachineMemOperand::MODereferenceable |
5395 MachineMemOperand::MOInvariant)
5396 : MachineMemOperand::MONone;
5397
5398 MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5399
5400 // Registers used in building the DAG.
5401 const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5402 const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5403
5404 // Offsets of descriptor members.
5405 const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5406 const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5407
5408 const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5409 const unsigned Alignment = Subtarget.isPPC64() ? 8 : 4;
5410
5411  // One load for the function's entry point address.
5412 SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
5413 Alignment, MMOFlags);
5414
5415 // One for loading the TOC anchor for the module that contains the called
5416 // function.
5417 SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
5418 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
5419 SDValue TOCPtr =
5420 DAG.getLoad(RegVT, dl, LDChain, AddTOC,
5421 MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
5422
5423 // One for loading the environment pointer.
5424 SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
5425 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
5426 SDValue LoadEnvPtr =
5427 DAG.getLoad(RegVT, dl, LDChain, AddPtr,
5428 MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
5429
5430
5431 // Then copy the newly loaded TOC anchor to the TOC pointer.
5432 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
5433 Chain = TOCVal.getValue(0);
5434 Glue = TOCVal.getValue(1);
5435
5436 // If the function call has an explicit 'nest' parameter, it takes the
5437 // place of the environment pointer.
5438  assert((!hasNest || !Subtarget.isAIXABI()) &&
5439         "Nest parameter is not supported on AIX.");
5440 if (!hasNest) {
5441 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
5442 Chain = EnvVal.getValue(0);
5443 Glue = EnvVal.getValue(1);
5444 }
5445
5446 // The rest of the indirect call sequence is the same as the non-descriptor
5447 // DAG.
5448 prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
5449}
5450
5451static void
5452buildCallOperands(SmallVectorImpl<SDValue> &Ops,
5453 PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5454 SelectionDAG &DAG,
5455 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5456 SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5457 const PPCSubtarget &Subtarget) {
5458 const bool IsPPC64 = Subtarget.isPPC64();
5459 // MVT for a general purpose register.
5460 const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
5461
5462 // First operand is always the chain.
5463 Ops.push_back(Chain);
5464
5465 // If it's a direct call pass the callee as the second operand.
5466 if (!CFlags.IsIndirect)
5467 Ops.push_back(Callee);
5468 else {
5469    assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5470
5471 // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5472 // on the stack (this would have been done in `LowerCall_64SVR4` or
5473 // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5474 // represents both the indirect branch and a load that restores the TOC
5475 // pointer from the linkage area. The operand for the TOC restore is an add
5476 // of the TOC save offset to the stack pointer. This must be the second
5477 // operand: after the chain input but before any other variadic arguments.
5478 // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5479 // saved or used.
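    // For reference, a rough sketch of the operand order assembled below for
    // an indirect call (an assumption read off the pushes that follow;
    // bracketed entries are conditional):
    //   { Chain, [TOC restore address], [env-ptr reg], [CTR], [SPDiff],
    //     arg regs..., [TOC reg], [CR1EQ], register mask, [glue] }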
5480 if (isTOCSaveRestoreRequired(Subtarget)) {
5481 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5482
5483 SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
5484 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5485 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5486 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
5487 Ops.push_back(AddTOC);
5488 }
5489
5490 // Add the register used for the environment pointer.
5491 if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5492 Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
5493 RegVT));
5494
5495
5496 // Add CTR register as callee so a bctr can be emitted later.
5497 if (CFlags.IsTailCall)
5498 Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5499 }
5500
5501 // If this is a tail call add stack pointer delta.
5502 if (CFlags.IsTailCall)
5503 Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5504
5505 // Add argument registers to the end of the list so that they are known live
5506 // into the call.
5507 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
5508 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
5509 RegsToPass[i].second.getValueType()));
5510
5511 // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5512 // no way to mark dependencies as implicit here.
5513 // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5514 if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5515 !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5516 Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
5517
5518 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5519 if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5520 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5521
5522 // Add a register mask operand representing the call-preserved registers.
5523 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5524 const uint32_t *Mask =
5525 TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
5526  assert(Mask && "Missing call preserved mask for calling convention");
5527 Ops.push_back(DAG.getRegisterMask(Mask));
5528
5529 // If the glue is valid, it is the last operand.
5530 if (Glue.getNode())
5531 Ops.push_back(Glue);
5532}
5533
5534SDValue PPCTargetLowering::FinishCall(
5535 CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5536 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5537 SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5538 unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5539 SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5540
5541 if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5542 Subtarget.isAIXABI())
5543 setUsesTOCBasePtr(DAG);
5544
5545 unsigned CallOpc =
5546 getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
5547 Subtarget, DAG.getTarget());
5548
5549 if (!CFlags.IsIndirect)
5550 Callee = transformCallee(Callee, DAG, dl, Subtarget);
5551 else if (Subtarget.usesFunctionDescriptors())
5552 prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5553 dl, CFlags.HasNest, Subtarget);
5554 else
5555 prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5556
5557 // Build the operand list for the call instruction.
5558 SmallVector<SDValue, 8> Ops;
5559 buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5560 SPDiff, Subtarget);
5561
5562 // Emit tail call.
5563 if (CFlags.IsTailCall) {
5564 // Indirect tail call when using PC Relative calls do not have the same
5565 // constraints.
5566    assert(((Callee.getOpcode() == ISD::Register &&
5567             cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5568            Callee.getOpcode() == ISD::TargetExternalSymbol ||
5569            Callee.getOpcode() == ISD::TargetGlobalAddress ||
5570            isa<ConstantSDNode>(Callee) ||
5571            (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5572           "Expecting a global address, external symbol, absolute value, "
5573           "register or an indirect tail call when PC Relative calls are "
5574           "used.");
5575 // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5576    assert(CallOpc == PPCISD::TC_RETURN &&
5577           "Unexpected call opcode for a tail call.");
5578 DAG.getMachineFunction().getFrameInfo().setHasTailCall();
5579 return DAG.getNode(CallOpc, dl, MVT::Other, Ops);
5580 }
5581
5582 std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5583 Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
5584 DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
5585 Glue = Chain.getValue(1);
5586
5587 // When performing tail call optimization the callee pops its arguments off
5588 // the stack. Account for this here so these bytes can be pushed back on in
5589 // PPCFrameLowering::eliminateCallFramePseudoInstr.
5590 int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5591 getTargetMachine().Options.GuaranteedTailCallOpt)
5592 ? NumBytes
5593 : 0;
5594
5595 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
5596 DAG.getIntPtrConstant(BytesCalleePops, dl, true),
5597 Glue, dl);
5598 Glue = Chain.getValue(1);
5599
5600 return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
5601 DAG, InVals);
5602}
5603
5604SDValue
5605PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5606 SmallVectorImpl<SDValue> &InVals) const {
5607 SelectionDAG &DAG = CLI.DAG;
5608 SDLoc &dl = CLI.DL;
5609 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5610 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5611 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
5612 SDValue Chain = CLI.Chain;
5613 SDValue Callee = CLI.Callee;
5614 bool &isTailCall = CLI.IsTailCall;
5615 CallingConv::ID CallConv = CLI.CallConv;
5616 bool isVarArg = CLI.IsVarArg;
5617 bool isPatchPoint = CLI.IsPatchPoint;
5618 const CallBase *CB = CLI.CB;
5619
5620 if (isTailCall) {
5621 if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5622 isTailCall = false;
5623 else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5624 isTailCall = IsEligibleForTailCallOptimization_64SVR4(
5625 Callee, CallConv, CB, isVarArg, Outs, Ins, DAG);
5626 else
5627 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
5628 Ins, DAG);
5629 if (isTailCall) {
5630 ++NumTailCalls;
5631 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5632 ++NumSiblingCalls;
5633
5634 // PC Relative calls no longer guarantee that the callee is a Global
5635 // Address Node. The callee could be an indirect tail call in which
5636 // case the SDValue for the callee could be a load (to load the address
5637 // of a function pointer) or it may be a register copy (to move the
5638 // address of the callee from a function parameter into a virtual
5639      // register). It may also be an ExternalSymbolSDNode (e.g. memcpy).
5640      assert((Subtarget.isUsingPCRelativeCalls() ||
5641              isa<GlobalAddressSDNode>(Callee)) &&
5642             "Callee should be an llvm::Function object.");
5643
5644      LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5645                        << "\nTCO callee: ");
5646      LLVM_DEBUG(Callee.dump());
5647 }
5648 }
5649
5650 if (!isTailCall && CB && CB->isMustTailCall())
5651 report_fatal_error("failed to perform tail call elimination on a call "
5652 "site marked musttail");
5653
5654 // When long calls (i.e. indirect calls) are always used, calls are always
5655 // made via function pointer. If we have a function name, first translate it
5656 // into a pointer.
5657 if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5658 !isTailCall)
5659 Callee = LowerGlobalAddress(Callee, DAG);
5660
5661 CallFlags CFlags(
5662 CallConv, isTailCall, isVarArg, isPatchPoint,
5663 isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
5664 // hasNest
5665 Subtarget.is64BitELFABI() &&
5666 any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
5667 CLI.NoMerge);
5668
5669 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5670 return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5671 InVals, CB);
5672
5673 if (Subtarget.isSVR4ABI())
5674 return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5675 InVals, CB);
5676
5677 if (Subtarget.isAIXABI())
5678 return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5679 InVals, CB);
5680
5681 return LowerCall_Darwin(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5682 InVals, CB);
5683}
5684
5685SDValue PPCTargetLowering::LowerCall_32SVR4(
5686 SDValue Chain, SDValue Callee, CallFlags CFlags,
5687 const SmallVectorImpl<ISD::OutputArg> &Outs,
5688 const SmallVectorImpl<SDValue> &OutVals,
5689 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5690 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5691 const CallBase *CB) const {
5692 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5693 // of the 32-bit SVR4 ABI stack frame layout.
5694
5695 const CallingConv::ID CallConv = CFlags.CallConv;
5696 const bool IsVarArg = CFlags.IsVarArg;
5697 const bool IsTailCall = CFlags.IsTailCall;
5698
5699  assert((CallConv == CallingConv::C ||
5700          CallConv == CallingConv::Cold ||
5701          CallConv == CallingConv::Fast) && "Unknown calling convention!");
5702
5703 const Align PtrAlign(4);
5704
5705 MachineFunction &MF = DAG.getMachineFunction();
5706
5707  // Mark this function as potentially containing a function that contains a
5708  // tail call. As a consequence the frame pointer will be used for dynamic
5709  // allocation and for restoring the caller's stack pointer in this function's
5710  // epilog. This is done because, by tail calling, the called function might
5711  // overwrite the value in this function's (MF) stack pointer stack slot 0(SP).
5712 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5713 CallConv == CallingConv::Fast)
5714 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5715
5716 // Count how many bytes are to be pushed on the stack, including the linkage
5717 // area, parameter list area and the part of the local variable space which
5718 // contains copies of aggregates which are passed by value.
5719
5720 // Assign locations to all of the outgoing arguments.
5721 SmallVector<CCValAssign, 16> ArgLocs;
5722 PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5723
5724 // Reserve space for the linkage area on the stack.
5725 CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
5726 PtrAlign);
5727 if (useSoftFloat())
5728 CCInfo.PreAnalyzeCallOperands(Outs);
5729
5730 if (IsVarArg) {
5731 // Handle fixed and variable vector arguments differently.
5732 // Fixed vector arguments go into registers as long as registers are
5733 // available. Variable vector arguments always go into memory.
5734 unsigned NumArgs = Outs.size();
5735
5736 for (unsigned i = 0; i != NumArgs; ++i) {
5737 MVT ArgVT = Outs[i].VT;
5738 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5739 bool Result;
5740
5741 if (Outs[i].IsFixed) {
5742 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
5743 CCInfo);
5744 } else {
5745 Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
5746 ArgFlags, CCInfo);
5747 }
5748
5749 if (Result) {
5750#ifndef NDEBUG
5751 errs() << "Call operand #" << i << " has unhandled type "
5752 << EVT(ArgVT).getEVTString() << "\n";
5753#endif
5754        llvm_unreachable(nullptr);
5755 }
5756 }
5757 } else {
5758 // All arguments are treated the same.
5759 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
5760 }
5761 CCInfo.clearWasPPCF128();
5762
5763 // Assign locations to all of the outgoing aggregate by value arguments.
5764 SmallVector<CCValAssign, 16> ByValArgLocs;
5765 CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
5766
5767 // Reserve stack space for the allocations in CCInfo.
5768 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);
5769
5770 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
5771
5772 // Size of the linkage area, parameter list area and the part of the local
5773  // variable space where copies of aggregates which are passed by value are
5774 // stored.
5775 unsigned NumBytes = CCByValInfo.getNextStackOffset();
5776
5777 // Calculate by how many bytes the stack has to be adjusted in case of tail
5778 // call optimization.
5779 int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
5780
5781 // Adjust the stack pointer for the new arguments...
5782 // These operations are automatically eliminated by the prolog/epilog pass
5783 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5784 SDValue CallSeqStart = Chain;
5785
5786 // Load the return address and frame pointer so it can be moved somewhere else
5787 // later.
5788 SDValue LROp, FPOp;
5789 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5790
5791 // Set up a copy of the stack pointer for use loading and storing any
5792 // arguments that may not fit in the registers available for argument
5793 // passing.
5794 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5795
5796 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5797 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5798 SmallVector<SDValue, 8> MemOpChains;
5799
5800 bool seenFloatArg = false;
5801 // Walk the register/memloc assignments, inserting copies/loads.
5802 // i - Tracks the index into the list of registers allocated for the call
5803 // RealArgIdx - Tracks the index into the list of actual function arguments
5804 // j - Tracks the index into the list of byval arguments
5805 for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
5806 i != e;
5807 ++i, ++RealArgIdx) {
5808 CCValAssign &VA = ArgLocs[i];
5809 SDValue Arg = OutVals[RealArgIdx];
5810 ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
5811
5812 if (Flags.isByVal()) {
5813 // Argument is an aggregate which is passed by value, thus we need to
5814 // create a copy of it in the local variable space of the current stack
5815 // frame (which is the stack frame of the caller) and pass the address of
5816 // this copy to the callee.
5817      assert((j < ByValArgLocs.size()) && "Index out of bounds!");
5818 CCValAssign &ByValVA = ByValArgLocs[j++];
5819      assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
5820
5821      // Memory reserved in the local variable space of the caller's stack frame.
5822 unsigned LocMemOffset = ByValVA.getLocMemOffset();
5823
5824 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5825 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5826 StackPtr, PtrOff);
5827
5828 // Create a copy of the argument in the local area of the current
5829 // stack frame.
5830 SDValue MemcpyCall =
5831 CreateCopyOfByValArgument(Arg, PtrOff,
5832 CallSeqStart.getNode()->getOperand(0),
5833 Flags, DAG, dl);
5834
5835 // This must go outside the CALLSEQ_START..END.
5836 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
5837 SDLoc(MemcpyCall));
5838 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5839 NewCallSeqStart.getNode());
5840 Chain = CallSeqStart = NewCallSeqStart;
5841
5842 // Pass the address of the aggregate copy on the stack either in a
5843 // physical register or in the parameter list area of the current stack
5844 // frame to the callee.
5845 Arg = PtrOff;
5846 }
5847
5848    // When useCRBits() is true, there can be i1 arguments.
5849    // This is because getRegisterType(MVT::i1) => MVT::i1,
5850    // and for other integer types getRegisterType() => MVT::i32.
5851    // Extend i1 and ensure the callee will get i32.
5852 if (Arg.getValueType() == MVT::i1)
5853 Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
5854 dl, MVT::i32, Arg);
5855
5856 if (VA.isRegLoc()) {
5857 seenFloatArg |= VA.getLocVT().isFloatingPoint();
5858 // Put argument in a physical register.
5859 if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
5860 bool IsLE = Subtarget.isLittleEndian();
5861 SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5862 DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
5863 RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
5864 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5865 DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
5866 RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
5867 SVal.getValue(0)));
5868 } else
5869 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
5870 } else {
5871 // Put argument in the parameter list area of the current stack frame.
5872      assert(VA.isMemLoc());
5873 unsigned LocMemOffset = VA.getLocMemOffset();
5874
5875 if (!IsTailCall) {
5876 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5877 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5878 StackPtr, PtrOff);
5879
5880 MemOpChains.push_back(
5881 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5882 } else {
5883 // Calculate and remember argument location.
5884 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
5885 TailCallArguments);
5886 }
5887 }
5888 }
5889
5890 if (!MemOpChains.empty())
5891 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
5892
5893 // Build a sequence of copy-to-reg nodes chained together with token chain
5894 // and flag operands which copy the outgoing args into the appropriate regs.
5895 SDValue InFlag;
5896 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5897 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5898 RegsToPass[i].second, InFlag);
5899 InFlag = Chain.getValue(1);
5900 }
5901
5902 // Set CR bit 6 to true if this is a vararg call with floating args passed in
5903 // registers.
5904 if (IsVarArg) {
5905 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
5906 SDValue Ops[] = { Chain, InFlag };
5907
5908 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
5909 dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));
5910
5911 InFlag = Chain.getValue(1);
5912 }
5913
5914 if (IsTailCall)
5915 PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
5916 TailCallArguments);
5917
5918 return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
5919 Callee, SPDiff, NumBytes, Ins, InVals, CB);
5920}
5921
5922// Copy an argument into memory, being careful to do this outside the
5923// call sequence for the call to which the argument belongs.
5924SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
5925 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
5926 SelectionDAG &DAG, const SDLoc &dl) const {
5927 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
5928 CallSeqStart.getNode()->getOperand(0),
5929 Flags, DAG, dl);
5930 // The MEMCPY must go outside the CALLSEQ_START..END.
5931 int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
5932 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
5933 SDLoc(MemcpyCall));
5934 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5935 NewCallSeqStart.getNode());
5936 return NewCallSeqStart;
5937}
5938
5939SDValue PPCTargetLowering::LowerCall_64SVR4(
5940 SDValue Chain, SDValue Callee, CallFlags CFlags,
5941 const SmallVectorImpl<ISD::OutputArg> &Outs,
5942 const SmallVectorImpl<SDValue> &OutVals,
5943 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5944 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5945 const CallBase *CB) const {
5946 bool isELFv2ABI = Subtarget.isELFv2ABI();
5947 bool isLittleEndian = Subtarget.isLittleEndian();
5948 unsigned NumOps = Outs.size();
5949 bool IsSibCall = false;
5950 bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
5951
5952 EVT PtrVT = getPointerTy(DAG.getDataLayout());
5953 unsigned PtrByteSize = 8;
5954
5955 MachineFunction &MF = DAG.getMachineFunction();
5956
5957 if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
5958 IsSibCall = true;
5959
5960  // Mark this function as potentially containing a function that contains a
5961  // tail call. As a consequence the frame pointer will be used for dynamic
5962  // allocation and for restoring the caller's stack pointer in this function's
5963  // epilog. This is done because, by tail calling, the called function might
5964  // overwrite the value in this function's (MF) stack pointer stack slot 0(SP).
5965 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
5966 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5967
5968  assert(!(IsFastCall && CFlags.IsVarArg) &&
5969         "fastcc not supported on varargs functions");
5970
5971 // Count how many bytes are to be pushed on the stack, including the linkage
5972 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
5973 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
5974 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
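  // A sketch of the linkage area layout implied above (doubleword slots;
  // offsets assumed from the stated field order):
  //   ELFv1: 0 back chain | 8 CR save | 16 LR save | 24,32 reserved | 40 TOC save  (48 bytes)
  //   ELFv2: 0 back chain | 8 CR save | 16 LR save | 24 TOC save                   (32 bytes)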
5975 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
5976 unsigned NumBytes = LinkageSize;
5977 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
5978
5979 static const MCPhysReg GPR[] = {
5980 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
5981 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
5982 };
5983 static const MCPhysReg VR[] = {
5984 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
5985 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
5986 };
5987
5988 const unsigned NumGPRs = array_lengthof(GPR);
5989 const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
5990 const unsigned NumVRs = array_lengthof(VR);
5991
5992 // On ELFv2, we can avoid allocating the parameter area if all the arguments
5993 // can be passed to the callee in registers.
5994 // For the fast calling convention, there is another check below.
5995 // Note: We should keep consistent with LowerFormalArguments_64SVR4()
5996 bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
5997 if (!HasParameterArea) {
5998 unsigned ParamAreaSize = NumGPRs * PtrByteSize;
5999 unsigned AvailableFPRs = NumFPRs;
6000 unsigned AvailableVRs = NumVRs;
6001 unsigned NumBytesTmp = NumBytes;
6002 for (unsigned i = 0; i != NumOps; ++i) {
6003 if (Outs[i].Flags.isNest()) continue;
6004 if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
6005 PtrByteSize, LinkageSize, ParamAreaSize,
6006 NumBytesTmp, AvailableFPRs, AvailableVRs))
6007 HasParameterArea = true;
6008 }
6009 }
6010
6011 // When using the fast calling convention, we don't provide backing for
6012 // arguments that will be in registers.
6013 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6014
6015 // Avoid allocating parameter area for fastcc functions if all the arguments
6016 // can be passed in the registers.
6017 if (IsFastCall)
6018 HasParameterArea = false;
6019
6020 // Add up all the space actually used.
6021 for (unsigned i = 0; i != NumOps; ++i) {
6022 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6023 EVT ArgVT = Outs[i].VT;
6024 EVT OrigVT = Outs[i].ArgVT;
6025
6026 if (Flags.isNest())
6027 continue;
6028
6029 if (IsFastCall) {
6030 if (Flags.isByVal()) {
6031 NumGPRsUsed += (Flags.getByValSize()+7)/8;
6032 if (NumGPRsUsed > NumGPRs)
6033 HasParameterArea = true;
6034 } else {
6035 switch (ArgVT.getSimpleVT().SimpleTy) {
6036        default: llvm_unreachable("Unexpected ValueType for argument!");
6037 case MVT::i1:
6038 case MVT::i32:
6039 case MVT::i64:
6040 if (++NumGPRsUsed <= NumGPRs)
6041 continue;
6042 break;
6043 case MVT::v4i32:
6044 case MVT::v8i16:
6045 case MVT::v16i8:
6046 case MVT::v2f64:
6047 case MVT::v2i64:
6048 case MVT::v1i128:
6049 case MVT::f128:
6050 if (++NumVRsUsed <= NumVRs)
6051 continue;
6052 break;
6053 case MVT::v4f32:
6054 if (++NumVRsUsed <= NumVRs)
6055 continue;
6056 break;
6057 case MVT::f32:
6058 case MVT::f64:
6059 if (++NumFPRsUsed <= NumFPRs)
6060 continue;
6061 break;
6062 }
6063 HasParameterArea = true;
6064 }
6065 }
6066
6067 /* Respect alignment of argument on the stack. */
6068    auto Alignment =
6069        CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6070    NumBytes = alignTo(NumBytes, Alignment);
6071
6072 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6073 if (Flags.isInConsecutiveRegsLast())
6074 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6075 }
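// Illustrative accounting (assuming PtrByteSize == 8): a promoted integer
// consumes one 8-byte slot, while a 16-byte vector first rounds NumBytes up to
// a 16-byte boundary and then adds 16.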
6076
6077 unsigned NumBytesActuallyUsed = NumBytes;
6078
6079 // In the old ELFv1 ABI,
6080 // the prolog code of the callee may store up to 8 GPR argument registers to
6081 // the stack, allowing va_start to index over them in memory if it is varargs.
6082 // Because we cannot tell if this is needed on the caller side, we have to
6083 // conservatively assume that it is needed. As such, make sure we have at
6084 // least enough stack space for the caller to store the 8 GPRs.
6085 // In the ELFv2 ABI, we allocate the parameter area iff a callee
6086 // really requires memory operands, e.g. a vararg function.
6087 if (HasParameterArea)
6088 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6089 else
6090 NumBytes = LinkageSize;
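// Worked example (assuming PtrByteSize == 8): the floor is LinkageSize + 64,
// i.e. 48 + 64 = 112 bytes on ELFv1 and 32 + 64 = 96 bytes on ELFv2.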
6091
6092 // Tail call needs the stack to be aligned.
6093 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6094 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6095
6096 int SPDiff = 0;
6097
6098 // Calculate by how many bytes the stack has to be adjusted in case of tail
6099 // call optimization.
6100 if (!IsSibCall)
6101 SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6102
6103 // To protect arguments on the stack from being clobbered in a tail call,
6104 // force all the loads to happen before doing any other lowering.
6105 if (CFlags.IsTailCall)
6106 Chain = DAG.getStackArgumentTokenFactor(Chain);
6107
6108 // Adjust the stack pointer for the new arguments...
6109 // These operations are automatically eliminated by the prolog/epilog pass
6110 if (!IsSibCall)
6111 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6112 SDValue CallSeqStart = Chain;
6113
6114 // Load the return address and frame pointer so they can be moved somewhere
6115 // else later.
6116 SDValue LROp, FPOp;
6117 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6118
6119 // Set up a copy of the stack pointer for use loading and storing any
6120 // arguments that may not fit in the registers available for argument
6121 // passing.
6122 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6123
6124 // Figure out which arguments are going to go in registers, and which in
6125 // memory. Also, if this is a vararg function, floating point operations
6126 // must be stored to our stack, and loaded into integer regs as well, if
6127 // any integer regs are available for argument passing.
6128 unsigned ArgOffset = LinkageSize;
6129
6130 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6131 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6132
6133 SmallVector<SDValue, 8> MemOpChains;
6134 for (unsigned i = 0; i != NumOps; ++i) {
6135 SDValue Arg = OutVals[i];
6136 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6137 EVT ArgVT = Outs[i].VT;
6138 EVT OrigVT = Outs[i].ArgVT;
6139
6140 // PtrOff will be used to store the current argument to the stack if a
6141 // register cannot be found for it.
6142 SDValue PtrOff;
6143
6144 // We re-align the argument offset for each argument, except under the fast
6145 // calling convention, where we only do so once we know the argument will
6146 // actually use a stack slot.
6147 auto ComputePtrOff = [&]() {
6148 /* Respect alignment of argument on the stack. */
6149 auto Alignment =
6150 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6151 ArgOffset = alignTo(ArgOffset, Alignment);
6152
6153 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6154
6155 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6156 };
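// Note: the work is wrapped in a lambda so that, under the fast calling
// convention, the offset is only aligned and materialized when the argument
// really falls back to a stack slot (see the IsFastCall call sites below).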
6157
6158 if (!IsFastCall) {
6159 ComputePtrOff();
6160
6161 /* Compute GPR index associated with argument offset. */
6162 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6163 GPR_idx = std::min(GPR_idx, NumGPRs);
6164 }
6165
6166 // Promote integers to 64-bit values.
6167 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6168 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6169 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6170 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6171 }
6172
6173 // FIXME memcpy is used way more than necessary. Correctness first.
6174 // Note: "by value" is code for passing a structure by value, not
6175 // basic types.
6176 if (Flags.isByVal()) {
6177 // Note: Size includes alignment padding, so
6178 // struct x { short a; char b; }
6179 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
6180 // These are the proper values we need for right-justifying the
6181 // aggregate in a parameter register.
6182 unsigned Size = Flags.getByValSize();
6183
6184 // An empty aggregate parameter takes up no storage and no
6185 // registers.
6186 if (Size == 0)
6187 continue;
6188
6189 if (IsFastCall)
6190 ComputePtrOff();
6191
6192 // All aggregates smaller than 8 bytes must be passed right-justified.
6193 if (Size==1 || Size==2 || Size==4) {
6194 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6195 if (GPR_idx != NumGPRs) {
6196 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6197 MachinePointerInfo(), VT);
6198 MemOpChains.push_back(Load.getValue(1));
6199 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6200
6201 ArgOffset += PtrByteSize;
6202 continue;
6203 }
6204 }
6205
6206 if (GPR_idx == NumGPRs && Size < 8) {
6207 SDValue AddPtr = PtrOff;
6208 if (!isLittleEndian) {
6209 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6210 PtrOff.getValueType());
6211 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6212 }
6213 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6214 CallSeqStart,
6215 Flags, DAG, dl);
6216 ArgOffset += PtrByteSize;
6217 continue;
6218 }
6219 // Copy entire object into memory. There are cases where gcc-generated
6220 // code assumes it is there, even if it could be put entirely into
6221 // registers. (This is not what the doc says.)
6222
6223 // FIXME: The above statement is likely due to a misunderstanding of the
6224 // documents. All arguments must be copied into the parameter area BY
6225 // THE CALLEE in the event that the callee takes the address of any
6226 // formal argument. That has not yet been implemented. However, it is
6227 // reasonable to use the stack area as a staging area for the register
6228 // load.
6229
6230 // Skip this for small aggregates, as we will use the same slot for a
6231 // right-justified copy, below.
6232 if (Size >= 8)
6233 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6234 CallSeqStart,
6235 Flags, DAG, dl);
6236
6237 // When a register is available, pass a small aggregate right-justified.
6238 if (Size < 8 && GPR_idx != NumGPRs) {
6239 // The easiest way to get this right-justified in a register
6240 // is to copy the structure into the rightmost portion of a
6241 // local variable slot, then load the whole slot into the
6242 // register.
6243 // FIXME: The memcpy seems to produce pretty awful code for
6244 // small aggregates, particularly for packed ones.
6245 // FIXME: It would be preferable to use the slot in the
6246 // parameter save area instead of a new local variable.
6247 SDValue AddPtr = PtrOff;
6248 if (!isLittleEndian) {
6249 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6250 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6251 }
6252 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6253 CallSeqStart,
6254 Flags, DAG, dl);
6255
6256 // Load the slot into the register.
6257 SDValue Load =
6258 DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6259 MemOpChains.push_back(Load.getValue(1));
6260 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6261
6262 // Done with this argument.
6263 ArgOffset += PtrByteSize;
6264 continue;
6265 }
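// Illustration (big-endian): for a 3-byte aggregate, AddPtr is PtrOff + 5, so
// after the memcpy the bytes occupy the rightmost portion of the doubleword
// and the full 8-byte load yields a right-justified value in the GPR.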
6266
6267 // For aggregates larger than PtrByteSize, copy the pieces of the
6268 // object that fit into registers from the parameter save area.
6269 for (unsigned j=0; j<Size; j+=PtrByteSize) {
6270 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6271 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6272 if (GPR_idx != NumGPRs) {
6273 SDValue Load =
6274 DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
6275 MemOpChains.push_back(Load.getValue(1));
6276 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6277 ArgOffset += PtrByteSize;
6278 } else {
6279 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6280 break;
6281 }
6282 }
6283 continue;
6284 }
6285
6286 switch (Arg.getSimpleValueType().SimpleTy) {
6287    default: llvm_unreachable("Unexpected ValueType for argument!");
6288 case MVT::i1:
6289 case MVT::i32:
6290 case MVT::i64:
6291 if (Flags.isNest()) {
6292 // The 'nest' parameter, if any, is passed in R11.
6293 RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6294 break;
6295 }
6296
6297 // These can be scalar arguments or elements of an integer array type
6298 // passed directly. Clang may use those instead of "byval" aggregate
6299 // types to avoid forcing arguments to memory unnecessarily.
6300 if (GPR_idx != NumGPRs) {
6301 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6302 } else {
6303 if (IsFastCall)
6304 ComputePtrOff();
6305
6306        assert(HasParameterArea &&
6307               "Parameter area must exist to pass an argument in memory.");
6308 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6309 true, CFlags.IsTailCall, false, MemOpChains,
6310 TailCallArguments, dl);
6311 if (IsFastCall)
6312 ArgOffset += PtrByteSize;
6313 }
6314 if (!IsFastCall)
6315 ArgOffset += PtrByteSize;
6316 break;
6317 case MVT::f32:
6318 case MVT::f64: {
6319 // These can be scalar arguments or elements of a float array type
6320      // passed directly. The latter are used to implement ELFv2 homogeneous
6321 // float aggregates.
6322
6323 // Named arguments go into FPRs first, and once they overflow, the
6324 // remaining arguments go into GPRs and then the parameter save area.
6325 // Unnamed arguments for vararg functions always go to GPRs and
6326 // then the parameter save area. For now, put all arguments to vararg
6327 // routines always in both locations (FPR *and* GPR or stack slot).
6328 bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6329 bool NeededLoad = false;
6330
6331 // First load the argument into the next available FPR.
6332 if (FPR_idx != NumFPRs)
6333 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6334
6335 // Next, load the argument into GPR or stack slot if needed.
6336 if (!NeedGPROrStack)
6337 ;
6338 else if (GPR_idx != NumGPRs && !IsFastCall) {
6339 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6340 // once we support fp <-> gpr moves.
6341
6342 // In the non-vararg case, this can only ever happen in the
6343 // presence of f32 array types, since otherwise we never run
6344 // out of FPRs before running out of GPRs.
6345 SDValue ArgVal;
6346
6347 // Double values are always passed in a single GPR.
6348 if (Arg.getValueType() != MVT::f32) {
6349 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6350
6351 // Non-array float values are extended and passed in a GPR.
6352 } else if (!Flags.isInConsecutiveRegs()) {
6353 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6354 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6355
6356 // If we have an array of floats, we collect every odd element
6357 // together with its predecessor into one GPR.
6358 } else if (ArgOffset % PtrByteSize != 0) {
6359 SDValue Lo, Hi;
6360 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6361 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6362 if (!isLittleEndian)
6363 std::swap(Lo, Hi);
6364 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6365
6366 // The final element, if even, goes into the first half of a GPR.
6367 } else if (Flags.isInConsecutiveRegsLast()) {
6368 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6369 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6370 if (!isLittleEndian)
6371 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6372 DAG.getConstant(32, dl, MVT::i32));
6373
6374 // Non-final even elements are skipped; they will be handled
6375        // together with the subsequent argument on the next go-around.
6376 } else
6377 ArgVal = SDValue();
6378
6379 if (ArgVal.getNode())
6380 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
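        // Illustration: for a homogeneous f32 array, the element at an odd
        // 4-byte offset is bitcast to i32 and paired (BUILD_PAIR) with its
        // predecessor into a single i64; on big-endian the halves are swapped
        // so the earlier element ends up in the high half of the GPR.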
6381 } else {
6382 if (IsFastCall)
6383 ComputePtrOff();
6384
6385 // Single-precision floating-point values are mapped to the
6386 // second (rightmost) word of the stack doubleword.
6387 if (Arg.getValueType() == MVT::f32 &&
6388 !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6389 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6390 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6391 }
6392
6393        assert(HasParameterArea &&
6394               "Parameter area must exist to pass an argument in memory.");
6395 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6396 true, CFlags.IsTailCall, false, MemOpChains,
6397 TailCallArguments, dl);
6398
6399 NeededLoad = true;
6400 }
6401 // When passing an array of floats, the array occupies consecutive
6402 // space in the argument area; only round up to the next doubleword
6403 // at the end of the array. Otherwise, each float takes 8 bytes.
6404 if (!IsFastCall || NeededLoad) {
6405 ArgOffset += (Arg.getValueType() == MVT::f32 &&
6406 Flags.isInConsecutiveRegs()) ? 4 : 8;
6407 if (Flags.isInConsecutiveRegsLast())
6408 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6409 }
6410 break;
6411 }
6412 case MVT::v4f32:
6413 case MVT::v4i32:
6414 case MVT::v8i16:
6415 case MVT::v16i8:
6416 case MVT::v2f64:
6417 case MVT::v2i64:
6418 case MVT::v1i128:
6419 case MVT::f128:
6420 // These can be scalar arguments or elements of a vector array type
6421      // passed directly. The latter are used to implement ELFv2 homogeneous
6422 // vector aggregates.
6423
6424 // For a varargs call, named arguments go into VRs or on the stack as
6425 // usual; unnamed arguments always go to the stack or the corresponding
6426 // GPRs when within range. For now, we always put the value in both
6427 // locations (or even all three).
6428 if (CFlags.IsVarArg) {
6429        assert(HasParameterArea &&
6430               "Parameter area must exist if we have a varargs call.");
6431 // We could elide this store in the case where the object fits
6432 // entirely in R registers. Maybe later.
6433 SDValue Store =
6434 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6435 MemOpChains.push_back(Store);
6436 if (VR_idx != NumVRs) {
6437 SDValue Load =
6438 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6439 MemOpChains.push_back(Load.getValue(1));
6440 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6441 }
6442 ArgOffset += 16;
6443 for (unsigned i=0; i<16; i+=PtrByteSize) {
6444 if (GPR_idx == NumGPRs)
6445 break;
6446 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6447 DAG.getConstant(i, dl, PtrVT));
6448 SDValue Load =
6449 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6450 MemOpChains.push_back(Load.getValue(1));
6451 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6452 }
6453 break;
6454 }
6455
6456 // Non-varargs Altivec params go into VRs or on the stack.
6457 if (VR_idx != NumVRs) {
6458 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6459 } else {
6460 if (IsFastCall)
6461 ComputePtrOff();
6462
6463        assert(HasParameterArea &&
6464               "Parameter area must exist to pass an argument in memory.");
6465 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6466 true, CFlags.IsTailCall, true, MemOpChains,
6467 TailCallArguments, dl);
6468 if (IsFastCall)
6469 ArgOffset += 16;
6470 }
6471
6472 if (!IsFastCall)
6473 ArgOffset += 16;
6474 break;
6475 }
6476 }
6477
6478  assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6479         "mismatch in size of parameter area");
6480 (void)NumBytesActuallyUsed;
6481
6482 if (!MemOpChains.empty())
6483 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6484
6485 // Check if this is an indirect call (MTCTR/BCTRL).
6486 // See prepareDescriptorIndirectCall and buildCallOperands for more
6487 // information about calls through function pointers in the 64-bit SVR4 ABI.
6488 if (CFlags.IsIndirect) {
6489 // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6490 // caller in the TOC save area.
6491 if (isTOCSaveRestoreRequired(Subtarget)) {
6492      assert(!CFlags.IsTailCall && "Indirect tails calls not supported");
6493 // Load r2 into a virtual register and store it to the TOC save area.
6494 setUsesTOCBasePtr(DAG);
6495 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6496 // TOC save area offset.
6497 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6498 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6499 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6500 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6501 MachinePointerInfo::getStack(
6502 DAG.getMachineFunction(), TOCSaveOffset));
6503 }
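    // The save slot offset comes from the frame lowering; as a rough guide it
    // is 40 bytes from the SP under ELFv1 and 24 bytes under ELFv2, though the
    // exact value is whatever getTOCSaveOffset() reports.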
6504 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6505 // This does not mean the MTCTR instruction must use R12; it's easier
6506 // to model this as an extra parameter, so do that.
6507 if (isELFv2ABI && !CFlags.IsPatchPoint)
6508 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6509 }
6510
6511 // Build a sequence of copy-to-reg nodes chained together with token chain
6512 // and flag operands which copy the outgoing args into the appropriate regs.
6513 SDValue InFlag;
6514 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6515 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6516 RegsToPass[i].second, InFlag);
6517 InFlag = Chain.getValue(1);
6518 }
6519
6520 if (CFlags.IsTailCall && !IsSibCall)
6521 PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6522 TailCallArguments);
6523
6524 return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
6525 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6526}
6527
6528SDValue PPCTargetLowering::LowerCall_Darwin(
6529 SDValue Chain, SDValue Callee, CallFlags CFlags,
6530 const SmallVectorImpl<ISD::OutputArg> &Outs,
6531 const SmallVectorImpl<SDValue> &OutVals,
6532 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6533 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6534 const CallBase *CB) const {
6535 unsigned NumOps = Outs.size();
6536
6537 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6538 bool isPPC64 = PtrVT == MVT::i64;
6539 unsigned PtrByteSize = isPPC64 ? 8 : 4;
6540
6541 MachineFunction &MF = DAG.getMachineFunction();
6542
6543  // Mark this function as potentially making a tail call. As a consequence,
6544  // the frame pointer will be used for dynamic stack allocation and for
6545  // restoring the caller's stack pointer in this function's (MF) epilogue.
6546  // This is done because the tail-called function might overwrite the value
6547  // in this function's stack pointer slot at 0(SP).
6548 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
6549 CFlags.CallConv == CallingConv::Fast)
6550 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6551
6552 // Count how many bytes are to be pushed on the stack, including the linkage
6553 // area, and parameter passing area. We start with 24/48 bytes, which is
6554 // prereserved space for [SP][CR][LR][3 x unused].
6555 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6556 unsigned NumBytes = LinkageSize;
6557
6558 // Add up all the space actually used.
6559 // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
6560 // they all go in registers, but we must reserve stack space for them for
6561 // possible use by the caller. In varargs or 64-bit calls, parameters are
6562 // assigned stack space in order, with padding so Altivec parameters are
6563 // 16-byte aligned.
6564 unsigned nAltivecParamsAtEnd = 0;
6565 for (unsigned i = 0; i != NumOps; ++i) {
6566 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6567 EVT ArgVT = Outs[i].VT;
6568 // Varargs Altivec parameters are padded to a 16 byte boundary.
6569 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
6570 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
6571 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
6572 if (!CFlags.IsVarArg && !isPPC64) {
6573 // Non-varargs Altivec parameters go after all the non-Altivec
6574 // parameters; handle those later so we know how much padding we need.
6575 nAltivecParamsAtEnd++;
6576 continue;
6577 }
6578 // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
6579 NumBytes = ((NumBytes+15)/16)*16;
6580 }
6581 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6582 }
6583
6584 // Allow for Altivec parameters at the end, if needed.
6585 if (nAltivecParamsAtEnd) {
6586 NumBytes = ((NumBytes+15)/16)*16;
6587 NumBytes += 16*nAltivecParamsAtEnd;
6588 }
6589
6590 // The prolog code of the callee may store up to 8 GPR argument registers to
6591  // the stack, allowing va_start to index over them in memory if it is varargs.
6592 // Because we cannot tell if this is needed on the caller side, we have to
6593 // conservatively assume that it is needed. As such, make sure we have at
6594 // least enough stack space for the caller to store the 8 GPRs.
6595 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
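// Worked example (per the 24/48-byte linkage sizes noted above): on 32-bit
// Darwin the floor is 24 + 8 * 4 = 56 bytes; on 64-bit it is 48 + 8 * 8 = 112.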
6596
6597 // Tail call needs the stack to be aligned.
6598 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
6599 CFlags.CallConv == CallingConv::Fast)
6600 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6601
6602 // Calculate by how many bytes the stack has to be adjusted in case of tail
6603 // call optimization.
6604 int SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6605
6606 // To protect arguments on the stack from being clobbered in a tail call,
6607 // force all the loads to happen before doing any other lowering.
6608 if (CFlags.IsTailCall)
6609 Chain = DAG.getStackArgumentTokenFactor(Chain);
6610
6611 // Adjust the stack pointer for the new arguments...
6612 // These operations are automatically eliminated by the prolog/epilog pass
6613 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6614 SDValue CallSeqStart = Chain;
6615
6616  // Load the return address and frame pointer so they can be moved somewhere
6617  // else later.
6618 SDValue LROp, FPOp;
6619 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6620
6621 // Set up a copy of the stack pointer for use loading and storing any
6622 // arguments that may not fit in the registers available for argument
6623 // passing.
6624 SDValue StackPtr;
6625 if (isPPC64)
6626 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6627 else
6628 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
6629
6630 // Figure out which arguments are going to go in registers, and which in
6631 // memory. Also, if this is a vararg function, floating point operations
6632 // must be stored to our stack, and loaded into integer regs as well, if
6633 // any integer regs are available for argument passing.
6634 unsigned ArgOffset = LinkageSize;
6635 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6636
6637 static const MCPhysReg GPR_32[] = { // 32-bit registers.
6638 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6639 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
6640 };
6641 static const MCPhysReg GPR_64[] = { // 64-bit registers.
6642 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6643 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6644 };
6645 static const MCPhysReg VR[] = {
6646 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6647 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6648 };
6649 const unsigned NumGPRs = array_lengthof(GPR_32);
6650 const unsigned NumFPRs = 13;
6651 const unsigned NumVRs = array_lengthof(VR);
6652
6653 const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
6654
6655 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6656 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6657
6658 SmallVector<SDValue, 8> MemOpChains;
6659 for (unsigned i = 0; i != NumOps; ++i) {
6660 SDValue Arg = OutVals[i];
6661 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6662
6663 // PtrOff will be used to store the current argument to the stack if a
6664 // register cannot be found for it.
6665 SDValue PtrOff;
6666
6667 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6668
6669 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6670
6671 // On PPC64, promote integers to 64-bit values.
6672 if (isPPC64 && Arg.getValueType() == MVT::i32) {
6673 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6674 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6675 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6676 }
6677
6678 // FIXME memcpy is used way more than necessary. Correctness first.
6679 // Note: "by value" is code for passing a structure by value, not
6680 // basic types.
6681 if (Flags.isByVal()) {
6682 unsigned Size = Flags.getByValSize();
6683 // Very small objects are passed right-justified. Everything else is
6684 // passed left-justified.
6685 if (Size==1 || Size==2) {
6686 EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
6687 if (GPR_idx != NumGPRs) {
6688 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6689 MachinePointerInfo(), VT);
6690 MemOpChains.push_back(Load.getValue(1));
6691 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6692
6693 ArgOffset += PtrByteSize;
6694 } else {
6695 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6696 PtrOff.getValueType());
6697 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6698 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6699 CallSeqStart,
6700 Flags, DAG, dl);
6701 ArgOffset += PtrByteSize;
6702 }
6703 continue;
6704 }
6705 // Copy entire object into memory. There are cases where gcc-generated
6706 // code assumes it is there, even if it could be put entirely into
6707 // registers. (This is not what the doc says.)
6708 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6709 CallSeqStart,
6710 Flags, DAG, dl);
6711
6712 // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
6713 // copy the pieces of the object that fit into registers from the
6714 // parameter save area.
6715 for (unsigned j=0; j<Size; j+=PtrByteSize) {
6716 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6717 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6718 if (GPR_idx != NumGPRs) {
6719 SDValue Load =
6720 DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
6721 MemOpChains.push_back(Load.getValue(1));
6722 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6723 ArgOffset += PtrByteSize;
6724 } else {
6725 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6726 break;
6727 }
6728 }
6729 continue;
6730 }
6731
6732 switch (Arg.getSimpleValueType().SimpleTy) {
6733    default: llvm_unreachable("Unexpected ValueType for argument!");
6734 case MVT::i1:
6735 case MVT::i32:
6736 case MVT::i64:
6737 if (GPR_idx != NumGPRs) {
6738 if (Arg.getValueType() == MVT::i1)
6739 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);
6740
6741 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6742 } else {
6743 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6744 isPPC64, CFlags.IsTailCall, false, MemOpChains,
6745 TailCallArguments, dl);
6746 }
6747 ArgOffset += PtrByteSize;
6748 break;
6749 case MVT::f32:
6750 case MVT::f64:
6751 if (FPR_idx != NumFPRs) {
6752 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6753
6754 if (CFlags.IsVarArg) {
6755 SDValue Store =
6756 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6757 MemOpChains.push_back(Store);
6758
6759 // Float varargs are always shadowed in available integer registers
6760 if (GPR_idx != NumGPRs) {
6761 SDValue Load =
6762 DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
6763 MemOpChains.push_back(Load.getValue(1));
6764 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6765 }
6766 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
6767 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6768 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6769 SDValue Load =
6770 DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
6771 MemOpChains.push_back(Load.getValue(1));
6772 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6773 }
6774 } else {
6775 // If we have any FPRs remaining, we may also have GPRs remaining.
6776 // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
6777 // GPRs.
6778 if (GPR_idx != NumGPRs)
6779 ++GPR_idx;
6780 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
6781 !isPPC64) // PPC64 has 64-bit GPR's obviously :)
6782 ++GPR_idx;
6783 }
6784 } else
6785 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6786 isPPC64, CFlags.IsTailCall, false, MemOpChains,
6787 TailCallArguments, dl);
6788 if (isPPC64)
6789 ArgOffset += 8;
6790 else
6791 ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
6792 break;
6793 case MVT::v4f32:
6794 case MVT::v4i32:
6795 case MVT::v8i16:
6796 case MVT::v16i8:
6797 if (CFlags.IsVarArg) {
6798 // These go aligned on the stack, or in the corresponding R registers
6799 // when within range. The Darwin PPC ABI doc claims they also go in
6800 // V registers; in fact gcc does this only for arguments that are
6801 // prototyped, not for those that match the ... We do it for all
6802 // arguments, seems to work.
6803 while (ArgOffset % 16 !=0) {
6804 ArgOffset += PtrByteSize;
6805 if (GPR_idx != NumGPRs)
6806 GPR_idx++;
6807 }
6808 // We could elide this store in the case where the object fits
6809 // entirely in R registers. Maybe later.
6810 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
6811 DAG.getConstant(ArgOffset, dl, PtrVT));
6812 SDValue Store =
6813 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6814 MemOpChains.push_back(Store);
6815 if (VR_idx != NumVRs) {
6816 SDValue Load =
6817 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6818 MemOpChains.push_back(Load.getValue(1));
6819 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6820 }
6821 ArgOffset += 16;
6822 for (unsigned i=0; i<16; i+=PtrByteSize) {
6823 if (GPR_idx == NumGPRs)
6824 break;
6825 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6826 DAG.getConstant(i, dl, PtrVT));
6827 SDValue Load =
6828 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6829 MemOpChains.push_back(Load.getValue(1));
6830 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6831 }
6832 break;
6833 }
6834
6835 // Non-varargs Altivec params generally go in registers, but have
6836 // stack space allocated at the end.
6837 if (VR_idx != NumVRs) {
6838 // Doesn't have GPR space allocated.
6839 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6840 } else if (nAltivecParamsAtEnd==0) {
6841 // We are emitting Altivec params in order.
6842 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6843 isPPC64, CFlags.IsTailCall, true, MemOpChains,
6844 TailCallArguments, dl);
6845 ArgOffset += 16;
6846 }
6847 break;
6848 }
6849 }
6850 // If all Altivec parameters fit in registers, as they usually do,
6851 // they get stack space following the non-Altivec parameters. We
6852 // don't track this here because nobody below needs it.
6853 // If there are more Altivec parameters than fit in registers emit
6854 // the stores here.
6855 if (!CFlags.IsVarArg && nAltivecParamsAtEnd > NumVRs) {
6856 unsigned j = 0;
6857 // Offset is aligned; skip 1st 12 params which go in V registers.
6858 ArgOffset = ((ArgOffset+15)/16)*16;
6859 ArgOffset += 12*16;
6860 for (unsigned i = 0; i != NumOps; ++i) {
6861 SDValue Arg = OutVals[i];
6862 EVT ArgType = Outs[i].VT;
6863 if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
6864 ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
6865 if (++j > NumVRs) {
6866 SDValue PtrOff;
6867 // We are emitting Altivec params in order.
6868 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6869 isPPC64, CFlags.IsTailCall, true, MemOpChains,
6870 TailCallArguments, dl);
6871 ArgOffset += 16;
6872 }
6873 }
6874 }
6875 }
6876
6877 if (!MemOpChains.empty())
6878 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6879
6880 // On Darwin, R12 must contain the address of an indirect callee. This does
6881 // not mean the MTCTR instruction must use R12; it's easier to model this as
6882 // an extra parameter, so do that.
6883 if (CFlags.IsIndirect) {
6884    assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
6885 RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
6886 PPC::R12), Callee));
6887 }
6888
6889 // Build a sequence of copy-to-reg nodes chained together with token chain
6890 // and flag operands which copy the outgoing args into the appropriate regs.
6891 SDValue InFlag;
6892 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6893 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6894 RegsToPass[i].second, InFlag);
6895 InFlag = Chain.getValue(1);
6896 }
6897
6898 if (CFlags.IsTailCall)
6899 PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6900 TailCallArguments);
6901
6902 return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
6903 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6904}
6905
6906static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6907 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6908 CCState &State) {
6909
6910 const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6911 State.getMachineFunction().getSubtarget());
6912 const bool IsPPC64 = Subtarget.isPPC64();
6913 const Align PtrAlign = IsPPC64 ? Align(8) : Align(4);
6914 const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
6915
6916  assert((!ValVT.isInteger() ||
6917          (ValVT.getFixedSizeInBits() <= RegVT.getFixedSizeInBits())) &&
6918         "Integer argument exceeds register size: should have been legalized");
6919
6920 if (ValVT == MVT::f128)
6921 report_fatal_error("f128 is unimplemented on AIX.");
6922
6923 if (ArgFlags.isNest())
6924 report_fatal_error("Nest arguments are unimplemented.");
6925
6926 if (ValVT.isVector() || LocVT.isVector())
6927 report_fatal_error("Vector arguments are unimplemented on AIX.");
6928
6929 static const MCPhysReg GPR_32[] = {// 32-bit registers.
6930 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6931 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6932 static const MCPhysReg GPR_64[] = {// 64-bit registers.
6933 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6934 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6935
6936 if (ArgFlags.isByVal()) {
6937 if (ArgFlags.getNonZeroByValAlign() > PtrAlign)
6938 report_fatal_error("Pass-by-value arguments with alignment greater than "
6939 "register width are not supported.");
6940
6941 const unsigned ByValSize = ArgFlags.getByValSize();
6942
6943 // An empty aggregate parameter takes up no storage and no registers,
6944 // but needs a MemLoc for a stack slot for the formal arguments side.
6945 if (ByValSize == 0) {
6946 State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6947 State.getNextStackOffset(), RegVT,
6948 LocInfo));
6949 return false;
6950 }
6951
6952 const unsigned StackSize = alignTo(ByValSize, PtrAlign);
6953 unsigned Offset = State.AllocateStack(StackSize, PtrAlign);
6954 for (const unsigned E = Offset + StackSize; Offset < E;
6955 Offset += PtrAlign.value()) {
6956 if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
6957 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6958 else {
6959 State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6960 Offset, MVT::INVALID_SIMPLE_VALUE_TYPE,
6961 LocInfo));
6962 break;
6963 }
6964 }
6965 return false;
6966 }
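// Illustrative example: a 12-byte by-value argument on 64-bit AIX rounds up to
// 16 bytes of parameter save area and, registers permitting, occupies two
// GPRs; if the GPRs run out partway through, a single MemLoc covers the
// remaining bytes and the loop stops.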
6967
6968 // Arguments always reserve parameter save area.
6969 switch (ValVT.SimpleTy) {
6970 default:
6971 report_fatal_error("Unhandled value type for argument.");
6972 case MVT::i64:
6973 // i64 arguments should have been split to i32 for PPC32.
6974    assert(IsPPC64 && "PPC32 should have split i64 values.");
6975    LLVM_FALLTHROUGH;
6976 case MVT::i1:
6977 case MVT::i32: {
6978 const unsigned Offset = State.AllocateStack(PtrAlign.value(), PtrAlign);
6979 // AIX integer arguments are always passed in register width.
6980 if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
6981 LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
6982 : CCValAssign::LocInfo::ZExt;
6983 if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
6984 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6985 else
6986 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
6987
6988 return false;
6989 }
6990 case MVT::f32:
6991 case MVT::f64: {
6992 // Parameter save area (PSA) is reserved even if the float passes in fpr.
6993 const unsigned StoreSize = LocVT.getStoreSize();
6994 // Floats are always 4-byte aligned in the PSA on AIX.
6995 // This includes f64 in 64-bit mode for ABI compatibility.
6996 const unsigned Offset =
6997 State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
6998 unsigned FReg = State.AllocateReg(FPR);
6999 if (FReg)
7000 State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
7001
7002 // Reserve and initialize GPRs or initialize the PSA as required.
7003 for (unsigned I = 0; I < StoreSize; I += PtrAlign.value()) {
7004 if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
7005        assert(FReg && "An FPR should be available when a GPR is reserved.");
7006 if (State.isVarArg()) {
7007 // Successfully reserved GPRs are only initialized for vararg calls.
7008 // Custom handling is required for:
7009 // f64 in PPC32 needs to be split into 2 GPRs.
7010 // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
7011 State.addLoc(
7012 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
7013 }
7014 } else {
7015 // If there are insufficient GPRs, the PSA needs to be initialized.
7016 // Initialization occurs even if an FPR was initialized for
7017 // compatibility with the AIX XL compiler. The full memory for the
7018 // argument will be initialized even if a prior word is saved in GPR.
7019 // A custom memLoc is used when the argument also passes in FPR so
7020 // that the callee handling can skip over it easily.
7021 State.addLoc(
7022 FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
7023 LocInfo)
7024 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7025 break;
7026 }
7027 }
7028
7029 return false;
7030 }
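  // Illustrative note: under this scheme an f64 on 32-bit AIX shadows two
  // 4-byte GPR slots, and for vararg calls each successfully reserved GPR is
  // recorded as a custom register location so the callee-side handling can
  // reassemble or skip it.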
7031 }
7032 return true;
7033}
7034
7035static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
7036 bool IsPPC64) {
7037  assert((IsPPC64 || SVT != MVT::i64) &&
7038         "i64 should have been split for 32-bit codegen.");
7039
7040 switch (SVT) {
7041 default:
7042 report_fatal_error("Unexpected value type for formal argument");
7043 case MVT::i1:
7044 case MVT::i32:
7045 case MVT::i64:
7046 return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7047 case MVT::f32:
7048 return &PPC::F4RCRegClass;
7049 case MVT::f64:
7050 return &PPC::F8RCRegClass;
7051 }
7052}
7053
7054static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
7055 SelectionDAG &DAG, SDValue ArgValue,
7056 MVT LocVT, const SDLoc &dl) {
7057  assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
7058  assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
7059
7060 if (Flags.isSExt())
7061 ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
7062 DAG.getValueType(ValVT));
7063 else if (Flags.isZExt())
7064 ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
7065 DAG.getValueType(ValVT));
7066
7067 return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
7068}
7069
7070static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
7071 const unsigned LASize = FL->getLinkageSize();
7072
7073 if (PPC::GPRCRegClass.contains(Reg)) {
7074    assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
7075           "Reg must be a valid argument register!");
7076 return LASize + 4 * (Reg - PPC::R3);
7077 }
7078
7079 if (PPC::G8RCRegClass.contains(Reg)) {
7080    assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
7081           "Reg must be a valid argument register!");
7082 return LASize + 8 * (Reg - PPC::X3);
7083 }
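// Worked example (assuming the 32-bit AIX linkage area of 24 bytes): PPC::R5
// maps to 24 + 4 * (R5 - R3) = 32, i.e. the third word of the parameter save
// area.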
7084
7085  llvm_unreachable("Only general purpose registers expected.");
7086}
7087
7088// AIX ABI Stack Frame Layout:
7089//
7090// Low Memory +--------------------------------------------+
7091// SP +---> | Back chain | ---+
7092// | +--------------------------------------------+ |
7093// | | Saved Condition Register | |
7094// | +--------------------------------------------+ |
7095// | | Saved Linkage Register | |
7096// | +--------------------------------------------+ | Linkage Area
7097// | | Reserved for compilers | |
7098// | +--------------------------------------------+ |
7099// | | Reserved for binders | |
7100// | +--------------------------------------------+ |
7101// | | Saved TOC pointer | ---+
7102// | +--------------------------------------------+
7103// | | Parameter save area |
7104// | +--------------------------------------------+
7105// | | Alloca space |
7106// | +--------------------------------------------+
7107// | | Local variable space |
7108// | +--------------------------------------------+
7109// | | Float/int conversion temporary |
7110// | +--------------------------------------------+
7111// | | Save area for AltiVec registers |
7112// | +--------------------------------------------+
7113// | | AltiVec alignment padding |
7114// | +--------------------------------------------+
7115// | | Save area for VRSAVE register |
7116// | +--------------------------------------------+
7117// | | Save area for General Purpose registers |
7118// | +--------------------------------------------+
7119// | | Save area for Floating Point registers |
7120// | +--------------------------------------------+
7121// +---- | Back chain |
7122// High Memory +--------------------------------------------+
7123//
7124// Specifications:
7125// AIX 7.2 Assembler Language Reference
7126// Subroutine linkage convention
7127
7128SDValue PPCTargetLowering::LowerFormalArguments_AIX(
7129 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
7130 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7131 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7132
7133  assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
7134          CallConv == CallingConv::Fast) &&
7135         "Unexpected calling convention!");
7136
7137 if (getTargetMachine().Options.GuaranteedTailCallOpt)
7138 report_fatal_error("Tail call support is unimplemented on AIX.");
7139
7140 if (useSoftFloat())
7141 report_fatal_error("Soft float support is unimplemented on AIX.");
7142
7143 const PPCSubtarget &Subtarget =
7144 static_cast<const PPCSubtarget &>(DAG.getSubtarget());
7145
7146 const bool IsPPC64 = Subtarget.isPPC64();
7147 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7148
7149 // Assign locations to all of the incoming arguments.
7150 SmallVector<CCValAssign, 16> ArgLocs;
7151 MachineFunction &MF = DAG.getMachineFunction();
7152 MachineFrameInfo &MFI = MF.getFrameInfo();
7153 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
7154
7155 const EVT PtrVT = getPointerTy(MF.getDataLayout());
7156 // Reserve space for the linkage area on the stack.
7157 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7158 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7159 CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
7160
7161 SmallVector<SDValue, 8> MemOps;
7162
7163 for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
7164 CCValAssign &VA = ArgLocs[I++];
7165 MVT LocVT = VA.getLocVT();
7166 ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
7167
7168 // For compatibility with the AIX XL compiler, the float args in the
7169 // parameter save area are initialized even if the argument is available
7170 // in register. The caller is required to initialize both the register
7171 // and memory, however, the callee can choose to expect it in either.
7172 // The memloc is dismissed here because the argument is retrieved from
7173 // the register.
7174 if (VA.isMemLoc() && VA.needsCustom())
7175 continue;
7176
7177 if (Flags.isByVal() && VA.isMemLoc()) {
7178 const unsigned Size =
7179 alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
7180 PtrByteSize);
7181 const int FI = MF.getFrameInfo().CreateFixedObject(
7182 Size, VA.getLocMemOffset(), /* IsImmutable */ false,
7183 /* IsAliased */ true);
7184 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7185 InVals.push_back(FIN);
7186
7187 continue;
7188 }
7189
7190 if (Flags.isByVal()) {
7191      assert(VA.isRegLoc() && "MemLocs should already be handled.");
7192
7193 const MCPhysReg ArgReg = VA.getLocReg();
7194 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
7195
7196 if (Flags.getNonZeroByValAlign() > PtrByteSize)
7197 report_fatal_error("Over aligned byvals not supported yet.");
7198
7199 const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
7200 const int FI = MF.getFrameInfo().CreateFixedObject(
7201 StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
7202 /* IsAliased */ true);
7203 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7204 InVals.push_back(FIN);
7205
7206 // Add live ins for all the RegLocs for the same ByVal.
7207 const TargetRegisterClass *RegClass =
7208 IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7209
7210 auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
7211 unsigned Offset) {
7212 const unsigned VReg = MF.addLiveIn(PhysReg, RegClass);
7213           // Since the caller's side has left-justified the aggregate in the
7214 // register, we can simply store the entire register into the stack
7215 // slot.
7216 SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7217           // The store to the fixed-stack object is needed because accessing a
7218           // field of the ByVal will use a GEP and load. Ideally we would extract
7219           // the value from the register directly and elide the stores when the
7220           // argument's address is not taken, but that is left as future work.
7222 SDValue Store = DAG.getStore(
7223 CopyFrom.getValue(1), dl, CopyFrom,
7224 DAG.getObjectPtrOffset(dl, FIN, TypeSize::Fixed(Offset)),
7225 MachinePointerInfo::getFixedStack(MF, FI, Offset));
7226
7227 MemOps.push_back(Store);
7228 };
7229
7230 unsigned Offset = 0;
7231 HandleRegLoc(VA.getLocReg(), Offset);
7232 Offset += PtrByteSize;
7233 for (; Offset != StackSize && ArgLocs[I].isRegLoc();
7234 Offset += PtrByteSize) {
7235         assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7236                "RegLocs should be for ByVal argument.");
7237
7238 const CCValAssign RL = ArgLocs[I++];
7239 HandleRegLoc(RL.getLocReg(), Offset);
7240 }
7241
7242 if (Offset != StackSize) {
7243         assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7244                "Expected MemLoc for remaining bytes.");
7245         assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
7246         // Consume the MemLoc. The InVal has already been emitted, so nothing
7247 // more needs to be done.
7248 ++I;
7249 }
7250
7251 continue;
7252 }
7253
7254 EVT ValVT = VA.getValVT();
7255 if (VA.isRegLoc() && !VA.needsCustom()) {
7256 MVT::SimpleValueType SVT = ValVT.getSimpleVT().SimpleTy;
7257 unsigned VReg =
7258 MF.addLiveIn(VA.getLocReg(), getRegClassForSVT(SVT, IsPPC64));
7259 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7260 if (ValVT.isScalarInteger() &&
7261 (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
7262 ArgValue =
7263 truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
7264 }
7265 InVals.push_back(ArgValue);
7266 continue;
7267 }
7268 if (VA.isMemLoc()) {
7269 const unsigned LocSize = LocVT.getStoreSize();
7270 const unsigned ValSize = ValVT.getStoreSize();
7271       assert((ValSize <= LocSize) &&
7272              "Object size is larger than size of MemLoc");
7273 int CurArgOffset = VA.getLocMemOffset();
7274 // Objects are right-justified because AIX is big-endian.
7275 if (LocSize > ValSize)
7276 CurArgOffset += LocSize - ValSize;
7277 // Potential tail calls could cause overwriting of argument stack slots.
7278 const bool IsImmutable =
7279 !(getTargetMachine().Options.GuaranteedTailCallOpt &&
7280 (CallConv == CallingConv::Fast));
7281 int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
7282 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7283 SDValue ArgValue =
7284 DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
7285 InVals.push_back(ArgValue);
7286 continue;
7287 }
7288 }
7289
7290 // On AIX a minimum of 8 words is saved to the parameter save area.
7291 const unsigned MinParameterSaveArea = 8 * PtrByteSize;
7292 // Area that is at least reserved in the caller of this function.
7293 unsigned CallerReservedArea =
7294 std::max(CCInfo.getNextStackOffset(), LinkageSize + MinParameterSaveArea);
7295
7296   // Set the size that is at least reserved in the caller of this function.
7297   // A tail-call-optimized function's reserved stack space needs to be aligned so
7298 // that taking the difference between two stack areas will result in an
7299 // aligned stack.
7300 CallerReservedArea =
7301 EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
7302 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
7303 FuncInfo->setMinReservedArea(CallerReservedArea);
7304
7305 if (isVarArg) {
7306 FuncInfo->setVarArgsFrameIndex(
7307 MFI.CreateFixedObject(PtrByteSize, CCInfo.getNextStackOffset(), true));
7308 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
7309
7310 static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7311 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7312
7313 static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7314 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7315 const unsigned NumGPArgRegs = array_lengthof(IsPPC64 ? GPR_64 : GPR_32);
7316
7317 // The fixed integer arguments of a variadic function are stored to the
7318 // VarArgsFrameIndex on the stack so that they may be loaded by
7319 // dereferencing the result of va_next.
7320 for (unsigned GPRIndex =
7321 (CCInfo.getNextStackOffset() - LinkageSize) / PtrByteSize;
7322 GPRIndex < NumGPArgRegs; ++GPRIndex) {
7323
7324 const unsigned VReg =
7325 IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
7326 : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
7327
7328 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
7329 SDValue Store =
7330 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
7331 MemOps.push_back(Store);
7332 // Increment the address for the next argument to store.
7333 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
7334 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
7335 }
7336 }
7337
7338 if (!MemOps.empty())
7339 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
7340
7341 return Chain;
7342}
7343
7344SDValue PPCTargetLowering::LowerCall_AIX(
7345 SDValue Chain, SDValue Callee, CallFlags CFlags,
7346 const SmallVectorImpl<ISD::OutputArg> &Outs,
7347 const SmallVectorImpl<SDValue> &OutVals,
7348 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7349 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
7350 const CallBase *CB) const {
7351 // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
7352 // AIX ABI stack frame layout.
7353
7354   assert((CFlags.CallConv == CallingConv::C ||
7355           CFlags.CallConv == CallingConv::Cold ||
7356           CFlags.CallConv == CallingConv::Fast) &&
7357          "Unexpected calling convention!");
7358
7359 if (CFlags.IsPatchPoint)
7360 report_fatal_error("This call type is unimplemented on AIX.");
7361
7362 const PPCSubtarget& Subtarget =
7363 static_cast<const PPCSubtarget&>(DAG.getSubtarget());
7364 if (Subtarget.hasAltivec())
7365 report_fatal_error("Altivec support is unimplemented on AIX.");
7366
7367 MachineFunction &MF = DAG.getMachineFunction();
7368 SmallVector<CCValAssign, 16> ArgLocs;
7369 CCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
7370 *DAG.getContext());
7371
7372 // Reserve space for the linkage save area (LSA) on the stack.
7373 // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7374 // [SP][CR][LR][2 x reserved][TOC].
7375 // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7376 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7377 const bool IsPPC64 = Subtarget.isPPC64();
7378 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
7379 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7380 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7381 CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
7382
7383 // The prolog code of the callee may store up to 8 GPR argument registers to
7384 // the stack, allowing va_start to index over them in memory if the callee
7385 // is variadic.
7386 // Because we cannot tell if this is needed on the caller side, we have to
7387 // conservatively assume that it is needed. As such, make sure we have at
7388 // least enough stack space for the caller to store the 8 GPRs.
7389 const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
7390 const unsigned NumBytes = std::max(LinkageSize + MinParameterSaveAreaSize,
7391 CCInfo.getNextStackOffset());
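  // For instance, with the PPC64 values quoted above (LinkageSize == 48,
  // PtrByteSize == 8) the minimum is 48 + 8 * 8 = 112 bytes, so NumBytes is
  // at least 112 even when the outgoing arguments need less stack space.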
7392
7393 // Adjust the stack pointer for the new arguments...
7394 // These operations are automatically eliminated by the prolog/epilog pass.
7395 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
7396 SDValue CallSeqStart = Chain;
7397
7398 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
7399 SmallVector<SDValue, 8> MemOpChains;
7400
7401 // Set up a copy of the stack pointer for loading and storing any
7402 // arguments that may not fit in the registers available for argument
7403 // passing.
7404 const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
7405 : DAG.getRegister(PPC::R1, MVT::i32);
7406
7407 for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
7408 const unsigned ValNo = ArgLocs[I].getValNo();
7409 SDValue Arg = OutVals[ValNo];
7410 ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
7411
7412 if (Flags.isByVal()) {
7413 const unsigned ByValSize = Flags.getByValSize();
7414
7415 // Nothing to do for zero-sized ByVals on the caller side.
7416 if (!ByValSize) {
7417 ++I;
7418 continue;
7419 }
7420
7421 auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
7422 return DAG.getExtLoad(
7423 ISD::ZEXTLOAD, dl, PtrVT, Chain,
7424 (LoadOffset != 0)
7425 ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
7426 : Arg,
7427 MachinePointerInfo(), VT);
7428 };
7429
7430 unsigned LoadOffset = 0;
7431
7432 // Initialize registers, which are fully occupied by the by-val argument.
7433 while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
7434 SDValue Load = GetLoad(PtrVT, LoadOffset);
7435 MemOpChains.push_back(Load.getValue(1));
7436 LoadOffset += PtrByteSize;
7437 const CCValAssign &ByValVA = ArgLocs[I++];
7438         assert(ByValVA.getValNo() == ValNo &&
7439                "Unexpected location for pass-by-value argument.");
7440 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
7441 }
7442
7443 if (LoadOffset == ByValSize)
7444 continue;
7445
7446 // There must be one more loc to handle the remainder.
7447       assert(ArgLocs[I].getValNo() == ValNo &&
7448              "Expected additional location for by-value argument.");
7449
7450 if (ArgLocs[I].isMemLoc()) {
7451         assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
7452 const CCValAssign &ByValVA = ArgLocs[I++];
7453 ISD::ArgFlagsTy MemcpyFlags = Flags;
7454       // Only memcpy the bytes that aren't passed in registers.
7455 MemcpyFlags.setByValSize(ByValSize - LoadOffset);
7456 Chain = CallSeqStart = createMemcpyOutsideCallSeq(
7457 (LoadOffset != 0)
7458 ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
7459 : Arg,
7460 DAG.getObjectPtrOffset(dl, StackPtr,
7461 TypeSize::Fixed(ByValVA.getLocMemOffset())),
7462 CallSeqStart, MemcpyFlags, DAG, dl);
7463 continue;
7464 }
7465
7466 // Initialize the final register residue.
7467 // Any residue that occupies the final by-val arg register must be
7468 // left-justified on AIX. Loads must be a power-of-2 size and cannot be
7469       // larger than the ByValSize. For example, a 7-byte by-val arg requires 4-,
7470       // 2- and 1-byte loads.
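      // As a concrete sketch of the loop below, assuming PPC64 (PtrByteSize
      // == 8) and a 7-byte residue: the loads are i32, i16 and i8, Bytes
      // accumulates to 4, 6 and 7, the shift amounts are 64 - 32 = 32,
      // 64 - 48 = 16 and 64 - 56 = 8, and ResidueVal ends up as
      //   (Load32 << 32) | (Load16 << 16) | (Load8 << 8),
      // i.e. the 7 residue bytes left-justified in the register.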
7471 const unsigned ResidueBytes = ByValSize % PtrByteSize;
7472       assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
7473              "Unexpected register residue for by-value argument.");
7474 SDValue ResidueVal;
7475 for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
7476 const unsigned N = PowerOf2Floor(ResidueBytes - Bytes);
7477 const MVT VT =
7478 N == 1 ? MVT::i8
7479 : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
7480 SDValue Load = GetLoad(VT, LoadOffset);
7481 MemOpChains.push_back(Load.getValue(1));
7482 LoadOffset += N;
7483 Bytes += N;
7484
7485         // By-val arguments are passed left-justified in registers.
7486 // Every load here needs to be shifted, otherwise a full register load
7487 // should have been used.
7488         assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
7489                "Unexpected load emitted during handling of pass-by-value "
7490                "argument.");
7491 unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
7492 EVT ShiftAmountTy =
7493 getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
7494 SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
7495 SDValue ShiftedLoad =
7496 DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
7497 ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
7498 ShiftedLoad)
7499 : ShiftedLoad;
7500 }
7501
7502 const CCValAssign &ByValVA = ArgLocs[I++];
7503 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
7504 continue;
7505 }
7506
7507 CCValAssign &VA = ArgLocs[I++];
7508 const MVT LocVT = VA.getLocVT();
7509 const MVT ValVT = VA.getValVT();
7510
7511 switch (VA.getLocInfo()) {
7512 default:
7513 report_fatal_error("Unexpected argument extension type.");
7514 case CCValAssign::Full:
7515 break;
7516 case CCValAssign::ZExt:
7517 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7518 break;
7519 case CCValAssign::SExt:
7520 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7521 break;
7522 }
7523
7524 if (VA.isRegLoc() && !VA.needsCustom()) {
7525 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
7526 continue;
7527 }
7528
7529 if (VA.isMemLoc()) {
7530 SDValue PtrOff =
7531 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7532 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7533 MemOpChains.push_back(
7534 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
7535
7536 continue;
7537 }
7538
7539 // Custom handling is used for GPR initializations for vararg float
7540 // arguments.
7541     assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
7542            ValVT.isFloatingPoint() && LocVT.isInteger() &&
7543            "Unexpected register handling for calling convention.");
7544
7545 SDValue ArgAsInt =
7546 DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);
7547
7548 if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7549 // f32 in 32-bit GPR
7550 // f64 in 64-bit GPR
7551 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
7552 else if (Arg.getValueType().getFixedSizeInBits() <
7553 LocVT.getFixedSizeInBits())
7554 // f32 in 64-bit GPR.
7555 RegsToPass.push_back(std::make_pair(
7556 VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
7557 else {
7558 // f64 in two 32-bit GPRs
7559 // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7560       assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7561              "Unexpected custom register for argument!");
7562 CCValAssign &GPR1 = VA;
7563 SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
7564 DAG.getConstant(32, dl, MVT::i8));
7565 RegsToPass.push_back(std::make_pair(
7566 GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
7567
7568 if (I != E) {
7569 // If only 1 GPR was available, there will only be one custom GPR and
7570           // the argument will also be passed in memory.
7571 CCValAssign &PeekArg = ArgLocs[I];
7572         if (PeekArg.isRegLoc() && PeekArg.getValNo() == ValNo) {
7573           assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7574 CCValAssign &GPR2 = ArgLocs[I++];
7575 RegsToPass.push_back(std::make_pair(
7576 GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
7577 }
7578 }
7579 }
7580 }
7581
7582 if (!MemOpChains.empty())
7583 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
7584
7585 // For indirect calls, we need to save the TOC base to the stack for
7586 // restoration after the call.
7587 if (CFlags.IsIndirect) {
7588     assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7589 const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7590 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7591 const MVT PtrVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
7592 const unsigned TOCSaveOffset =
7593 Subtarget.getFrameLowering()->getTOCSaveOffset();
7594
7595 setUsesTOCBasePtr(DAG);
7596 SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
7597 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
7598 SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
7599 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7600 Chain = DAG.getStore(
7601 Val.getValue(1), dl, Val, AddPtr,
7602 MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7603 }
7604
7605 // Build a sequence of copy-to-reg nodes chained together with token chain
7606 // and flag operands which copy the outgoing args into the appropriate regs.
7607 SDValue InFlag;
7608 for (auto Reg : RegsToPass) {
7609 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InFlag);
7610 InFlag = Chain.getValue(1);
7611 }
7612
7613 const int SPDiff = 0;
7614 return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
7615 Callee, SPDiff, NumBytes, Ins, InVals, CB);
7616}
7617
7618bool
7619PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7620 MachineFunction &MF, bool isVarArg,
7621 const SmallVectorImpl<ISD::OutputArg> &Outs,
7622 LLVMContext &Context) const {
7623 SmallVector<CCValAssign, 16> RVLocs;
7624 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7625 return CCInfo.CheckReturn(
7626 Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7627 ? RetCC_PPC_Cold
7628 : RetCC_PPC);
7629}
7630
7631SDValue
7632PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7633 bool isVarArg,
7634 const SmallVectorImpl<ISD::OutputArg> &Outs,
7635 const SmallVectorImpl<SDValue> &OutVals,
7636 const SDLoc &dl, SelectionDAG &DAG) const {
7637 SmallVector<CCValAssign, 16> RVLocs;
7638 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7639 *DAG.getContext());
7640 CCInfo.AnalyzeReturn(Outs,
7641 (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7642 ? RetCC_PPC_Cold
7643 : RetCC_PPC);
7644
7645 SDValue Flag;
7646 SmallVector<SDValue, 4> RetOps(1, Chain);
7647
7648 // Copy the result values into the output registers.
7649 for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
7650 CCValAssign &VA = RVLocs[i];
7651     assert(VA.isRegLoc() && "Can only return in registers!");
7652
7653 SDValue Arg = OutVals[RealResIdx];
7654
7655 if (Subtarget.isAIXABI() &&
7656 (VA.getLocVT().isVector() || VA.getValVT().isVector()))
7657 report_fatal_error("Returning vector types not yet supported on AIX.");
7658
7659 switch (VA.getLocInfo()) {
7660     default: llvm_unreachable("Unknown loc info!");
7661 case CCValAssign::Full: break;
7662 case CCValAssign::AExt:
7663 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
7664 break;
7665 case CCValAssign::ZExt:
7666 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7667 break;
7668 case CCValAssign::SExt:
7669 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7670 break;
7671 }
7672 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7673 bool isLittleEndian = Subtarget.isLittleEndian();
7674 // Legalize ret f64 -> ret 2 x i32.
7675 SDValue SVal =
7676 DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7677 DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
7678 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
7679 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7680 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7681 DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
7682 Flag = Chain.getValue(1);
7683 VA = RVLocs[++i]; // skip ahead to next loc
7684 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
7685 } else
7686 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
7687 Flag = Chain.getValue(1);
7688 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7689 }
7690
7691 RetOps[0] = Chain; // Update chain.
7692
7693 // Add the flag if we have it.
7694 if (Flag.getNode())
7695 RetOps.push_back(Flag);
7696
7697 return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
7698}
7699
7700SDValue
7701PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7702 SelectionDAG &DAG) const {
7703 SDLoc dl(Op);
7704
7705 // Get the correct type for integers.
7706 EVT IntVT = Op.getValueType();
7707
7708 // Get the inputs.
7709 SDValue Chain = Op.getOperand(0);
7710 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7711 // Build a DYNAREAOFFSET node.
7712 SDValue Ops[2] = {Chain, FPSIdx};
7713 SDVTList VTs = DAG.getVTList(IntVT);
7714 return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
7715}
7716
7717SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7718 SelectionDAG &DAG) const {
7719 // When we pop the dynamic allocation we need to restore the SP link.
7720 SDLoc dl(Op);
7721
7722 // Get the correct type for pointers.
7723 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7724
7725 // Construct the stack pointer operand.
7726 bool isPPC64 = Subtarget.isPPC64();
7727 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7728 SDValue StackPtr = DAG.getRegister(SP, PtrVT);
7729
7730 // Get the operands for the STACKRESTORE.
7731 SDValue Chain = Op.getOperand(0);
7732 SDValue SaveSP = Op.getOperand(1);
7733
7734 // Load the old link SP.
7735 SDValue LoadLinkSP =
7736 DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
7737
7738 // Restore the stack pointer.
7739 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
7740
7741 // Store the old link SP.
7742 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
7743}
7744
7745SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7746 MachineFunction &MF = DAG.getMachineFunction();
7747 bool isPPC64 = Subtarget.isPPC64();
7748 EVT PtrVT = getPointerTy(MF.getDataLayout());
7749
7750   // Get the current return address save index. It is created lazily the
7751   // first time it is requested.
7752 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7753 int RASI = FI->getReturnAddrSaveIndex();
7754
7755   // If the return address save index hasn't been defined yet.
7756   if (!RASI) {
7757     // Find out the fixed offset of the return address save area.
7758     int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
7759     // Allocate the frame index for the return address save area.
7760 RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
7761 // Save the result.
7762 FI->setReturnAddrSaveIndex(RASI);
7763 }
7764 return DAG.getFrameIndex(RASI, PtrVT);
7765}
7766
7767SDValue
7768PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
7769 MachineFunction &MF = DAG.getMachineFunction();
7770 bool isPPC64 = Subtarget.isPPC64();
7771 EVT PtrVT = getPointerTy(MF.getDataLayout());
7772
7773 // Get current frame pointer save index. The users of this index will be
7774 // primarily DYNALLOC instructions.
7775 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7776 int FPSI = FI->getFramePointerSaveIndex();
7777
7778 // If the frame pointer save index hasn't been defined yet.
7779 if (!FPSI) {
7781     // Find out the fixed offset of the frame pointer save area.
7781 int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
7782 // Allocate the frame index for frame pointer save area.
7783 FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
7784 // Save the result.
7785 FI->setFramePointerSaveIndex(FPSI);
7786 }
7787 return DAG.getFrameIndex(FPSI, PtrVT);
7788}
7789
7790SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
7791 SelectionDAG &DAG) const {
7792 MachineFunction &MF = DAG.getMachineFunction();
7793 // Get the inputs.
7794 SDValue Chain = Op.getOperand(0);
7795 SDValue Size = Op.getOperand(1);
7796 SDLoc dl(Op);
7797
7798 // Get the correct type for pointers.
7799 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7800 // Negate the size.
7801 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
7802 DAG.getConstant(0, dl, PtrVT), Size);
7803 // Construct a node for the frame pointer save index.
7804 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7805 SDValue Ops[3] = { Chain, NegSize, FPSIdx };
7806 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
7807 if (hasInlineStackProbe(MF))
7808 return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
7809 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
7810}
7811
7812SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
7813 SelectionDAG &DAG) const {
7814 MachineFunction &MF = DAG.getMachineFunction();
7815
7816 bool isPPC64 = Subtarget.isPPC64();
7817 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7818
7819 int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
7820 return DAG.getFrameIndex(FI, PtrVT);
7821}
7822
7823SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
7824 SelectionDAG &DAG) const {
7825 SDLoc DL(Op);
7826 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
7827 DAG.getVTList(MVT::i32, MVT::Other),
7828 Op.getOperand(0), Op.getOperand(1));
7829}
7830
7831SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
7832 SelectionDAG &DAG) const {
7833 SDLoc DL(Op);
7834 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
7835 Op.getOperand(0), Op.getOperand(1));
7836}
7837
7838SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
7839 if (Op.getValueType().isVector())
7840 return LowerVectorLoad(Op, DAG);
7841
7842   assert(Op.getValueType() == MVT::i1 &&
7843          "Custom lowering only for i1 loads");
7844
7845 // First, load 8 bits into 32 bits, then truncate to 1 bit.
7846
7847 SDLoc dl(Op);
7848 LoadSDNode *LD = cast<LoadSDNode>(Op);
7849
7850 SDValue Chain = LD->getChain();
7851 SDValue BasePtr = LD->getBasePtr();
7852 MachineMemOperand *MMO = LD->getMemOperand();
7853
7854 SDValue NewLD =
7855 DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
7856 BasePtr, MVT::i8, MMO);
7857 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
7858
7859 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
7860 return DAG.getMergeValues(Ops, dl);
7861}
7862
7863SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
7864 if (Op.getOperand(1).getValueType().isVector())
7865 return LowerVectorStore(Op, DAG);
7866
7867   assert(Op.getOperand(1).getValueType() == MVT::i1 &&
7868          "Custom lowering only for i1 stores");
7869
7870 // First, zero extend to 32 bits, then use a truncating store to 8 bits.
7871
7872 SDLoc dl(Op);
7873 StoreSDNode *ST = cast<StoreSDNode>(Op);
7874
7875 SDValue Chain = ST->getChain();
7876 SDValue BasePtr = ST->getBasePtr();
7877 SDValue Value = ST->getValue();
7878 MachineMemOperand *MMO = ST->getMemOperand();
7879
7880 Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
7881 Value);
7882 return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
7883}
7884
7885// FIXME: Remove this once the ANDI glue bug is fixed:
7886SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
7887   assert(Op.getValueType() == MVT::i1 &&
7888          "Custom lowering only for i1 results");
7889
7890 SDLoc DL(Op);
7891 return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
7892}
7893
7894SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
7895 SelectionDAG &DAG) const {
7896
7897 // Implements a vector truncate that fits in a vector register as a shuffle.
7898 // We want to legalize vector truncates down to where the source fits in
7899 // a vector register (and target is therefore smaller than vector register
7900   // size). At that point legalization will try to custom lower the sub-legal
7901   // result and get here, where we can contain the truncate as a single target
7902 // operation.
7903
7904 // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
7905 // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
7906 //
7907 // We will implement it for big-endian ordering as this (where x denotes
7908 // undefined):
7909 // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
7910 // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
7911 //
7912 // The same operation in little-endian ordering will be:
7913 // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
7914 // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
7915
7916 EVT TrgVT = Op.getValueType();
7917   assert(TrgVT.isVector() && "Vector type expected.");
7918 unsigned TrgNumElts = TrgVT.getVectorNumElements();
7919 EVT EltVT = TrgVT.getVectorElementType();
7920 if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
7921 TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
7922 !isPowerOf2_32(EltVT.getSizeInBits()))
7923 return SDValue();
7924
7925 SDValue N1 = Op.getOperand(0);
7926 EVT SrcVT = N1.getValueType();
7927 unsigned SrcSize = SrcVT.getSizeInBits();
7928 if (SrcSize > 256 ||
7929 !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
7930 !isPowerOf2_32(SrcVT.getVectorElementType().getSizeInBits()))
7931 return SDValue();
7932 if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
7933 return SDValue();
7934
7935 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
7936 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
7937
7938 SDLoc DL(Op);
7939 SDValue Op1, Op2;
7940 if (SrcSize == 256) {
7941 EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
7942 EVT SplitVT =
7943 N1.getValueType().getHalfNumVectorElementsVT(*DAG.getContext());
7944 unsigned SplitNumElts = SplitVT.getVectorNumElements();
7945 Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
7946 DAG.getConstant(0, DL, VecIdxTy));
7947 Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
7948 DAG.getConstant(SplitNumElts, DL, VecIdxTy));
7949 }
7950 else {
7951 Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
7952 Op2 = DAG.getUNDEF(WideVT);
7953 }
7954
7955 // First list the elements we want to keep.
7956 unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
7957 SmallVector<int, 16> ShuffV;
7958 if (Subtarget.isLittleEndian())
7959 for (unsigned i = 0; i < TrgNumElts; ++i)
7960 ShuffV.push_back(i * SizeMult);
7961 else
7962 for (unsigned i = 1; i <= TrgNumElts; ++i)
7963 ShuffV.push_back(i * SizeMult - 1);
7964
7965 // Populate the remaining elements with undefs.
7966 for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
7967 // ShuffV.push_back(i + WideNumElts);
7968 ShuffV.push_back(WideNumElts + 1);
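  // For the trunc <2 x i16> to <2 x i8> example above, SrcSize is 32, so the
  // source is widened, SizeMult == 2 and WideNumElts == 16; the kept indices
  // are {0, 2} for little-endian and {1, 3} for big-endian, and the remaining
  // 14 entries point into the undef Op2, making them don't-care lanes.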
7969
7970 Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
7971 Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
7972 return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
7973}
7974
7975/// LowerSELECT_CC - Lower floating-point select_cc's into the fsel instruction
7976/// when possible.
7977SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
7978 // Not FP, or using SPE? Not a fsel.
7979 if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
7980 !Op.getOperand(2).getValueType().isFloatingPoint() || Subtarget.hasSPE())
7981 return Op;
7982
7983 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
7984
7985 EVT ResVT = Op.getValueType();
7986 EVT CmpVT = Op.getOperand(0).getValueType();
7987 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
7988 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
7989 SDLoc dl(Op);
7990 SDNodeFlags Flags = Op.getNode()->getFlags();
7991
7992 // We have xsmaxcdp/xsmincdp which are OK to emit even in the
7993 // presence of infinities.
7994 if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
7995 switch (CC) {
7996 default:
7997 break;
7998 case ISD::SETOGT:
7999 case ISD::SETGT:
8000 return DAG.getNode(PPCISD::XSMAXCDP, dl, Op.getValueType(), LHS, RHS);
8001 case ISD::SETOLT:
8002 case ISD::SETLT:
8003 return DAG.getNode(PPCISD::XSMINCDP, dl, Op.getValueType(), LHS, RHS);
8004 }
8005 }
8006
8007 // We might be able to do better than this under some circumstances, but in
8008 // general, fsel-based lowering of select is a finite-math-only optimization.
8009 // For more information, see section F.3 of the 2.06 ISA specification.
8010   // With ISA 3.0, the xsmaxcdp/xsmincdp path above avoids this restriction.
8011 if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
8012 (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()))
8013 return Op;
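  // As a reminder of the underlying semantics: FSEL(X, TV, FV) yields TV when
  // X is greater than or equal to zero and FV otherwise, so each case below
  // builds a value whose sign encodes the comparison being lowered.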
8014
8015 // If the RHS of the comparison is a 0.0, we don't need to do the
8016 // subtraction at all.
8017 SDValue Sel1;
8018 if (isFloatingPointZero(RHS))
8019 switch (CC) {
8020 default: break; // SETUO etc aren't handled by fsel.
8021 case ISD::SETNE:
8022 std::swap(TV, FV);
8023 LLVM_FALLTHROUGH[[gnu::fallthrough]];
8024 case ISD::SETEQ:
8025 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8026 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8027 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8028 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8029 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8030 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8031 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
8032 case ISD::SETULT:
8033 case ISD::SETLT:
8034 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
8035 LLVM_FALLTHROUGH[[gnu::fallthrough]];
8036 case ISD::SETOGE:
8037 case ISD::SETGE:
8038 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8039 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8040 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8041 case ISD::SETUGT:
8042 case ISD::SETGT:
8043 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
8044 LLVM_FALLTHROUGH[[gnu::fallthrough]];
8045 case ISD::SETOLE:
8046 case ISD::SETLE:
8047 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8048 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8049 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8050 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
8051 }
8052
8053 SDValue Cmp;
8054 switch (CC) {
8055 default: break; // SETUO etc aren't handled by fsel.
8056 case ISD::SETNE:
8057 std::swap(TV, FV);
8058 LLVM_FALLTHROUGH[[gnu::fallthrough]];
8059 case ISD::SETEQ:
8060 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8061 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8062 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8063 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8064 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8065 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8066 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8067 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
8068 case ISD::SETULT:
8069 case ISD::SETLT:
8070 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8071 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8072 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8073 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8074 case ISD::SETOGE:
8075 case ISD::SETGE:
8076 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8077 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8078 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8079 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8080 case ISD::SETUGT:
8081 case ISD::SETGT:
8082 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8083 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8084 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8085 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8086 case ISD::SETOLE:
8087 case ISD::SETLE:
8088 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8089 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8090 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8091 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8092 }
8093 return Op;
8094}
8095
8096static unsigned getPPCStrictOpcode(unsigned Opc) {
8097 switch (Opc) {
8098 default:
8099     llvm_unreachable("No strict version of this opcode!");
8100 case PPCISD::FCTIDZ:
8101 return PPCISD::STRICT_FCTIDZ;
8102 case PPCISD::FCTIWZ:
8103 return PPCISD::STRICT_FCTIWZ;
8104 case PPCISD::FCTIDUZ:
8105 return PPCISD::STRICT_FCTIDUZ;
8106 case PPCISD::FCTIWUZ:
8107 return PPCISD::STRICT_FCTIWUZ;
8108 case PPCISD::FCFID:
8109 return PPCISD::STRICT_FCFID;
8110 case PPCISD::FCFIDU:
8111 return PPCISD::STRICT_FCFIDU;
8112 case PPCISD::FCFIDS:
8113 return PPCISD::STRICT_FCFIDS;
8114 case PPCISD::FCFIDUS:
8115 return PPCISD::STRICT_FCFIDUS;
8116 }
8117}
8118
8119static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
8120 const PPCSubtarget &Subtarget) {
8121 SDLoc dl(Op);
8122 bool IsStrict = Op->isStrictFPOpcode();
8123 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8124 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8125
8126 // TODO: Any other flags to propagate?
8127 SDNodeFlags Flags;
8128 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8129
8130 // For strict nodes, source is the second operand.
8131 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8132 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8133   assert(Src.getValueType().isFloatingPoint());
8134 if (Src.getValueType() == MVT::f32) {
8135 if (IsStrict) {
8136 Src =
8137 DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
8138 DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
8139 Chain = Src.getValue(1);
8140 } else
8141 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
8142 }
8143 SDValue Conv;
8144 unsigned Opc = ISD::DELETED_NODE;
8145 switch (Op.getSimpleValueType().SimpleTy) {
8146   default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
8147 case MVT::i32:
8148 Opc = IsSigned ? PPCISD::FCTIWZ
8149 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
8150 break;
8151 case MVT::i64:
8152     assert((IsSigned || Subtarget.hasFPCVT()) &&
8153            "i64 FP_TO_UINT is supported only with FPCVT");
8154 Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
8155 }
8156 if (IsStrict) {
8157 Opc = getPPCStrictOpcode(Opc);
8158 Conv = DAG.getNode(Opc, dl, DAG.getVTList(MVT::f64, MVT::Other),
8159 {Chain, Src}, Flags);
8160 } else {
8161 Conv = DAG.getNode(Opc, dl, MVT::f64, Src);
8162 }
8163 return Conv;
8164}
8165
8166void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
8167 SelectionDAG &DAG,
8168 const SDLoc &dl) const {
8169 SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
8170 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8171 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8172 bool IsStrict = Op->isStrictFPOpcode();
8173
8174 // Convert the FP value to an int value through memory.
8175 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
8176 (IsSigned || Subtarget.hasFPCVT());
8177 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
8178 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
8179 MachinePointerInfo MPI =
8180 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
8181
8182 // Emit a store to the stack slot.
8183 SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
8184 Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
8185 if (i32Stack) {
8186 MachineFunction &MF = DAG.getMachineFunction();
8187 Alignment = Align(4);
8188 MachineMemOperand *MMO =
8189 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
8190 SDValue Ops[] = { Chain, Tmp, FIPtr };
8191 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
8192 DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
8193 } else
8194 Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
8195
8196 // Result is a load from the stack slot. If loading 4 bytes, make sure to
8197 // add in a bias on big endian.
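  // The conversion leaves its 32-bit result in the low word of the f64 stack
  // temporary, so on a big-endian target that word sits at byte offset 4 of
  // the slot, which is the bias applied below.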
8198 if (Op.getValueType() == MVT::i32 && !i32Stack) {
8199 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
8200 DAG.getConstant(4, dl, FIPtr.getValueType()));
8201 MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
8202 }
8203
8204 RLI.Chain = Chain;
8205 RLI.Ptr = FIPtr;
8206 RLI.MPI = MPI;
8207 RLI.Alignment = Alignment;
8208}
8209
8210/// Custom lowers floating point to integer conversions to use
8211/// the direct move instructions available in ISA 2.07 to avoid the
8212/// need for load/store combinations.
8213SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8214 SelectionDAG &DAG,
8215 const SDLoc &dl) const {
8216 SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8217 SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
8218 if (Op->isStrictFPOpcode())
8219 return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
8220 else
8221 return Mov;
8222}
8223
8224SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
8225 const SDLoc &dl) const {
8226 bool IsStrict = Op->isStrictFPOpcode();
8227 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8228 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8229 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8230 EVT SrcVT = Src.getValueType();
8231 EVT DstVT = Op.getValueType();
8232
8233 // FP to INT conversions are legal for f128.
8234 if (SrcVT == MVT::f128)
8235 return Op;
8236
8237 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
8238 // PPC (the libcall is not available).
8239 if (SrcVT == MVT::ppcf128) {
8240 if (DstVT == MVT::i32) {
8241 // TODO: Conservatively pass only nofpexcept flag here. Need to check and
8242 // set other fast-math flags to FP operations in both strict and
8243 // non-strict cases. (FP_TO_SINT, FSUB)
8244 SDNodeFlags Flags;
8245 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8246
8247 if (IsSigned) {
8248 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src,
8249 DAG.getIntPtrConstant(0, dl));
8250 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src,
8251 DAG.getIntPtrConstant(1, dl));
8252
8253 // Add the two halves of the long double in round-to-zero mode, and use
8254 // a smaller FP_TO_SINT.
8255 if (IsStrict) {
8256 SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
8257 DAG.getVTList(MVT::f64, MVT::Other),
8258 {Op.getOperand(0), Lo, Hi}, Flags);
8259 return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8260 DAG.getVTList(MVT::i32, MVT::Other),
8261 {Res.getValue(1), Res}, Flags);
8262 } else {
8263 SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
8264 return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
8265 }
8266 } else {
8267 const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
8268 APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
8269 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8270 SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
8271 if (IsStrict) {
8272 // Sel = Src < 0x80000000
8273 // FltOfs = select Sel, 0.0, 0x80000000
8274 // IntOfs = select Sel, 0, 0x80000000
8275 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8276 SDValue Chain = Op.getOperand(0);
8277 EVT SetCCVT =
8278 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8279 EVT DstSetCCVT =
8280 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8281 SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8282 Chain, true);
8283 Chain = Sel.getValue(1);
8284
8285 SDValue FltOfs = DAG.getSelect(
8286 dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8287 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8288
8289 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
8290 DAG.getVTList(SrcVT, MVT::Other),
8291 {Chain, Src, FltOfs}, Flags);
8292 Chain = Val.getValue(1);
8293 SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8294 DAG.getVTList(DstVT, MVT::Other),
8295 {Chain, Val}, Flags);
8296 Chain = SInt.getValue(1);
8297 SDValue IntOfs = DAG.getSelect(
8298 dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
8299 SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8300 return DAG.getMergeValues({Result, Chain}, dl);
8301 } else {
8302 // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
8303 // FIXME: generated code sucks.
8304 SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
8305 True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
8306 True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
8307 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
8308 return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
8309 }
8310 }
8311 }
8312
8313 return SDValue();
8314 }
8315
8316 if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
8317 return LowerFP_TO_INTDirectMove(Op, DAG, dl);
8318
8319 ReuseLoadInfo RLI;
8320 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8321
8322 return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8323 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8324}
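A minimal scalar sketch (not part of the source; uses plain double rather than ppcf128) of the unsigned-via-signed conversion that the ppcf128 path above builds out of FSUB, FP_TO_SINT and an XOR/ADD of the sign bit:

  #include <cstdint>

  // X >= 2^31 ? (int)(X - 2^31) + 0x80000000 : (int)X, for X in [0, 2^32).
  static uint32_t fpToUint32ViaSigned(double X) {
    const double TwoE31 = 2147483648.0;
    if (X >= TwoE31)
      return static_cast<uint32_t>(static_cast<int32_t>(X - TwoE31)) + 0x80000000u;
    return static_cast<uint32_t>(static_cast<int32_t>(X));
  }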
8325
8326// We're trying to insert a regular store, S, and then a load, L. If the
8327// incoming value, O, is a load, we might just be able to have our load use the
8328// address used by O. However, we don't know if anything else will store to
8329// that address before we can load from it. To prevent this situation, we need
8330// to insert our load, L, into the chain as a peer of O. To do this, we give L
8331// the same chain operand as O, we create a token factor from the chain results
8332// of O and L, and we replace all uses of O's chain result with that token
8333// factor (see spliceIntoChain below for this last part).
8334bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
8335 ReuseLoadInfo &RLI,
8336 SelectionDAG &DAG,
8337 ISD::LoadExtType ET) const {
8338 // Conservatively skip reusing for constrained FP nodes.
8339 if (Op->isStrictFPOpcode())
8340 return false;
8341
8342 SDLoc dl(Op);
8343 bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
8344 (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
8345 if (ET == ISD::NON_EXTLOAD &&
8346 (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
8347 isOperationLegalOrCustom(Op.getOpcode(),
8348 Op.getOperand(0).getValueType())) {
8349
8350 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8351 return true;
8352 }
8353
8354 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
8355 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
8356 LD->isNonTemporal())
8357 return false;
8358 if (LD->getMemoryVT() != MemVT)
8359 return false;
8360
8361 RLI.Ptr = LD->getBasePtr();
8362 if (LD->isIndexed() && !LD->getOffset().isUndef()) {
8363    assert(LD->getAddressingMode() == ISD::PRE_INC &&
8364           "Non-pre-inc AM on PPC?");
8365 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
8366 LD->getOffset());
8367 }
8368
8369 RLI.Chain = LD->getChain();
8370 RLI.MPI = LD->getPointerInfo();
8371 RLI.IsDereferenceable = LD->isDereferenceable();
8372 RLI.IsInvariant = LD->isInvariant();
8373 RLI.Alignment = LD->getAlign();
8374 RLI.AAInfo = LD->getAAInfo();
8375 RLI.Ranges = LD->getRanges();
8376
8377 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
8378 return true;
8379}
8380
8381// Given the head of the old chain, ResChain, insert a token factor containing
8382// it and NewResChain, and make users of ResChain now be users of that token
8383// factor.
8384// TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
8385void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
8386 SDValue NewResChain,
8387 SelectionDAG &DAG) const {
8388 if (!ResChain)
8389 return;
8390
8391 SDLoc dl(NewResChain);
8392
8393 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
8394 NewResChain, DAG.getUNDEF(MVT::Other));
8395  assert(TF.getNode() != NewResChain.getNode() &&
8396         "A new TF really is required here");
8397
8398 DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
8399 DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
8400}
8401
8402/// Analyze the profitability of a direct move.
8403/// Prefer a float load over an int load plus direct move
8404/// when there is no integer use of the int load.
8405bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8406 SDNode *Origin = Op.getOperand(0).getNode();
8407 if (Origin->getOpcode() != ISD::LOAD)
8408 return true;
8409
8410 // If there is no LXSIBZX/LXSIHZX, like Power8,
8411 // prefer direct move if the memory size is 1 or 2 bytes.
8412 MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8413 if (!Subtarget.hasP9Vector() && MMO->getSize() <= 2)
8414 return true;
8415
8416 for (SDNode::use_iterator UI = Origin->use_begin(),
8417 UE = Origin->use_end();
8418 UI != UE; ++UI) {
8419
8420 // Only look at the users of the loaded value.
8421 if (UI.getUse().get().getResNo() != 0)
8422 continue;
8423
8424 if (UI->getOpcode() != ISD::SINT_TO_FP &&
8425 UI->getOpcode() != ISD::UINT_TO_FP &&
8426 UI->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8427 UI->getOpcode() != ISD::STRICT_UINT_TO_FP)
8428 return true;
8429 }
8430
8431 return false;
8432}
8433
8434static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
8435 const PPCSubtarget &Subtarget,
8436 SDValue Chain = SDValue()) {
8437 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8438 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8439 SDLoc dl(Op);
8440
8441 // TODO: Any other flags to propagate?
8442 SDNodeFlags Flags;
8443 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8444
8445 // If we have FCFIDS, then use it when converting to single-precision.
8446 // Otherwise, convert to double-precision and then round.
8447 bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
8448 unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
8449 : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
8450 EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
8451 if (Op->isStrictFPOpcode()) {
8452 if (!Chain)
8453 Chain = Op.getOperand(0);
8454 return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
8455 DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
8456 } else
8457 return DAG.getNode(ConvOpc, dl, ConvTy, Src);
8458}
8459
8460/// Custom lowers integer to floating point conversions to use
8461/// the direct move instructions available in ISA 2.07 to avoid the
8462/// need for load/store combinations.
8463SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8464 SelectionDAG &DAG,
8465 const SDLoc &dl) const {
8466  assert((Op.getValueType() == MVT::f32 ||
8467          Op.getValueType() == MVT::f64) &&
8468         "Invalid floating point type as target of conversion");
8469  assert(Subtarget.hasFPCVT() &&
8470         "Int to FP conversions with direct moves require FPCVT");
8471 SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
8472 bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8473 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8474 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8475 unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8476 SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
8477 return convertIntToFP(Op, Mov, DAG, Subtarget);
8478}
8479
8480static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8481
8482 EVT VecVT = Vec.getValueType();
8483  assert(VecVT.isVector() && "Expected a vector type.");
8484  assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8485
8486 EVT EltVT = VecVT.getVectorElementType();
8487 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8488 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8489
8490 unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8491 SmallVector<SDValue, 16> Ops(NumConcat);
8492 Ops[0] = Vec;
8493 SDValue UndefVec = DAG.getUNDEF(VecVT);
8494 for (unsigned i = 1; i < NumConcat; ++i)
8495 Ops[i] = UndefVec;
8496
8497 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
8498}
8499
8500SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
8501 const SDLoc &dl) const {
8502 bool IsStrict = Op->isStrictFPOpcode();
8503 unsigned Opc = Op.getOpcode();
8504 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8505  assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP ||
8506          Opc == ISD::STRICT_UINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP) &&
8507         "Unexpected conversion type");
8508  assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
8509         "Supports conversions to v2f64/v4f32 only.");
8510
8511 // TODO: Any other flags to propagate?
8512 SDNodeFlags Flags;
8513 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8514
8515 bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
8516 bool FourEltRes = Op.getValueType() == MVT::v4f32;
8517
8518 SDValue Wide = widenVec(DAG, Src, dl);
8519 EVT WideVT = Wide.getValueType();
8520 unsigned WideNumElts = WideVT.getVectorNumElements();
8521 MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
8522
8523 SmallVector<int, 16> ShuffV;
8524 for (unsigned i = 0; i < WideNumElts; ++i)
8525 ShuffV.push_back(i + WideNumElts);
8526
8527 int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
8528 int SaveElts = FourEltRes ? 4 : 2;
8529 if (Subtarget.isLittleEndian())
8530 for (int i = 0; i < SaveElts; i++)
8531 ShuffV[i * Stride] = i;
8532 else
8533 for (int i = 1; i <= SaveElts; i++)
8534 ShuffV[i * Stride - 1] = i - 1;
8535
8536 SDValue ShuffleSrc2 =
8537 SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
8538 SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
8539
8540 SDValue Extend;
8541 if (SignedConv) {
8542 Arrange = DAG.getBitcast(IntermediateVT, Arrange);
8543 EVT ExtVT = Src.getValueType();
8544 if (Subtarget.hasP9Altivec())
8545 ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
8546 IntermediateVT.getVectorNumElements());
8547
8548 Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
8549 DAG.getValueType(ExtVT));
8550 } else
8551 Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
8552
8553 if (IsStrict)
8554 return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
8555 {Op.getOperand(0), Extend}, Flags);
8556
8557 return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
8558}
8559
8560SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
8561 SelectionDAG &DAG) const {
8562 SDLoc dl(Op);
8563 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8564 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8565 bool IsStrict = Op->isStrictFPOpcode();
8566 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8567 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
8568
8569 // TODO: Any other flags to propagate?
8570 SDNodeFlags Flags;
8571 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8572
8573 EVT InVT = Src.getValueType();
8574 EVT OutVT = Op.getValueType();
8575 if (OutVT.isVector() && OutVT.isFloatingPoint() &&
8576 isOperationCustom(Op.getOpcode(), InVT))
8577 return LowerINT_TO_FPVector(Op, DAG, dl);
8578
8579 // Conversions to f128 are legal.
8580 if (Op.getValueType() == MVT::f128)
8581 return Op;
8582
8583 // Don't handle ppc_fp128 here; let it be lowered to a libcall.
8584 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8585 return SDValue();
8586
8587 if (Src.getValueType() == MVT::i1)
8588 return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
8589 DAG.getConstantFP(1.0, dl, Op.getValueType()),
8590 DAG.getConstantFP(0.0, dl, Op.getValueType()));
8591
8592  // If we have direct moves, we can do all the conversion and skip the
8593  // store/load; however, without FPCVT we can't do most conversions.
8594 if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8595 Subtarget.isPPC64() && Subtarget.hasFPCVT())
8596 return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8597
8598  assert((IsSigned || Subtarget.hasFPCVT()) &&
8599         "UINT_TO_FP is supported only with FPCVT");
8600
8601 if (Src.getValueType() == MVT::i64) {
8602 SDValue SINT = Src;
8603 // When converting to single-precision, we actually need to convert
8604 // to double-precision first and then round to single-precision.
8605 // To avoid double-rounding effects during that operation, we have
8606 // to prepare the input operand. Bits that might be truncated when
8607 // converting to double-precision are replaced by a bit that won't
8608 // be lost at this stage, but is below the single-precision rounding
8609 // position.
8610 //
8611 // However, if -enable-unsafe-fp-math is in effect, accept double
8612 // rounding to avoid the extra overhead.
8613 if (Op.getValueType() == MVT::f32 &&
8614 !Subtarget.hasFPCVT() &&
8615 !DAG.getTarget().Options.UnsafeFPMath) {
8616
8617 // Twiddle input to make sure the low 11 bits are zero. (If this
8618 // is the case, we are guaranteed the value will fit into the 53 bit
8619 // mantissa of an IEEE double-precision value without rounding.)
8620 // If any of those low 11 bits were not zero originally, make sure
8621 // bit 12 (value 2048) is set instead, so that the final rounding
8622 // to single-precision gets the correct result.
8623 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8624 SINT, DAG.getConstant(2047, dl, MVT::i64));
8625 Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
8626 Round, DAG.getConstant(2047, dl, MVT::i64));
8627 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
8628 Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8629 Round, DAG.getConstant(-2048, dl, MVT::i64));
8630
8631 // However, we cannot use that value unconditionally: if the magnitude
8632 // of the input value is small, the bit-twiddling we did above might
8633 // end up visibly changing the output. Fortunately, in that case, we
8634 // don't need to twiddle bits since the original input will convert
8635 // exactly to double-precision floating-point already. Therefore,
8636 // construct a conditional to use the original value if the top 11
8637 // bits are all sign-bit copies, and use the rounded value computed
8638 // above otherwise.
8639 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
8640 SINT, DAG.getConstant(53, dl, MVT::i32));
8641 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
8642 Cond, DAG.getConstant(1, dl, MVT::i64));
8643 Cond = DAG.getSetCC(
8644 dl,
8645 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
8646 Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
8647
8648 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
8649 }
8650
8651 ReuseLoadInfo RLI;
8652 SDValue Bits;
8653
8654 MachineFunction &MF = DAG.getMachineFunction();
8655 if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
8656 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8657 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8658 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8659 } else if (Subtarget.hasLFIWAX() &&
8660 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
8661 MachineMemOperand *MMO =
8662 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8663 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8664 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8665 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
8666 DAG.getVTList(MVT::f64, MVT::Other),
8667 Ops, MVT::i32, MMO);
8668 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8669 } else if (Subtarget.hasFPCVT() &&
8670 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
8671 MachineMemOperand *MMO =
8672 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8673 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8674 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8675 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
8676 DAG.getVTList(MVT::f64, MVT::Other),
8677 Ops, MVT::i32, MMO);
8678 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8679 } else if (((Subtarget.hasLFIWAX() &&
8680 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
8681 (Subtarget.hasFPCVT() &&
8682 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8683 SINT.getOperand(0).getValueType() == MVT::i32) {
8684 MachineFrameInfo &MFI = MF.getFrameInfo();
8685 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8686
8687 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8688 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8689
8690 SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
8691 MachinePointerInfo::getFixedStack(
8692 DAG.getMachineFunction(), FrameIdx));
8693 Chain = Store;
8694
8695      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8696             "Expected an i32 store");
8697
8698 RLI.Ptr = FIdx;
8699 RLI.Chain = Chain;
8700 RLI.MPI =
8701 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8702 RLI.Alignment = Align(4);
8703
8704 MachineMemOperand *MMO =
8705 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8706 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8707 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8708 Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
8709 PPCISD::LFIWZX : PPCISD::LFIWAX,
8710 dl, DAG.getVTList(MVT::f64, MVT::Other),
8711 Ops, MVT::i32, MMO);
8712 Chain = Bits.getValue(1);
8713 } else
8714 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
8715
8716 SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
8717 if (IsStrict)
8718 Chain = FP.getValue(1);
8719
8720 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8721 if (IsStrict)
8722 FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
8723 DAG.getVTList(MVT::f32, MVT::Other),
8724 {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8725 else
8726 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8727 DAG.getIntPtrConstant(0, dl));
8728 }
8729 return FP;
8730 }
8731
8732  assert(Src.getValueType() == MVT::i32 &&
8733         "Unhandled INT_TO_FP type in custom expander!");
8734 // Since we only generate this in 64-bit mode, we can take advantage of
8735 // 64-bit registers. In particular, sign extend the input value into the
8736 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
8737 // then lfd it and fcfid it.
8738 MachineFunction &MF = DAG.getMachineFunction();
8739 MachineFrameInfo &MFI = MF.getFrameInfo();
8740 EVT PtrVT = getPointerTy(MF.getDataLayout());
8741
8742 SDValue Ld;
8743 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
8744 ReuseLoadInfo RLI;
8745 bool ReusingLoad;
8746 if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
8747 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8748 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8749
8750 SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
8751 MachinePointerInfo::getFixedStack(
8752 DAG.getMachineFunction(), FrameIdx));
8753 Chain = Store;
8754
8755      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8756             "Expected an i32 store");
8757
8758 RLI.Ptr = FIdx;
8759 RLI.Chain = Chain;
8760 RLI.MPI =
8761 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8762 RLI.Alignment = Align(4);
8763 }
8764
8765 MachineMemOperand *MMO =
8766 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8767 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8768 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8769 Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
8770 DAG.getVTList(MVT::f64, MVT::Other), Ops,
8771 MVT::i32, MMO);
8772 Chain = Ld.getValue(1);
8773 if (ReusingLoad)
8774 spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
8775 } else {
8776    assert(Subtarget.isPPC64() &&
8777           "i32->FP without LFIWAX supported only on PPC64");
8778
8779 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
8780 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8781
8782 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);
8783
8784 // STD the extended value into the stack slot.
8785 SDValue Store = DAG.getStore(
8786 Chain, dl, Ext64, FIdx,
8787 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
8788 Chain = Store;
8789
8790 // Load the value as a double.
8791 Ld = DAG.getLoad(
8792 MVT::f64, dl, Chain, FIdx,
8793 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
8794 Chain = Ld.getValue(1);
8795 }
8796
8797 // FCFID it and return it.
8798 SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
8799 if (IsStrict)
8800 Chain = FP.getValue(1);
8801 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8802 if (IsStrict)
8803 FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
8804 DAG.getVTList(MVT::f32, MVT::Other),
8805 {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8806 else
8807 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8808 DAG.getIntPtrConstant(0, dl));
8809 }
8810 return FP;
8811}
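A minimal scalar sketch (not part of the source; assumes arithmetic right shift on int64_t) of the sticky-bit trick used above for i64 -> f32 without FPCVT, so the i64 -> f64 -> f32 path does not double-round:

  #include <cstdint>

  static float i64ToF32AvoidDoubleRounding(int64_t SInt) {
    uint64_t U = static_cast<uint64_t>(SInt);
    uint64_t Round = (U & 2047) + 2047;     // bit 11 becomes a sticky bit
    Round = (Round | U) & ~uint64_t(2047);  // clear the low 11 bits
    // Keep the original value when the top 11 bits are all sign-bit copies,
    // i.e. it already converts to double-precision exactly.
    uint64_t Cond = static_cast<uint64_t>(SInt >> 53) + 1;
    int64_t Src = (Cond > 1) ? static_cast<int64_t>(Round) : SInt;
    return static_cast<float>(static_cast<double>(Src));
  }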
8812
8813SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
8814 SelectionDAG &DAG) const {
8815 SDLoc dl(Op);
8816 /*
8817  The rounding mode is in bits 30:31 of FPSCR, and has the following
8818 settings:
8819 00 Round to nearest
8820 01 Round to 0
8821 10 Round to +inf
8822 11 Round to -inf
8823
8824 FLT_ROUNDS, on the other hand, expects the following:
8825 -1 Undefined
8826 0 Round to 0
8827 1 Round to nearest
8828 2 Round to +inf
8829 3 Round to -inf
8830
8831 To perform the conversion, we do:
8832 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
8833 */
8834
8835 MachineFunction &MF = DAG.getMachineFunction();
8836 EVT VT = Op.getValueType();
8837 EVT PtrVT = getPointerTy(MF.getDataLayout());
8838
8839 // Save FP Control Word to register
8840 SDValue Chain = Op.getOperand(0);
8841 SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
8842 Chain = MFFS.getValue(1);
8843
8844 // Save FP register to stack slot
8845 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
8846 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
8847 Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
8848
8849 // Load FP Control Word from low 32 bits of stack slot.
8850 SDValue Four = DAG.getConstant(4, dl, PtrVT);
8851 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
8852 SDValue CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
8853 Chain = CWD.getValue(1);
8854
8855 // Transform as necessary
8856 SDValue CWD1 =
8857 DAG.getNode(ISD::AND, dl, MVT::i32,
8858 CWD, DAG.getConstant(3, dl, MVT::i32));
8859 SDValue CWD2 =
8860 DAG.getNode(ISD::SRL, dl, MVT::i32,
8861 DAG.getNode(ISD::AND, dl, MVT::i32,
8862 DAG.getNode(ISD::XOR, dl, MVT::i32,
8863 CWD, DAG.getConstant(3, dl, MVT::i32)),
8864 DAG.getConstant(3, dl, MVT::i32)),
8865 DAG.getConstant(1, dl, MVT::i32));
8866
8867 SDValue RetVal =
8868 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
8869
8870 RetVal =
8871 DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
8872 dl, VT, RetVal);
8873
8874 return DAG.getMergeValues({RetVal, Chain}, dl);
8875}
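A minimal sketch (not part of the source) checking the FPSCR-to-FLT_ROUNDS formula used above for all four rounding modes:

  #include <cassert>

  static unsigned fpscrToFltRounds(unsigned RN) {
    return (RN & 3) ^ ((~RN & 3) >> 1);
  }

  static void checkMapping() {
    assert(fpscrToFltRounds(0) == 1); // round to nearest
    assert(fpscrToFltRounds(1) == 0); // round toward zero
    assert(fpscrToFltRounds(2) == 2); // round toward +inf
    assert(fpscrToFltRounds(3) == 3); // round toward -inf
  }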
8876
8877SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
8878 EVT VT = Op.getValueType();
8879 unsigned BitWidth = VT.getSizeInBits();
8880 SDLoc dl(Op);
8881  assert(Op.getNumOperands() == 3 &&
8882         VT == Op.getOperand(1).getValueType() &&
8883         "Unexpected SHL!");
8884
8885 // Expand into a bunch of logical ops. Note that these ops
8886 // depend on the PPC behavior for oversized shift amounts.
8887 SDValue Lo = Op.getOperand(0);
8888 SDValue Hi = Op.getOperand(1);
8889 SDValue Amt = Op.getOperand(2);
8890 EVT AmtVT = Amt.getValueType();
8891
8892 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8893 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8894 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
8895 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
8896 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
8897 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8898 DAG.getConstant(-BitWidth, dl, AmtVT));
8899 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
8900 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
8901 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
8902 SDValue OutOps[] = { OutLo, OutHi };
8903 return DAG.getMergeValues(OutOps, dl);
8904}
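A minimal scalar sketch (not part of the source; ppcShl/ppcSrl are invented helpers) of the expansion above, modelling the PPC behaviour it relies on: a shift amount of the register width or more produces zero instead of being undefined.

  #include <cstdint>

  static uint64_t ppcShl(uint64_t V, uint64_t Amt) { return (Amt & 127) >= 64 ? 0 : V << Amt; }
  static uint64_t ppcSrl(uint64_t V, uint64_t Amt) { return (Amt & 127) >= 64 ? 0 : V >> Amt; }

  // (OutHi:OutLo) = (Hi:Lo) << Amt, for 0 <= Amt < 128, mirroring the node
  // sequence built above.
  static void shlParts(uint64_t Lo, uint64_t Hi, uint64_t Amt,
                       uint64_t &OutLo, uint64_t &OutHi) {
    uint64_t Tmp4 = ppcShl(Hi, Amt) | ppcSrl(Lo, 64 - Amt);
    uint64_t Tmp6 = ppcShl(Lo, Amt - 64);
    OutHi = Tmp4 | Tmp6;
    OutLo = ppcShl(Lo, Amt);
  }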
8905
8906SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
8907 EVT VT = Op.getValueType();
8908 SDLoc dl(Op);
8909 unsigned BitWidth = VT.getSizeInBits();
8910  assert(Op.getNumOperands() == 3 &&
8911         VT == Op.getOperand(1).getValueType() &&
8912         "Unexpected SRL!");
8913
8914 // Expand into a bunch of logical ops. Note that these ops
8915 // depend on the PPC behavior for oversized shift amounts.
8916 SDValue Lo = Op.getOperand(0);
8917 SDValue Hi = Op.getOperand(1);
8918 SDValue Amt = Op.getOperand(2);
8919 EVT AmtVT = Amt.getValueType();
8920
8921 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8922 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8923 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
8924 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
8925 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
8926 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8927 DAG.getConstant(-BitWidth, dl, AmtVT));
8928 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
8929 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
8930 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
8931 SDValue OutOps[] = { OutLo, OutHi };
8932 return DAG.getMergeValues(OutOps, dl);
8933}
8934
8935SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
8936 SDLoc dl(Op);
8937 EVT VT = Op.getValueType();
8938 unsigned BitWidth = VT.getSizeInBits();
8939  assert(Op.getNumOperands() == 3 &&
8940         VT == Op.getOperand(1).getValueType() &&
8941         "Unexpected SRA!");
8942
8943 // Expand into a bunch of logical ops, followed by a select_cc.
8944 SDValue Lo = Op.getOperand(0);
8945 SDValue Hi = Op.getOperand(1);
8946 SDValue Amt = Op.getOperand(2);
8947 EVT AmtVT = Amt.getValueType();
8948
8949 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8950 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8951 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
8952 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
8953 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
8954 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8955 DAG.getConstant(-BitWidth, dl, AmtVT));
8956 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
8957 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
8958 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
8959 Tmp4, Tmp6, ISD::SETLE);
8960 SDValue OutOps[] = { OutLo, OutHi };
8961 return DAG.getMergeValues(OutOps, dl);
8962}
8963
8964SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
8965 SelectionDAG &DAG) const {
8966 SDLoc dl(Op);
8967 EVT VT = Op.getValueType();
8968 unsigned BitWidth = VT.getSizeInBits();
8969
8970 bool IsFSHL = Op.getOpcode() == ISD::FSHL;
8971 SDValue X = Op.getOperand(0);
8972 SDValue Y = Op.getOperand(1);
8973 SDValue Z = Op.getOperand(2);
8974 EVT AmtVT = Z.getValueType();
8975
8976 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
8977 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
8978 // This is simpler than TargetLowering::expandFunnelShift because we can rely
8979 // on PowerPC shift by BW being well defined.
8980 Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
8981 DAG.getConstant(BitWidth - 1, dl, AmtVT));
8982 SDValue SubZ =
8983 DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
8984 X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
8985 Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
8986 return DAG.getNode(ISD::OR, dl, VT, X, Y);
8987}
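A minimal scalar sketch (not part of the source) of the fshl formula above; the point is that after masking Z, the complementary shift amount can be exactly the bit width, which PPC shifts define as producing zero, so no extra select is needed.

  #include <cstdint>

  static uint64_t shiftOrZero(uint64_t V, unsigned Amt, bool Left) {
    if (Amt >= 64)
      return 0; // models the PPC shift-by-bitwidth behaviour
    return Left ? (V << Amt) : (V >> Amt);
  }

  // fshl(X, Y, Z) = (X << (Z % 64)) | (Y >> (64 - (Z % 64)))
  static uint64_t fshl64(uint64_t X, uint64_t Y, unsigned Z) {
    unsigned Amt = Z & 63;
    return shiftOrZero(X, Amt, true) | shiftOrZero(Y, 64 - Amt, false);
  }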
8988
8989//===----------------------------------------------------------------------===//
8990// Vector related lowering.
8991//
8992
8993/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
8994/// element size of SplatSize. Cast the result to VT.
8995static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
8996 SelectionDAG &DAG, const SDLoc &dl) {
8997 static const MVT VTys[] = { // canonical VT to use for each size.
8998 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
8999 };
9000
9001 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
9002
9003 // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
9004 if (Val == ((1LU << (SplatSize * 8)) - 1)) {
9005 SplatSize = 1;
9006 Val = 0xFF;
9007 }
9008
9009 EVT CanonicalVT = VTys[SplatSize-1];
9010
9011 // Build a canonical splat for this value.
9012 return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
9013}
9014
9015/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
9016/// specified intrinsic ID.
9017static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
9018 const SDLoc &dl, EVT DestVT = MVT::Other) {
9019 if (DestVT == MVT::Other) DestVT = Op.getValueType();
9020 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9021 DAG.getConstant(IID, dl, MVT::i32), Op);
9022}
9023
9024/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
9025/// specified intrinsic ID.
9026static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
9027 SelectionDAG &DAG, const SDLoc &dl,
9028 EVT DestVT = MVT::Other) {
9029 if (DestVT == MVT::Other) DestVT = LHS.getValueType();
9030 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9031 DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
9032}
9033
9034/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
9035/// specified intrinsic ID.
9036static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
9037 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
9038 EVT DestVT = MVT::Other) {
9039 if (DestVT == MVT::Other) DestVT = Op0.getValueType();
9040 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9041 DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
9042}
9043
9044/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
9045/// amount. The result has the specified value type.
9046static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
9047 SelectionDAG &DAG, const SDLoc &dl) {
9048 // Force LHS/RHS to be the right type.
9049 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
9050 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
9051
9052 int Ops[16];
9053 for (unsigned i = 0; i != 16; ++i)
9054 Ops[i] = i + Amt;
9055 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
9056 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9057}
9058
9059/// Do we have an efficient pattern in a .td file for this node?
9060///
9061/// \param V - pointer to the BuildVectorSDNode being matched
9062/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
9063///
9064/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
9065/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
9066/// the opposite is true (expansion is beneficial) are:
9067/// - The node builds a vector out of integers that are not 32 or 64-bits
9068/// - The node builds a vector out of constants
9069/// - The node is a "load-and-splat"
9070/// In all other cases, we will choose to keep the BUILD_VECTOR.
9071static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
9072 bool HasDirectMove,
9073 bool HasP8Vector) {
9074 EVT VecVT = V->getValueType(0);
9075 bool RightType = VecVT == MVT::v2f64 ||
9076 (HasP8Vector && VecVT == MVT::v4f32) ||
9077 (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
9078 if (!RightType)
9079 return false;
9080
9081 bool IsSplat = true;
9082 bool IsLoad = false;
9083 SDValue Op0 = V->getOperand(0);
9084
9085 // This function is called in a block that confirms the node is not a constant
9086 // splat. So a constant BUILD_VECTOR here means the vector is built out of
9087 // different constants.
9088 if (V->isConstant())
9089 return false;
9090 for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
9091 if (V->getOperand(i).isUndef())
9092 return false;
9093 // We want to expand nodes that represent load-and-splat even if the
9094 // loaded value is a floating point truncation or conversion to int.
9095 if (V->getOperand(i).getOpcode() == ISD::LOAD ||
9096 (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
9097 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9098 (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
9099 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9100 (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
9101 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
9102 IsLoad = true;
9103 // If the operands are different or the input is not a load and has more
9104 // uses than just this BV node, then it isn't a splat.
9105 if (V->getOperand(i) != Op0 ||
9106 (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
9107 IsSplat = false;
9108 }
9109 return !(IsSplat && IsLoad);
9110}
9111
9112// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9113SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9114
9115 SDLoc dl(Op);
9116 SDValue Op0 = Op->getOperand(0);
9117
9118 if ((Op.getValueType() != MVT::f128) ||
9119 (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9120 (Op0.getOperand(0).getValueType() != MVT::i64) ||
9121 (Op0.getOperand(1).getValueType() != MVT::i64))
9122 return SDValue();
9123
9124 return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0),
9125 Op0.getOperand(1));
9126}
9127
9128static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9129 const SDValue *InputLoad = &Op;
9130 if (InputLoad->getOpcode() == ISD::BITCAST)
8. Assuming the condition is false
9. Taking false branch
9131 InputLoad = &InputLoad->getOperand(0);
9132 if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
10. Assuming the condition is false
12. Taking false branch
9133 InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
11. Assuming the condition is false
9134 IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9135 InputLoad = &InputLoad->getOperand(0);
9136 }
9137 if (InputLoad->getOpcode() != ISD::LOAD)
13. Assuming the condition is false
14. Taking false branch
9138 return nullptr;
9139 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9140 return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
15. '?' condition is false
16. Returning null pointer, which participates in a condition later
9141}
9142
9143// Convert the argument APFloat to a single precision APFloat if there is no
9144// loss in information during the conversion to single precision APFloat and the
9145// resulting number is not a denormal number. Return true if successful.
9146bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
9147 APFloat APFloatToConvert = ArgAPFloat;
9148 bool LosesInfo = true;
9149 APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9150 &LosesInfo);
9151 bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
9152 if (Success)
9153 ArgAPFloat = APFloatToConvert;
9154 return Success;
9155}
9156
9157// Bitcast the argument APInt to a double and convert it to a single precision
9158// APFloat, bitcast the APFloat to an APInt and assign it to the original
9159// argument if there is no loss in information during the conversion from
9160// double to single precision APFloat and the resulting number is not a denormal
9161// number. Return true if successful.
9162bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
9163 double DpValue = ArgAPInt.bitsToDouble();
9164 APFloat APFloatDp(DpValue);
9165 bool Success = convertToNonDenormSingle(APFloatDp);
9166 if (Success)
9167 ArgAPInt = APFloatDp.bitcastToAPInt();
9168 return Success;
9169}
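A minimal sketch (not part of the source; uses plain float/double instead of APFloat, and the round-trip equality test only approximates the LosesInfo flag) of the shrink-to-single check above that gates the XXSPLTIDP splat:

  #include <cmath>

  static bool shrinksToNonDenormSingle(double D, float &Out) {
    float F = static_cast<float>(D);
    bool Lossless = static_cast<double>(F) == D;      // approximates !LosesInfo
    bool Denormal = std::fpclassify(F) == FP_SUBNORMAL;
    if (Lossless && !Denormal)
      Out = F;
    return Lossless && !Denormal;
  }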
9170
9171// If this is a case we can't handle, return null and let the default
9172// expansion code take care of it. If we CAN select this case, and if it
9173// selects to a single instruction, return Op. Otherwise, if we can codegen
9174// this case more efficiently than a constant pool load, lower it to the
9175// sequence of ops that should be used.
9176SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9177 SelectionDAG &DAG) const {
9178 SDLoc dl(Op);
9179 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
9180  assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
9181
9182 // Check if this is a splat of a constant value.
9183 APInt APSplatBits, APSplatUndef;
9184 unsigned SplatBitSize;
9185 bool HasAnyUndefs;
9186 bool BVNIsConstantSplat =
9187 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
9188 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
9189
9190 // If it is a splat of a double, check if we can shrink it to a 32 bit
9191 // non-denormal float which when converted back to double gives us the same
9192 // double. This is to exploit the XXSPLTIDP instruction.
9193 if (BVNIsConstantSplat && Subtarget.hasPrefixInstrs() &&
9194 (SplatBitSize == 64) && (Op->getValueType(0) == MVT::v2f64) &&
9195 convertToNonDenormSingle(APSplatBits)) {
9196 SDValue SplatNode = DAG.getNode(
9197 PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
9198 DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
9199 return DAG.getBitcast(Op.getValueType(), SplatNode);
9200 }
9201
9202 if (!BVNIsConstantSplat || SplatBitSize > 32) {
9203
9204 bool IsPermutedLoad = false;
9205 const SDValue *InputLoad =
9206 getNormalLoadInput(Op.getOperand(0), IsPermutedLoad);
9207 // Handle load-and-splat patterns as we have instructions that will do this
9208 // in one go.
9209 if (InputLoad && DAG.isSplatValue(Op, true)) {
9210 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9211
9212 // We have handling for 4 and 8 byte elements.
9213 unsigned ElementSize = LD->getMemoryVT().getScalarSizeInBits();
9214
9215      // Checking for a single use of this load, we have to check for vector
9216      // width (128 bits) / ElementSize uses (since each operand of the
9217      // BUILD_VECTOR is a separate use of the value).
9218 if (InputLoad->getNode()->hasNUsesOfValue(128 / ElementSize, 0) &&
9219 ((Subtarget.hasVSX() && ElementSize == 64) ||
9220 (Subtarget.hasP9Vector() && ElementSize == 32))) {
9221 SDValue Ops[] = {
9222 LD->getChain(), // Chain
9223 LD->getBasePtr(), // Ptr
9224 DAG.getValueType(Op.getValueType()) // VT
9225 };
9226 return
9227 DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl,
9228 DAG.getVTList(Op.getValueType(), MVT::Other),
9229 Ops, LD->getMemoryVT(), LD->getMemOperand());
9230 }
9231 }
9232
9233 // BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be
9234 // lowered to VSX instructions under certain conditions.
9235 // Without VSX, there is no pattern more efficient than expanding the node.
9236 if (Subtarget.hasVSX() &&
9237 haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
9238 Subtarget.hasP8Vector()))
9239 return Op;
9240 return SDValue();
9241 }
9242
9243 uint64_t SplatBits = APSplatBits.getZExtValue();
9244 uint64_t SplatUndef = APSplatUndef.getZExtValue();
9245 unsigned SplatSize = SplatBitSize / 8;
9246
9247 // First, handle single instruction cases.
9248
9249 // All zeros?
9250 if (SplatBits == 0) {
9251 // Canonicalize all zero vectors to be v4i32.
9252 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
9253 SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
9254 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
9255 }
9256 return Op;
9257 }
9258
9259 // We have XXSPLTIW for constant splats four bytes wide.
9260 // Given vector length is a multiple of 4, 2-byte splats can be replaced
9261 // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
9262 // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
9263 // turned into a 4-byte splat of 0xABABABAB.
9264 if (Subtarget.hasPrefixInstrs() && SplatSize == 2)
9265 return getCanonicalConstSplat((SplatBits |= SplatBits << 16), SplatSize * 2,
9266 Op.getValueType(), DAG, dl);
9267
9268 if (Subtarget.hasPrefixInstrs() && SplatSize == 4)
9269 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9270 dl);
9271
9272 // We have XXSPLTIB for constant splats one byte wide.
9273 if (Subtarget.hasP9Vector() && SplatSize == 1)
9274 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9275 dl);
9276
9277 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
9278 int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
9279 (32-SplatBitSize));
9280 if (SextVal >= -16 && SextVal <= 15)
9281 return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
9282 dl);
9283
9284 // Two instruction sequences.
9285
9286 // If this value is in the range [-32,30] and is even, use:
9287 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
9288 // If this value is in the range [17,31] and is odd, use:
9289 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
9290 // If this value is in the range [-31,-17] and is odd, use:
9291 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
9292 // Note the last two are three-instruction sequences.
9293 if (SextVal >= -32 && SextVal <= 31) {
9294 // To avoid having these optimizations undone by constant folding,
9295 // we convert to a pseudo that will be expanded later into one of
9296 // the above forms.
9297 SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
9298 EVT VT = (SplatSize == 1 ? MVT::v16i8 :
9299 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
9300 SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
9301 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
9302 if (VT == Op.getValueType())
9303 return RetVal;
9304 else
9305 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
9306 }
9307
9308 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
9309 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
9310 // for fneg/fabs.
9311 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
9312 // Make -1 and vspltisw -1:
9313 SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
9314
9315 // Make the VSLW intrinsic, computing 0x8000_0000.
9316 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
9317 OnesV, DAG, dl);
9318
9319 // xor by OnesV to invert it.
9320 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
9321 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9322 }
9323
9324 // Check to see if this is a wide variety of vsplti*, binop self cases.
9325 static const signed char SplatCsts[] = {
9326 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
9327 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
9328 };
9329
9330 for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
9331 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
9332    // cases which are ambiguous (e.g. formation of 0x8000_0000).
9333 int i = SplatCsts[idx];
9334
9335 // Figure out what shift amount will be used by altivec if shifted by i in
9336 // this splat size.
9337 unsigned TypeShiftAmt = i & (SplatBitSize-1);
9338
9339 // vsplti + shl self.
9340 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
9341 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9342 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9343 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
9344 Intrinsic::ppc_altivec_vslw
9345 };
9346 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9347 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9348 }
9349
9350 // vsplti + srl self.
9351 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9352 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9353 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9354 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
9355 Intrinsic::ppc_altivec_vsrw
9356 };
9357 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9358 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9359 }
9360
9361 // vsplti + sra self.
9362 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9363 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9364 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9365 Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
9366 Intrinsic::ppc_altivec_vsraw
9367 };
9368 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9369 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9370 }
9371
9372 // vsplti + rol self.
9373 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
9374 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
9375 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9376 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9377 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
9378 Intrinsic::ppc_altivec_vrlw
9379 };
9380 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9381 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9382 }
9383
9384 // t = vsplti c, result = vsldoi t, t, 1
9385 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
9386 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9387 unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
9388 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9389 }
9390 // t = vsplti c, result = vsldoi t, t, 2
9391 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
9392 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9393 unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
9394 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9395 }
9396 // t = vsplti c, result = vsldoi t, t, 3
9397 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
9398 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9399 unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
9400 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9401 }
9402 }
9403
9404 return SDValue();
9405}
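
Two of the constant manipulations above are easy to exercise in isolation: the 2-byte splat widening at line 9265 and the sign extension of the splat value at lines 9278-9279. The standalone sketch below (hypothetical helper names, not part of the file under analysis) reproduces both expressions.

  // Sketch of the splat-constant arithmetic used around lines 9265 and 9278.
  #include <cassert>
  #include <cstdint>

  // 0xABAB -> 0xABABABAB, as done before emitting a 4-byte splat.
  static uint32_t widenHalfwordSplat(uint32_t SplatBits) {
    return SplatBits | (SplatBits << 16);
  }

  // Sign-extend a SplatBitSize-bit value to 32 bits, mirroring SextVal.
  static int32_t signExtendSplat(uint64_t SplatBits, unsigned SplatBitSize) {
    return int32_t(uint32_t(SplatBits) << (32 - SplatBitSize)) >> (32 - SplatBitSize);
  }

  int main() {
    assert(widenHalfwordSplat(0xABAB) == 0xABABABABu);
    assert(signExtendSplat(0xFF, 8) == -1); // 8-bit 0xFF is the splat value -1
    assert(signExtendSplat(0x0F, 8) == 15);
    return 0;
  }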
9406
9407/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
9408/// the specified operations to build the shuffle.
9409static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
9410 SDValue RHS, SelectionDAG &DAG,
9411 const SDLoc &dl) {
9412 unsigned OpNum = (PFEntry >> 26) & 0x0F;
9413 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
9414 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
9415
9416 enum {
9417 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
9418 OP_VMRGHW,
9419 OP_VMRGLW,
9420 OP_VSPLTISW0,
9421 OP_VSPLTISW1,
9422 OP_VSPLTISW2,
9423 OP_VSPLTISW3,
9424 OP_VSLDOI4,
9425 OP_VSLDOI8,
9426 OP_VSLDOI12
9427 };
9428
9429 if (OpNum == OP_COPY) {
9430 if (LHSID == (1*9+2)*9+3) return LHS;
9431     assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
9432 return RHS;
9433 }
9434
9435 SDValue OpLHS, OpRHS;
9436 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
9437 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
9438
9439 int ShufIdxs[16];
9440 switch (OpNum) {
9441   default: llvm_unreachable("Unknown i32 permute!");
9442 case OP_VMRGHW:
9443 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
9444 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
9445 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
9446 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
9447 break;
9448 case OP_VMRGLW:
9449 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
9450 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
9451 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
9452 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
9453 break;
9454 case OP_VSPLTISW0:
9455 for (unsigned i = 0; i != 16; ++i)
9456 ShufIdxs[i] = (i&3)+0;
9457 break;
9458 case OP_VSPLTISW1:
9459 for (unsigned i = 0; i != 16; ++i)
9460 ShufIdxs[i] = (i&3)+4;
9461 break;
9462 case OP_VSPLTISW2:
9463 for (unsigned i = 0; i != 16; ++i)
9464 ShufIdxs[i] = (i&3)+8;
9465 break;
9466 case OP_VSPLTISW3:
9467 for (unsigned i = 0; i != 16; ++i)
9468 ShufIdxs[i] = (i&3)+12;
9469 break;
9470 case OP_VSLDOI4:
9471 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
9472 case OP_VSLDOI8:
9473 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
9474 case OP_VSLDOI12:
9475 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
9476 }
9477 EVT VT = OpLHS.getValueType();
9478 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
9479 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
9480 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
9481 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9482}
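
GeneratePerfectShuffle unpacks its table entry with plain shifts and masks (lines 9412-9414; the cost field is read the same way at line 10056). The standalone sketch below, with a hypothetical decoder name, shows the field layout those lines imply: 2 bits of cost, 4 bits of opcode, and two 13-bit operand ids.

  #include <cstdio>

  struct PFEntryFields { unsigned Cost, OpNum, LHSID, RHSID; };

  // Decode one perfect-shuffle table entry as done at lines 9412-9414 and 10056.
  static PFEntryFields decodePFEntry(unsigned PFEntry) {
    PFEntryFields F;
    F.Cost  = PFEntry >> 30;
    F.OpNum = (PFEntry >> 26) & 0x0F;
    F.LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
    F.RHSID = (PFEntry >>  0) & ((1 << 13) - 1);
    return F;
  }

  int main() {
    PFEntryFields F = decodePFEntry(0x4ACDE123u); // arbitrary example value
    std::printf("cost=%u op=%u lhs=%u rhs=%u\n", F.Cost, F.OpNum, F.LHSID, F.RHSID);
    return 0;
  }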
9483
9484/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
9485/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
9486/// SDValue.
9487SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
9488 SelectionDAG &DAG) const {
9489 const unsigned BytesInVector = 16;
9490 bool IsLE = Subtarget.isLittleEndian();
9491 SDLoc dl(N);
9492 SDValue V1 = N->getOperand(0);
9493 SDValue V2 = N->getOperand(1);
9494 unsigned ShiftElts = 0, InsertAtByte = 0;
9495 bool Swap = false;
9496
9497 // Shifts required to get the byte we want at element 7.
9498 unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
9499 0, 15, 14, 13, 12, 11, 10, 9};
9500 unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
9501 1, 2, 3, 4, 5, 6, 7, 8};
9502
9503 ArrayRef<int> Mask = N->getMask();
9504 int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
9505
9506 // For each mask element, find out if we're just inserting something
9507 // from V2 into V1 or vice versa.
9508 // Possible permutations inserting an element from V2 into V1:
9509 // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9510 // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9511 // ...
9512 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
9513 // Inserting from V1 into V2 will be similar, except mask range will be
9514 // [16,31].
9515
9516 bool FoundCandidate = false;
9517 // If both vector operands for the shuffle are the same vector, the mask
9518 // will contain only elements from the first one and the second one will be
9519 // undef.
9520 unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
9521   // Go through the mask of bytes to find an element that's being moved
9522 // from one vector to the other.
9523 for (unsigned i = 0; i < BytesInVector; ++i) {
9524 unsigned CurrentElement = Mask[i];
9525 // If 2nd operand is undefined, we should only look for element 7 in the
9526 // Mask.
9527 if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
9528 continue;
9529
9530 bool OtherElementsInOrder = true;
9531 // Examine the other elements in the Mask to see if they're in original
9532 // order.
9533 for (unsigned j = 0; j < BytesInVector; ++j) {
9534 if (j == i)
9535 continue;
9536       // If CurrentElement is from V1 [0,15], then we expect the rest of the
9537       // Mask to be from V2 [16,31] and vice versa, unless the 2nd operand is
9538       // undefined, in which case we assume we're picking from the 1st operand.
9539 int MaskOffset =
9540 (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
9541 if (Mask[j] != OriginalOrder[j] + MaskOffset) {
9542 OtherElementsInOrder = false;
9543 break;
9544 }
9545 }
9546 // If other elements are in original order, we record the number of shifts
9547 // we need to get the element we want into element 7. Also record which byte
9548 // in the vector we should insert into.
9549 if (OtherElementsInOrder) {
9550 // If 2nd operand is undefined, we assume no shifts and no swapping.
9551 if (V2.isUndef()) {
9552 ShiftElts = 0;
9553 Swap = false;
9554 } else {
9555 // Only need the last 4-bits for shifts because operands will be swapped if CurrentElement is >= 2^4.
9556 ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
9557 : BigEndianShifts[CurrentElement & 0xF];
9558 Swap = CurrentElement < BytesInVector;
9559 }
9560 InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
9561 FoundCandidate = true;
9562 break;
9563 }
9564 }
9565
9566 if (!FoundCandidate)
9567 return SDValue();
9568
9569 // Candidate found, construct the proper SDAG sequence with VINSERTB,
9570 // optionally with VECSHL if shift is required.
9571 if (Swap)
9572 std::swap(V1, V2);
9573 if (V2.isUndef())
9574 V2 = V1;
9575 if (ShiftElts) {
9576 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
9577 DAG.getConstant(ShiftElts, dl, MVT::i32));
9578 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
9579 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9580 }
9581 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
9582 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9583}
9584
9585/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
9586/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
9587/// SDValue.
9588SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
9589 SelectionDAG &DAG) const {
9590 const unsigned NumHalfWords = 8;
9591 const unsigned BytesInVector = NumHalfWords * 2;
9592 // Check that the shuffle is on half-words.
9593 if (!isNByteElemShuffleMask(N, 2, 1))
9594 return SDValue();
9595
9596 bool IsLE = Subtarget.isLittleEndian();
9597 SDLoc dl(N);
9598 SDValue V1 = N->getOperand(0);
9599 SDValue V2 = N->getOperand(1);
9600 unsigned ShiftElts = 0, InsertAtByte = 0;
9601 bool Swap = false;
9602
9603 // Shifts required to get the half-word we want at element 3.
9604 unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
9605 unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
9606
9607 uint32_t Mask = 0;
9608 uint32_t OriginalOrderLow = 0x1234567;
9609 uint32_t OriginalOrderHigh = 0x89ABCDEF;
9610 // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
9611 // 32-bit space, only need 4-bit nibbles per element.
9612 for (unsigned i = 0; i < NumHalfWords; ++i) {
9613 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9614 Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
9615 }
9616
9617 // For each mask element, find out if we're just inserting something
9618 // from V2 into V1 or vice versa. Possible permutations inserting an element
9619 // from V2 into V1:
9620 // X, 1, 2, 3, 4, 5, 6, 7
9621 // 0, X, 2, 3, 4, 5, 6, 7
9622 // 0, 1, X, 3, 4, 5, 6, 7
9623 // 0, 1, 2, X, 4, 5, 6, 7
9624 // 0, 1, 2, 3, X, 5, 6, 7
9625 // 0, 1, 2, 3, 4, X, 6, 7
9626 // 0, 1, 2, 3, 4, 5, X, 7
9627 // 0, 1, 2, 3, 4, 5, 6, X
9628 // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
9629
9630 bool FoundCandidate = false;
9631 // Go through the mask of half-words to find an element that's being moved
9632 // from one vector to the other.
9633 for (unsigned i = 0; i < NumHalfWords; ++i) {
9634 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9635 uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
9636 uint32_t MaskOtherElts = ~(0xF << MaskShift);
9637 uint32_t TargetOrder = 0x0;
9638
9639 // If both vector operands for the shuffle are the same vector, the mask
9640 // will contain only elements from the first one and the second one will be
9641 // undef.
9642 if (V2.isUndef()) {
9643 ShiftElts = 0;
9644 unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
9645 TargetOrder = OriginalOrderLow;
9646 Swap = false;
9647       // Skip if this is not the correct element, or if the mask of the other
9648       // elements doesn't equal our expected order.
9649 if (MaskOneElt == VINSERTHSrcElem &&
9650 (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9651 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
9652 FoundCandidate = true;
9653 break;
9654 }
9655 } else { // If both operands are defined.
9656 // Target order is [8,15] if the current mask is between [0,7].
9657 TargetOrder =
9658 (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
9659       // Skip if the mask of the other elements doesn't equal our expected order.
9660 if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9661 // We only need the last 3 bits for the number of shifts.
9662 ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
9663 : BigEndianShifts[MaskOneElt & 0x7];
9664 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
9665 Swap = MaskOneElt < NumHalfWords;
9666 FoundCandidate = true;
9667 break;
9668 }
9669 }
9670 }
9671
9672 if (!FoundCandidate)
9673 return SDValue();
9674
9675 // Candidate found, construct the proper SDAG sequence with VINSERTH,
9676 // optionally with VECSHL if shift is required.
9677 if (Swap)
9678 std::swap(V1, V2);
9679 if (V2.isUndef())
9680 V2 = V1;
9681 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
9682 if (ShiftElts) {
9683 // Double ShiftElts because we're left shifting on v16i8 type.
9684 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
9685 DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
9686 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
9687 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
9688 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9689 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9690 }
9691 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
9692 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
9693 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9694 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9695}
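
The nibble packing at lines 9612-9615 is what makes the later comparisons against OriginalOrderLow/OriginalOrderHigh possible. A standalone sketch of just that packing (hypothetical helper name; it takes half-word indices that are already divided by 2):

  #include <cassert>
  #include <cstdint>

  // Pack eight half-word indices into one word, one 4-bit nibble per element,
  // most significant nibble first, as at lines 9612-9615.
  static uint32_t packHalfwordMask(const int MaskElts[8]) {
    uint32_t Mask = 0;
    for (unsigned i = 0; i < 8; ++i) {
      unsigned MaskShift = (8 - 1 - i) * 4;
      Mask |= uint32_t(MaskElts[i]) << MaskShift;
    }
    return Mask;
  }

  int main() {
    int Identity[8] = {0, 1, 2, 3, 4, 5, 6, 7};
    assert(packHalfwordMask(Identity) == 0x01234567u); // equals OriginalOrderLow
    return 0;
  }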
9696
9697/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
9698/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
9699/// return the default SDValue.
9700SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
9701 SelectionDAG &DAG) const {
9702 // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
9703 // to v16i8. Peek through the bitcasts to get the actual operands.
9704 SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
9705 SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));
9706
9707 auto ShuffleMask = SVN->getMask();
9708 SDValue VecShuffle(SVN, 0);
9709 SDLoc DL(SVN);
9710
9711 // Check that we have a four byte shuffle.
9712 if (!isNByteElemShuffleMask(SVN, 4, 1))
9713 return SDValue();
9714
9715 // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
9716 if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
9717 std::swap(LHS, RHS);
9718 VecShuffle = DAG.getCommutedVectorShuffle(*SVN);
9719 ShuffleMask = cast<ShuffleVectorSDNode>(VecShuffle)->getMask();
9720 }
9721
9722 // Ensure that the RHS is a vector of constants.
9723 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
9724 if (!BVN)
9725 return SDValue();
9726
9727 // Check if RHS is a splat of 4-bytes (or smaller).
9728 APInt APSplatValue, APSplatUndef;
9729 unsigned SplatBitSize;
9730 bool HasAnyUndefs;
9731 if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
9732 HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
9733 SplatBitSize > 32)
9734 return SDValue();
9735
9736 // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
9737 // The instruction splats a constant C into two words of the source vector
9738 // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
9739 // Thus we check that the shuffle mask is the equivalent of
9740 // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
9741 // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
9742 // within each word are consecutive, so we only need to check the first byte.
9743 SDValue Index;
9744 bool IsLE = Subtarget.isLittleEndian();
9745 if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
9746 (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
9747 ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
9748 Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
9749 else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
9750 (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
9751 ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
9752 Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
9753 else
9754 return SDValue();
9755
9756 // If the splat is narrower than 32-bits, we need to get the 32-bit value
9757 // for XXSPLTI32DX.
9758 unsigned SplatVal = APSplatValue.getZExtValue();
9759 for (; SplatBitSize < 32; SplatBitSize <<= 1)
9760 SplatVal |= (SplatVal << SplatBitSize);
9761
9762 SDValue SplatNode = DAG.getNode(
9763 PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
9764 Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
9765 return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
9766}
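
The widening loop at lines 9758-9760 replicates a narrow splat constant until it fills 32 bits, which is the word XXSPLTI32DX then splats into the two selected lanes. A standalone sketch of that loop, with a hypothetical helper name:

  #include <cassert>
  #include <cstdint>

  // Replicate a splat constant narrower than 32 bits, as at lines 9758-9760.
  static uint32_t widenSplatTo32(uint32_t SplatVal, unsigned SplatBitSize) {
    for (; SplatBitSize < 32; SplatBitSize <<= 1)
      SplatVal |= (SplatVal << SplatBitSize);
    return SplatVal;
  }

  int main() {
    assert(widenSplatTo32(0xAB, 8) == 0xABABABABu);    // byte splat
    assert(widenSplatTo32(0xABCD, 16) == 0xABCDABCDu); // half-word splat
    return 0;
  }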
9767
9768/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
9769/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
9770/// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
9771/// i.e (or (shl x, C1), (srl x, 128-C1)).
9772SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
9773   assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
9774   assert(Op.getValueType() == MVT::v1i128 &&
9775          "Only set v1i128 as custom, other type shouldn't reach here!");
9776 SDLoc dl(Op);
9777 SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
9778 SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
9779 unsigned SHLAmt = N1.getConstantOperandVal(0);
9780 if (SHLAmt % 8 == 0) {
9781 SmallVector<int, 16> Mask(16, 0);
9782 std::iota(Mask.begin(), Mask.end(), 0);
9783 std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
9784 if (SDValue Shuffle =
9785 DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
9786 DAG.getUNDEF(MVT::v16i8), Mask))
9787 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
9788 }
9789 SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
9790 SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
9791 DAG.getConstant(SHLAmt, dl, MVT::i32));
9792 SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
9793 DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
9794 SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
9795 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
9796}
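
When the rotate amount is a byte multiple, LowerROTL only has to rotate the 16 byte indices of a shuffle mask (lines 9781-9783). The standalone sketch below reproduces just that mask construction:

  #include <algorithm>
  #include <cassert>
  #include <numeric>
  #include <vector>

  // Build the v16i8 shuffle mask for a v1i128 rotate by SHLAmt bits
  // (SHLAmt % 8 == 0), as at lines 9781-9783.
  static std::vector<int> rotlByteMask(unsigned SHLAmt) {
    std::vector<int> Mask(16);
    std::iota(Mask.begin(), Mask.end(), 0);  // 0, 1, ..., 15
    std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
    return Mask;
  }

  int main() {
    std::vector<int> M = rotlByteMask(32);   // rotate by four bytes
    assert(M[0] == 4 && M[12] == 0 && M[15] == 3);
    return 0;
  }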
9797
9798/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
9799/// is a shuffle we can handle in a single instruction, return it. Otherwise,
9800/// return the code it can be lowered into. Worst case, it can always be
9801/// lowered into a vperm.
9802SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
9803 SelectionDAG &DAG) const {
9804 SDLoc dl(Op);
9805 SDValue V1 = Op.getOperand(0);
9806 SDValue V2 = Op.getOperand(1);
9807 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
9808
9809 // Any nodes that were combined in the target-independent combiner prior
9810 // to vector legalization will not be sent to the target combine. Try to
9811 // combine it here.
9812 if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
1
Calling 'SDValue::operator bool'
4
Returning from 'SDValue::operator bool'
5
Taking false branch
9813 if (!isa<ShuffleVectorSDNode>(NewShuffle))
9814 return NewShuffle;
9815 Op = NewShuffle;
9816 SVOp = cast<ShuffleVectorSDNode>(Op);
9817 V1 = Op.getOperand(0);
9818 V2 = Op.getOperand(1);
9819 }
9820 EVT VT = Op.getValueType();
9821 bool isLittleEndian = Subtarget.isLittleEndian();
9822
9823 unsigned ShiftElts, InsertAtByte;
6
'ShiftElts' declared without an initial value
9824 bool Swap = false;
9825
9826 // If this is a load-and-splat, we can do that with a single instruction
9827 // in some cases. However if the load has multiple uses, we don't want to
9828 // combine it because that will just produce multiple loads.
9829 bool IsPermutedLoad = false;
9830 const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
7
Calling 'getNormalLoadInput'
17
Returning from 'getNormalLoadInput'
9831   if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
17.1
'InputLoad' is null
9832 (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
9833 InputLoad->hasOneUse()) {
9834 bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
9835 int SplatIdx =
9836 PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
9837
9838 // The splat index for permuted loads will be in the left half of the vector
9839 // which is strictly wider than the loaded value by 8 bytes. So we need to
9840 // adjust the splat index to point to the correct address in memory.
9841 if (IsPermutedLoad) {
9842       assert(isLittleEndian && "Unexpected permuted load on big endian target");
9843 SplatIdx += IsFourByte ? 2 : 1;
9844       assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
9845              "Splat of a value outside of the loaded memory");
9846 }
9847
9848 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9849 // For 4-byte load-and-splat, we need Power9.
9850 if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
9851 uint64_t Offset = 0;
9852 if (IsFourByte)
9853 Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
9854 else
9855 Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
9856
9857 SDValue BasePtr = LD->getBasePtr();
9858 if (Offset != 0)
9859 BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
9860 BasePtr, DAG.getIntPtrConstant(Offset, dl));
9861 SDValue Ops[] = {
9862 LD->getChain(), // Chain
9863 BasePtr, // BasePtr
9864 DAG.getValueType(Op.getValueType()) // VT
9865 };
9866 SDVTList VTL =
9867 DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
9868 SDValue LdSplt =
9869 DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
9870 Ops, LD->getMemoryVT(), LD->getMemOperand());
9871 if (LdSplt.getValueType() != SVOp->getValueType(0))
9872 LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
9873 return LdSplt;
9874 }
9875 }
9876 if (Subtarget.hasP9Vector() &&
18
Assuming the condition is false
9877 PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
9878 isLittleEndian)) {
9879 if (Swap)
9880 std::swap(V1, V2);
9881 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9882 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
9883 if (ShiftElts) {
9884 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
9885 DAG.getConstant(ShiftElts, dl, MVT::i32));
9886 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
9887 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9888 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9889 }
9890 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
9891 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9892 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9893 }
9894
9895 if (Subtarget.hasPrefixInstrs()) {
19
Assuming the condition is false
20
Taking false branch
9896 SDValue SplatInsertNode;
9897 if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
9898 return SplatInsertNode;
9899 }
9900
9901 if (Subtarget.hasP9Altivec()) {
21
Assuming the condition is false
22
Taking false branch
9902 SDValue NewISDNode;
9903 if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
9904 return NewISDNode;
9905
9906 if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
9907 return NewISDNode;
9908 }
9909
9910 if (Subtarget.hasVSX() &&
23
Assuming the condition is true
55
Taking true branch
9911 PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
24
Calling 'isXXSLDWIShuffleMask'
54
Returning from 'isXXSLDWIShuffleMask'
9912     if (Swap)
55.1
'Swap' is false
56
Taking false branch
9913 std::swap(V1, V2);
9914 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9915 SDValue Conv2 =
9916 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
57
'?' condition is false
9917
9918 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
9919 DAG.getConstant(ShiftElts, dl, MVT::i32));
58
1st function call argument is an uninitialized value
9920 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
9921 }
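
This block is the sink of the reported path: ShiftElts is declared at line 9823 with no initializer (event 6), the earlier lowering attempts are skipped on this path (events 18-22), and the analyzer does not see a store to ShiftElts inside isXXSLDWIShuffleMask before it returns true (events 24-54), so the DAG.getConstant(ShiftElts, ...) call at line 9919 reads an uninitialized value (event 58). The reduced sketch below illustrates the caller-side pattern and one way to make the read well-defined, zero-initializing the locals at their declaration; it is only an illustration with hypothetical names, not the upstream fix.

  #include <cstdio>

  // A callee that fills its out-parameters only when it reports a match.
  static bool pickShuffle(bool HaveMatch, unsigned &ShiftElts, bool &Swap) {
    Swap = false;
    if (!HaveMatch)
      return false;        // ShiftElts intentionally untouched on this path.
    ShiftElts = 2;
    return true;
  }

  static void emitShift(unsigned ShiftElts) { std::printf("shift=%u\n", ShiftElts); }

  int main() {
    unsigned ShiftElts = 0; // zero-init: no path can hand garbage to emitShift
    bool Swap = false;
    if (pickShuffle(/*HaveMatch=*/true, ShiftElts, Swap))
      emitShift(ShiftElts); // analogous to DAG.getConstant(ShiftElts, ...) at 9919
    return 0;
  }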
9922
9923 if (Subtarget.hasVSX() &&
9924 PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
9925 if (Swap)
9926 std::swap(V1, V2);
9927 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
9928 SDValue Conv2 =
9929 DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
9930
9931 SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
9932 DAG.getConstant(ShiftElts, dl, MVT::i32));
9933 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
9934 }
9935
9936 if (Subtarget.hasP9Vector()) {
9937 if (PPC::isXXBRHShuffleMask(SVOp)) {
9938 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
9939 SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
9940 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
9941 } else if (PPC::isXXBRWShuffleMask(SVOp)) {
9942 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9943 SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
9944 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
9945 } else if (PPC::isXXBRDShuffleMask(SVOp)) {
9946 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
9947 SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
9948 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
9949 } else if (PPC::isXXBRQShuffleMask(SVOp)) {
9950 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
9951 SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
9952 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
9953 }
9954 }
9955
9956 if (Subtarget.hasVSX()) {
9957 if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
9958 int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
9959
9960 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9961 SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
9962 DAG.getConstant(SplatIdx, dl, MVT::i32));
9963 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
9964 }
9965
9966 // Left shifts of 8 bytes are actually swaps. Convert accordingly.
9967 if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
9968 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
9969 SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
9970 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
9971 }
9972 }
9973
9974 // Cases that are handled by instructions that take permute immediates
9975 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
9976 // selected by the instruction selector.
9977 if (V2.isUndef()) {
9978 if (PPC::isSplatShuffleMask(SVOp, 1) ||
9979 PPC::isSplatShuffleMask(SVOp, 2) ||
9980 PPC::isSplatShuffleMask(SVOp, 4) ||
9981 PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
9982 PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
9983 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
9984 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
9985 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
9986 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
9987 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
9988 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
9989 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
9990 (Subtarget.hasP8Altivec() && (
9991 PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
9992 PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
9993 PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
9994 return Op;
9995 }
9996 }
9997
9998 // Altivec has a variety of "shuffle immediates" that take two vector inputs
9999 // and produce a fixed permutation. If any of these match, do not lower to
10000 // VPERM.
10001 unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
10002 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10003 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10004 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
10005 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10006 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10007 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10008 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10009 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10010 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10011 (Subtarget.hasP8Altivec() && (
10012 PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10013 PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
10014 PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
10015 return Op;
10016
10017 // Check to see if this is a shuffle of 4-byte values. If so, we can use our
10018 // perfect shuffle table to emit an optimal matching sequence.
10019 ArrayRef<int> PermMask = SVOp->getMask();
10020
10021 unsigned PFIndexes[4];
10022 bool isFourElementShuffle = true;
10023 for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
10024 unsigned EltNo = 8; // Start out undef.
10025 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
10026 if (PermMask[i*4+j] < 0)
10027 continue; // Undef, ignore it.
10028
10029 unsigned ByteSource = PermMask[i*4+j];
10030 if ((ByteSource & 3) != j) {
10031 isFourElementShuffle = false;
10032 break;
10033 }
10034
10035 if (EltNo == 8) {
10036 EltNo = ByteSource/4;
10037 } else if (EltNo != ByteSource/4) {
10038 isFourElementShuffle = false;
10039 break;
10040 }
10041 }
10042 PFIndexes[i] = EltNo;
10043 }
10044
10045 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
10046 // perfect shuffle vector to determine if it is cost effective to do this as
10047 // discrete instructions, or whether we should use a vperm.
10048 // For now, we skip this for little endian until such time as we have a
10049 // little-endian perfect shuffle table.
10050 if (isFourElementShuffle && !isLittleEndian) {
10051 // Compute the index in the perfect shuffle table.
10052 unsigned PFTableIndex =
10053 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
10054
10055 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10056 unsigned Cost = (PFEntry >> 30);
10057
10058 // Determining when to avoid vperm is tricky. Many things affect the cost
10059 // of vperm, particularly how many times the perm mask needs to be computed.
10060 // For example, if the perm mask can be hoisted out of a loop or is already
10061 // used (perhaps because there are multiple permutes with the same shuffle
10062 // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of
10063 // the loop requires an extra register.
10064 //
10065 // As a compromise, we only emit discrete instructions if the shuffle can be
10066 // generated in 3 or fewer operations. When we have loop information
10067 // available, if this block is within a loop, we should avoid using vperm
10068 // for 3-operation perms and use a constant pool load instead.
10069 if (Cost < 3)
10070 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
10071 }
10072
10073 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
10074 // vector that will get spilled to the constant pool.
10075 if (V2.isUndef()) V2 = V1;
10076
10077 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
10078 // that it is in input element units, not in bytes. Convert now.
10079
10080 // For little endian, the order of the input vectors is reversed, and
10081 // the permutation mask is complemented with respect to 31. This is
10082 // necessary to produce proper semantics with the big-endian-biased vperm
10083 // instruction.
10084 EVT EltVT = V1.getValueType().getVectorElementType();
10085 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
10086
10087 SmallVector<SDValue, 16> ResultMask;
10088 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
10089 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
10090
10091 for (unsigned j = 0; j != BytesPerElement; ++j)
10092 if (isLittleEndian)
10093 ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j),
10094 dl, MVT::i32));
10095 else
10096 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl,
10097 MVT::i32));
10098 }
10099
10100 ShufflesHandledWithVPERM++;
10101 SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
10102   LLVM_DEBUG(dbgs() << "Emitting a VPERM for the following shuffle:\n");
10103   LLVM_DEBUG(SVOp->dump());
10104   LLVM_DEBUG(dbgs() << "With the following permute control vector:\n");
10105   LLVM_DEBUG(VPermMask.dump());
10106
10107 if (isLittleEndian)
10108 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
10109 V2, V1, VPermMask);
10110 else
10111 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
10112 V1, V2, VPermMask);
10113}
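
The loop at lines 10088-10098 turns the element-level shuffle mask into vperm's byte-level control vector, complementing each byte index with respect to 31 on little-endian targets. A standalone sketch of just that conversion (hypothetical helper name):

  #include <cassert>
  #include <cstdint>
  #include <vector>

  // Expand an element shuffle mask to vperm byte selectors, as at 10088-10098.
  static std::vector<uint8_t> buildVPermBytes(const std::vector<int> &PermMask,
                                              unsigned BytesPerElement,
                                              bool IsLittleEndian) {
    std::vector<uint8_t> Bytes;
    for (int Elt : PermMask) {
      unsigned SrcElt = Elt < 0 ? 0 : unsigned(Elt); // undef lanes become 0
      for (unsigned j = 0; j != BytesPerElement; ++j)
        Bytes.push_back(IsLittleEndian ? 31 - (SrcElt * BytesPerElement + j)
                                       : SrcElt * BytesPerElement + j);
    }
    return Bytes;
  }

  int main() {
    std::vector<uint8_t> BE = buildVPermBytes({1, 0, 3, 2}, 4, false);
    assert(BE.size() == 16 && BE[0] == 4 && BE[4] == 0);
    std::vector<uint8_t> LE = buildVPermBytes({1, 0, 3, 2}, 4, true);
    assert(LE[0] == 27 && LE[4] == 31);
    return 0;
  }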
10114
10115/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
10116/// vector comparison. If it is, return true and fill in Opc/isDot with
10117/// information about the intrinsic.
10118static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
10119 bool &isDot, const PPCSubtarget &Subtarget) {
10120 unsigned IntrinsicID =
10121 cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
10122 CompareOpc = -1;
10123 isDot = false;
10124 switch (IntrinsicID) {
10125 default:
10126 return false;
10127 // Comparison predicates.
10128 case Intrinsic::ppc_altivec_vcmpbfp_p:
10129 CompareOpc = 966;
10130 isDot = true;
10131 break;
10132 case Intrinsic::ppc_altivec_vcmpeqfp_p:
10133 CompareOpc = 198;
10134 isDot = true;
10135 break;
10136 case Intrinsic::ppc_altivec_vcmpequb_p:
10137 CompareOpc = 6;
10138 isDot = true;
10139 break;
10140 case Intrinsic::ppc_altivec_vcmpequh_p:
10141 CompareOpc = 70;
10142 isDot = true;
10143 break;
10144 case Intrinsic::ppc_altivec_vcmpequw_p:
10145 CompareOpc = 134;
10146 isDot = true;
10147 break;
10148 case Intrinsic::ppc_altivec_vcmpequd_p:
10149 if (Subtarget.hasP8Altivec()) {
10150 CompareOpc = 199;
10151 isDot = true;
10152 } else
10153 return false;
10154 break;
10155 case Intrinsic::ppc_altivec_vcmpneb_p:
10156 case Intrinsic::ppc_altivec_vcmpneh_p:
10157 case Intrinsic::ppc_altivec_vcmpnew_p:
10158 case Intrinsic::ppc_altivec_vcmpnezb_p:
10159 case Intrinsic::ppc_altivec_vcmpnezh_p:
10160 case Intrinsic::ppc_altivec_vcmpnezw_p:
10161 if (Subtarget.hasP9Altivec()) {
10162 switch (IntrinsicID) {
10163 default:
10164       llvm_unreachable("Unknown comparison intrinsic.");
10165 case Intrinsic::ppc_altivec_vcmpneb_p:
10166 CompareOpc = 7;
10167 break;
10168 case Intrinsic::ppc_altivec_vcmpneh_p:
10169 CompareOpc = 71;
10170 break;
10171 case Intrinsic::ppc_altivec_vcmpnew_p:
10172 CompareOpc = 135;
10173 break;
10174 case Intrinsic::ppc_altivec_vcmpnezb_p:
10175 CompareOpc = 263;
10176 break;
10177 case Intrinsic::ppc_altivec_vcmpnezh_p:
10178 CompareOpc = 327;
10179 break;
10180 case Intrinsic::ppc_altivec_vcmpnezw_p:
10181 CompareOpc = 391;
10182 break;
10183 }
10184 isDot = true;
10185 } else
10186 return false;
10187 break;
10188 case Intrinsic::ppc_altivec_vcmpgefp_p:
10189 CompareOpc = 454;
10190 isDot = true;
10191 break;
10192 case Intrinsic::ppc_altivec_vcmpgtfp_p:
10193 CompareOpc = 710;
10194 isDot = true;
10195 break;
10196 case Intrinsic::ppc_altivec_vcmpgtsb_p:
10197 CompareOpc = 774;
10198 isDot = true;
10199 break;
10200 case Intrinsic::ppc_altivec_vcmpgtsh_p:
10201 CompareOpc = 838;
10202 isDot = true;
10203 break;
10204 case Intrinsic::ppc_altivec_vcmpgtsw_p:
10205 CompareOpc = 902;
10206 isDot = true;
10207 break;
10208 case Intrinsic::ppc_altivec_vcmpgtsd_p:
10209 if (Subtarget.hasP8Altivec()) {
10210 CompareOpc = 967;
10211 isDot = true;
10212 } else
10213 return false;
10214 break;
10215 case Intrinsic::ppc_altivec_vcmpgtub_p:
10216 CompareOpc = 518;
10217 isDot = true;
10218 break;
10219 case Intrinsic::ppc_altivec_vcmpgtuh_p:
10220 CompareOpc = 582;
10221 isDot = true;
10222 break;
10223 case Intrinsic::ppc_altivec_vcmpgtuw_p:
10224 CompareOpc = 646;
10225 isDot = true;
10226 break;
10227 case Intrinsic::ppc_altivec_vcmpgtud_p:
10228 if (Subtarget.hasP8Altivec()) {
10229 CompareOpc = 711;
10230 isDot = true;
10231 } else
10232 return false;
10233 break;
10234
10235 case Intrinsic::ppc_altivec_vcmpequq:
10236 case Intrinsic::ppc_altivec_vcmpgtsq:
10237 case Intrinsic::ppc_altivec_vcmpgtuq:
10238 if (!Subtarget.isISA3_1())
10239 return false;
10240 switch (IntrinsicID) {
10241 default:
10242       llvm_unreachable("Unknown comparison intrinsic.");
10243 case Intrinsic::ppc_altivec_vcmpequq:
10244 CompareOpc = 455;
10245 break;
10246 case Intrinsic::ppc_altivec_vcmpgtsq:
10247 CompareOpc = 903;
10248 break;
10249 case Intrinsic::ppc_altivec_vcmpgtuq:
10250 CompareOpc = 647;
10251 break;
10252 }
10253 break;
10254
10255 // VSX predicate comparisons use the same infrastructure
10256 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10257 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10258 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10259 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10260 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10261 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10262 if (Subtarget.hasVSX()) {
10263 switch (IntrinsicID) {
10264 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10265 CompareOpc = 99;
10266 break;
10267 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10268 CompareOpc = 115;
10269 break;
10270 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10271 CompareOpc = 107;
10272 break;
10273 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10274 CompareOpc = 67;
10275 break;
10276 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10277 CompareOpc = 83;
10278 break;
10279 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10280 CompareOpc = 75;
10281 break;
10282 }
10283 isDot = true;
10284 } else
10285 return false;
10286 break;
10287
10288 // Normal Comparisons.
10289 case Intrinsic::ppc_altivec_vcmpbfp:
10290 CompareOpc = 966;
10291 break;
10292 case Intrinsic::ppc_altivec_vcmpeqfp:
10293 CompareOpc = 198;
10294 break;
10295 case Intrinsic::ppc_altivec_vcmpequb:
10296 CompareOpc = 6;
10297 break;
10298 case Intrinsic::ppc_altivec_vcmpequh:
10299 CompareOpc = 70;
10300 break;
10301 case Intrinsic::ppc_altivec_vcmpequw:
10302 CompareOpc = 134;
10303 break;
10304 case Intrinsic::ppc_altivec_vcmpequd:
10305 if (Subtarget.hasP8Altivec())
10306 CompareOpc = 199;
10307 else
10308 return false;
10309 break;
10310 case Intrinsic::ppc_altivec_vcmpneb:
10311 case Intrinsic::ppc_altivec_vcmpneh:
10312 case Intrinsic::ppc_altivec_vcmpnew:
10313 case Intrinsic::ppc_altivec_vcmpnezb:
10314 case Intrinsic::ppc_altivec_vcmpnezh:
10315 case Intrinsic::ppc_altivec_vcmpnezw:
10316 if (Subtarget.hasP9Altivec())
10317 switch (IntrinsicID) {
10318 default:
10319         llvm_unreachable("Unknown comparison intrinsic.");
10320 case Intrinsic::ppc_altivec_vcmpneb:
10321 CompareOpc = 7;
10322 break;
10323 case Intrinsic::ppc_altivec_vcmpneh:
10324 CompareOpc = 71;
10325 break;
10326 case Intrinsic::ppc_altivec_vcmpnew:
10327 CompareOpc = 135;
10328 break;
10329 case Intrinsic::ppc_altivec_vcmpnezb:
10330 CompareOpc = 263;
10331 break;
10332 case Intrinsic::ppc_altivec_vcmpnezh:
10333 CompareOpc = 327;
10334 break;
10335 case Intrinsic::ppc_altivec_vcmpnezw:
10336 CompareOpc = 391;
10337 break;
10338 }
10339 else
10340 return false;
10341 break;
10342 case Intrinsic::ppc_altivec_vcmpgefp:
10343 CompareOpc = 454;
10344 break;
10345 case Intrinsic::ppc_altivec_vcmpgtfp:
10346 CompareOpc = 710;
10347 break;
10348 case Intrinsic::ppc_altivec_vcmpgtsb:
10349 CompareOpc = 774;
10350 break;
10351 case Intrinsic::ppc_altivec_vcmpgtsh:
10352 CompareOpc = 838;
10353 break;
10354 case Intrinsic::ppc_altivec_vcmpgtsw:
10355 CompareOpc = 902;
10356 break;
10357 case Intrinsic::ppc_altivec_vcmpgtsd:
10358 if (Subtarget.hasP8Altivec())
10359 CompareOpc = 967;
10360 else
10361 return false;
10362 break;
10363 case Intrinsic::ppc_altivec_vcmpgtub:
10364 CompareOpc = 518;
10365 break;
10366 case Intrinsic::ppc_altivec_vcmpgtuh:
10367 CompareOpc = 582;
10368 break;
10369 case Intrinsic::ppc_altivec_vcmpgtuw:
10370 CompareOpc = 646;
10371 break;
10372 case Intrinsic::ppc_altivec_vcmpgtud:
10373 if (Subtarget.hasP8Altivec())
10374 CompareOpc = 711;
10375 else
10376 return false;
10377 break;
10378 case Intrinsic::ppc_altivec_vcmpequq_p:
10379 case Intrinsic::ppc_altivec_vcmpgtsq_p:
10380 case Intrinsic::ppc_altivec_vcmpgtuq_p:
10381 if (!Subtarget.isISA3_1())
10382 return false;
10383 switch (IntrinsicID) {
10384 default:
10385       llvm_unreachable("Unknown comparison intrinsic.");
10386 case Intrinsic::ppc_altivec_vcmpequq_p:
10387 CompareOpc = 455;
10388 break;
10389 case Intrinsic::ppc_altivec_vcmpgtsq_p:
10390 CompareOpc = 903;
10391 break;
10392 case Intrinsic::ppc_altivec_vcmpgtuq_p:
10393 CompareOpc = 647;
10394 break;
10395 }
10396 isDot = true;
10397 break;
10398 }
10399 return true;
10400}
10401
10402/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
10403/// lower, do it, otherwise return null.
10404SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
10405 SelectionDAG &DAG) const {
10406 unsigned IntrinsicID =
10407 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
10408
10409 SDLoc dl(Op);
10410
10411 switch (IntrinsicID) {
10412 case Intrinsic::thread_pointer:
10413 // Reads the thread pointer register, used for __builtin_thread_pointer.
10414 if (Subtarget.isPPC64())
10415 return DAG.getRegister(PPC::X13, MVT::i64);
10416 return DAG.getRegister(PPC::R2, MVT::i32);
10417
10418 case Intrinsic::ppc_mma_disassemble_acc:
10419 case Intrinsic::ppc_mma_disassemble_pair: {
10420 int NumVecs = 2;
10421 SDValue WideVec = Op.getOperand(1);
10422 if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
10423 NumVecs = 4;
10424 WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
10425 }
10426 SmallVector<SDValue, 4> RetOps;
10427 for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
10428 SDValue Extract = DAG.getNode(
10429 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
10430 DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
10431 : VecNo,
10432 dl, MVT::i64));
10433 RetOps.push_back(Extract);
10434 }
10435 return DAG.getMergeValues(RetOps, dl);
10436 }
10437 }
10438
10439 // If this is a lowered altivec predicate compare, CompareOpc is set to the
10440 // opcode number of the comparison.
10441 int CompareOpc;
10442 bool isDot;
10443 if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
10444 return SDValue(); // Don't custom lower most intrinsics.
10445
10446 // If this is a non-dot comparison, make the VCMP node and we are done.
10447 if (!isDot) {
10448 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
10449 Op.getOperand(1), Op.getOperand(2),
10450 DAG.getConstant(CompareOpc, dl, MVT::i32));
10451 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
10452 }
10453
10454 // Create the PPCISD altivec 'dot' comparison node.
10455 SDValue Ops[] = {
10456 Op.getOperand(2), // LHS
10457 Op.getOperand(3), // RHS
10458 DAG.getConstant(CompareOpc, dl, MVT::i32)
10459 };
10460 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
10461 SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
10462
10463 // Now that we have the comparison, emit a copy from the CR to a GPR.
10464 // This is flagged to the above dot comparison.
10465 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
10466 DAG.getRegister(PPC::CR6, MVT::i32),
10467 CompNode.getValue(1));
10468
10469 // Unpack the result based on how the target uses it.
10470 unsigned BitNo; // Bit # of CR6.
10471 bool InvertBit; // Invert result?
10472 switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
10473 default: // Can't happen, don't crash on invalid number though.
10474 case 0: // Return the value of the EQ bit of CR6.
10475 BitNo = 0; InvertBit = false;
10476 break;
10477 case 1: // Return the inverted value of the EQ bit of CR6.
10478 BitNo = 0; InvertBit = true;
10479 break;
10480 case 2: // Return the value of the LT bit of CR6.
10481 BitNo = 2; InvertBit = false;
10482 break;
10483 case 3: // Return the inverted value of the LT bit of CR6.
10484 BitNo = 2; InvertBit = true;
10485 break;
10486 }
10487
10488 // Shift the bit into the low position.
10489 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
10490 DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
10491 // Isolate the bit.
10492 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
10493 DAG.getConstant(1, dl, MVT::i32));
10494
10495 // If we are supposed to, toggle the bit.
10496 if (InvertBit)
10497 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
10498 DAG.getConstant(1, dl, MVT::i32));
10499 return Flags;
10500}
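
The tail of LowerINTRINSIC_WO_CHAIN (lines 10489-10498) reduces the MFOCRF result to a single predicate bit: the shift amounts imply the CR6 field lands in bits 7..4 of the returned GPR, so the wanted bit is shifted down by 8 - (3 - BitNo), masked, and optionally inverted. A standalone sketch of that unpacking (hypothetical helper name):

  #include <cassert>

  // Extract one CR6 bit from an MFOCRF-style value, as at lines 10489-10498.
  // BitNo is 0 for EQ and 2 for LT, matching the switch at line 10472.
  static unsigned unpackCR6Bit(unsigned Flags, unsigned BitNo, bool InvertBit) {
    Flags >>= 8 - (3 - BitNo); // move the wanted bit to position 0
    Flags &= 1;                // isolate it
    if (InvertBit)
      Flags ^= 1;              // negate the predicate if requested
    return Flags;
  }

  int main() {
    unsigned Flags = 0b1000u << 4; // CR6 field LT|GT|EQ|SO = 1000: LT set, EQ clear
    assert(unpackCR6Bit(Flags, /*BitNo=*/2, /*InvertBit=*/false) == 1); // LT
    assert(unpackCR6Bit(Flags, /*BitNo=*/0, /*InvertBit=*/false) == 0); // EQ
    assert(unpackCR6Bit(Flags, /*BitNo=*/0, /*InvertBit=*/true)  == 1); // !EQ
    return 0;
  }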
10501
10502SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
10503 SelectionDAG &DAG) const {
10504 // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
10505 // the beginning of the argument list.
10506 int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
10507 SDLoc DL(Op);
10508 switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
10509 case Intrinsic::ppc_cfence: {
10510     assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
10511     assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
10512 return SDValue(DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
10513 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
10514 Op.getOperand(ArgStart + 1)),
10515 Op.getOperand(0)),
10516 0);
10517 }
10518 default:
10519 break;
10520 }
10521 return SDValue();
10522}
10523
10524// Lower scalar BSWAP64 to xxbrd.
10525SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
10526 SDLoc dl(Op);
10527 // MTVSRDD
10528 Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
10529 Op.getOperand(0));
10530 // XXBRD
10531 Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
10532 // MFVSRD
10533 int VectorIndex = 0;
10534 if (Subtarget.isLittleEndian())
10535 VectorIndex = 1;
10536 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
10537 DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
10538 return Op;
10539}
10540
10541// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
10542// compared to a value that is atomically loaded (atomic loads zero-extend).
10543SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
10544 SelectionDAG &DAG) const {
10545   assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
10546          "Expecting an atomic compare-and-swap here.");
10547 SDLoc dl(Op);
10548 auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
10549 EVT MemVT = AtomicNode->getMemoryVT();
10550 if (MemVT.getSizeInBits() >= 32)
10551 return Op;
10552
10553 SDValue CmpOp = Op.getOperand(2);
10554 // If this is already correctly zero-extended, leave it alone.
10555 auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
10556 if (DAG.MaskedValueIsZero(CmpOp, HighBits))
10557 return Op;
10558
10559 // Clear the high bits of the compare operand.
10560 unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
10561 SDValue NewCmpOp =
10562 DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
10563 DAG.getConstant(MaskVal, dl, MVT::i32));
10564
10565 // Replace the existing compare operand with the properly zero-extended one.
10566 SmallVector<SDValue, 4> Ops;
10567 for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
10568 Ops.push_back(AtomicNode->getOperand(i));
10569 Ops[2] = NewCmpOp;
10570 MachineMemOperand *MMO = AtomicNode->getMemOperand();
10571 SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
10572 auto NodeTy =
10573 (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
10574 return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
10575}
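
For sub-word cmpxchg the compare operand is zero-extended by masking with (1 << width) - 1 (lines 10560-10563), so it compares correctly against the zero-extended atomic load. A standalone sketch of that masking (hypothetical helper name):

  #include <cassert>
  #include <cstdint>

  // Clear the high bits of an i8/i16 compare operand, as at lines 10560-10563.
  static uint32_t zeroExtendCmpOp(uint32_t CmpOp, unsigned MemBits) {
    uint32_t MaskVal = (1u << MemBits) - 1;
    return CmpOp & MaskVal;
  }

  int main() {
    assert(zeroExtendCmpOp(0xFFFFFFAB, 8) == 0xAB);    // i8 operand
    assert(zeroExtendCmpOp(0xFFFFABCD, 16) == 0xABCD); // i16 operand
    return 0;
  }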
10576
10577SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
10578 SelectionDAG &DAG) const {
10579 SDLoc dl(Op);
10580 // Create a stack slot that is 16-byte aligned.
10581 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
10582 int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
10583 EVT PtrVT = getPointerTy(DAG.getDataLayout());
10584 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
10585
10586 // Store the input value into Value#0 of the stack slot.
10587 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
10588 MachinePointerInfo());
10589 // Load it out.
10590 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
10591}
10592
10593SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
10594 SelectionDAG &DAG) const {
10595 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
10596        "Should only be called for ISD::INSERT_VECTOR_ELT");
10597
10598 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
10599 // We have legal lowering for constant indices but not for variable ones.
10600 if (!C)
10601 return SDValue();
10602
10603 EVT VT = Op.getValueType();
10604 SDLoc dl(Op);
10605 SDValue V1 = Op.getOperand(0);
10606 SDValue V2 = Op.getOperand(1);
10607 // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
10608 if (VT == MVT::v8i16 || VT == MVT::v16i8) {
10609 SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
10610 unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
10611 unsigned InsertAtElement = C->getZExtValue();
10612 unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
10613 if (Subtarget.isLittleEndian()) {
10614 InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
10615 }
10616 return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
10617 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10618 }
10619 return Op;
10620}
10621
10622SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
10623 SelectionDAG &DAG) const {
10624 SDLoc dl(Op);
10625 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
10626 SDValue LoadChain = LN->getChain();
10627 SDValue BasePtr = LN->getBasePtr();
10628 EVT VT = Op.getValueType();
10629
10630 if (VT != MVT::v256i1 && VT != MVT::v512i1)
10631 return Op;
10632
10633 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
10634 // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
10635 // 2 or 4 vsx registers.
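// For example, a v512i1 accumulator gives NumVecs = 512 / 128 = 4 below, so
// four v16i8 loads are generated at byte offsets 0, 16, 32 and 48; their
// chains are combined with a TokenFactor, and on little-endian targets the
// loaded values are reversed before being combined with ACC_BUILD (or
// PAIR_BUILD for a v256i1 pair).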
10636 assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
10637        "Type unsupported without MMA");
10638 assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
10639        "Type unsupported without paired vector support");
10640 Align Alignment = LN->getAlign();
10641 SmallVector<SDValue, 4> Loads;
10642 SmallVector<SDValue, 4> LoadChains;
10643 unsigned NumVecs = VT.getSizeInBits() / 128;
10644 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
10645 SDValue Load =
10646 DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
10647 LN->getPointerInfo().getWithOffset(Idx * 16),
10648 commonAlignment(Alignment, Idx * 16),
10649 LN->getMemOperand()->getFlags(), LN->getAAInfo());
10650 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
10651 DAG.getConstant(16, dl, BasePtr.getValueType()));
10652 Loads.push_back(Load);
10653 LoadChains.push_back(Load.getValue(1));
10654 }
10655 if (Subtarget.isLittleEndian()) {
10656 std::reverse(Loads.begin(), Loads.end());
10657 std::reverse(LoadChains.begin(), LoadChains.end());
10658 }
10659 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
10660 SDValue Value =
10661 DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
10662 dl, VT, Loads);
10663 SDValue RetOps[] = {Value, TF};
10664 return DAG.getMergeValues(RetOps, dl);
10665}
10666
10667SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
10668 SelectionDAG &DAG) const {
10669 SDLoc dl(Op);
10670 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
10671 SDValue StoreChain = SN->getChain();
10672 SDValue BasePtr = SN->getBasePtr();
10673 SDValue Value = SN->getValue();
10674 EVT StoreVT = Value.getValueType();
10675
10676 if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
10677 return Op;
10678
10679 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
10680 // Here we create 2 or 4 v16i8 stores to store the pair or accumulator
10681 // underlying registers individually.
10682 assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
10683        "Type unsupported without MMA");
10684 assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
10685        "Type unsupported without paired vector support");
10686 Align Alignment = SN->getAlign();
10687 SmallVector<SDValue, 4> Stores;
10688 unsigned NumVecs = 2;
10689 if (StoreVT == MVT::v512i1) {
10690 Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
10691 NumVecs = 4;
10692 }
10693 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
10694 unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
10695 SDValue Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
10696 DAG.getConstant(VecNum, dl, MVT::i64));
10697 SDValue Store =
10698 DAG.getStore(StoreChain, dl, Elt, BasePtr,
10699 SN->getPointerInfo().getWithOffset(Idx * 16),
10700 commonAlignment(Alignment, Idx * 16),
10701 SN->getMemOperand()->getFlags(), SN->getAAInfo());
10702 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
10703 DAG.getConstant(16, dl, BasePtr.getValueType()));
10704 Stores.push_back(Store);
10705 }
10706 SDValue TF = DAG.getTokenFactor(dl, Stores);
10707 return TF;
10708}
10709
10710SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
10711 SDLoc dl(Op);
10712 if (Op.getValueType() == MVT::v4i32) {
10713 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
10714
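    // In outline (illustrative): writing each 32-bit lane as a = a_hi*2^16 + a_lo
    // and b = b_hi*2^16 + b_lo, the product modulo 2^32 is
    //   a*b = a_lo*b_lo + ((a_lo*b_hi + a_hi*b_lo) << 16).
    // vmulouh below produces the a_lo*b_lo terms, vmsumuhm applied to the
    // rotated RHS produces a_lo*b_hi + a_hi*b_lo, and vslw shifts that sum left
    // by 16 (the -16 splat works because vrlw/vslw only use the low 5 bits of
    // each shift amount).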
10715 SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
10716 // +16 as shift amt.
10717 SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
10718 SDValue RHSSwap = // = vrlw RHS, 16
10719 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
10720
10721 // Shrinkify inputs to v8i16.
10722 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
10723 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
10724 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
10725
10726 // Low parts multiplied together, generating 32-bit results (we ignore the
10727 // top parts).
10728 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
10729 LHS, RHS, DAG, dl, MVT::v4i32);
10730
10731 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
10732 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
10733 // Shift the high parts up 16 bits.
10734 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
10735 Neg16, DAG, dl);
10736 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
10737 } else if (Op.getValueType() == MVT::v16i8) {
10738 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
10739 bool isLittleEndian = Subtarget.isLittleEndian();
10740
10741 // Multiply the even 8-bit parts, producing 16-bit products.
10742 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
10743 LHS, RHS, DAG, dl, MVT::v8i16);
10744 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
10745
10746 // Multiply the odd 8-bit parts, producing 16-bit products.
10747 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
10748 LHS, RHS, DAG, dl, MVT::v8i16);
10749 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
10750
10751 // Merge the results together. Because vmuleub and vmuloub are
10752 // instructions with a big-endian bias, we must reverse the
10753 // element numbering and reverse the meaning of "odd" and "even"
10754 // when generating little endian code.
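    // For example, on big-endian the loop below builds the mask
    // {1, 17, 3, 19, 5, 21, ...}: byte 2*i+1 is the low byte of the i-th even
    // 16-bit product and byte 2*i+1+16 is the low byte of the i-th odd product,
    // so the truncated products are interleaved back into 16 result bytes. On
    // little-endian the mask starts at byte 0 and the two product vectors swap
    // roles in the shuffle.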
10755 int Ops[16];
10756 for (unsigned i = 0; i != 8; ++i) {
10757 if (isLittleEndian) {
10758 Ops[i*2 ] = 2*i;
10759 Ops[i*2+1] = 2*i+16;
10760 } else {
10761 Ops[i*2 ] = 2*i+1;
10762 Ops[i*2+1] = 2*i+1+16;
10763 }
10764 }
10765 if (isLittleEndian)
10766 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
10767 else
10768 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
10769 } else {
10770 llvm_unreachable("Unknown mul to lower!");
10771 }
10772}
10773
10774SDValue PPCTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
10775
10776 assert(Op.getOpcode() == ISD::ABS && "Should only be called for ISD::ABS");
10777
10778 EVT VT = Op.getValueType();
10779 assert(VT.isVector() &&
10780        "Only set vector abs as custom, scalar abs shouldn't reach here!");
10781 assert((VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
10782         VT == MVT::v16i8) &&
10783        "Unexpected vector element type!");
10784 assert((VT != MVT::v2i64 || Subtarget.hasP8Altivec()) &&
10785        "Current subtarget doesn't support smax v2i64!");
10786
10787 // For vector abs, it can be lowered to:
10788 // abs x
10789 // ==>
10790 // y = -x
10791 // smax(x, y)
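  // For example, for a lane holding -7: y = 0 - (-7) = 7 and smax(-7, 7) = 7.
  // The scheme also matches ISD::ABS for INT_MIN, where the negation wraps
  // back to INT_MIN and smax(INT_MIN, INT_MIN) = INT_MIN.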
10792
10793 SDLoc dl(Op);
10794 SDValue X = Op.getOperand(0);
10795 SDValue Zero = DAG.getConstant(0, dl, VT);
10796 SDValue Y = DAG.getNode(ISD::SUB, dl, VT, Zero, X);
10797
10798 // SMAX patch https://reviews.llvm.org/D47332
10799 // hasn't landed yet, so use the intrinsics here for now.
10800 // TODO: Use SMAX directly once the SMAX patch has landed.
10801 Intrinsic::ID BifID = Intrinsic::ppc_altivec_vmaxsw;
10802 if (VT == MVT::v2i64)
10803 BifID = Intrinsic::ppc_altivec_vmaxsd;
10804 else if (VT == MVT::v8i16)
10805 BifID = Intrinsic::ppc_altivec_vmaxsh;
10806 else if (VT == MVT::v16i8)
10807 BifID = Intrinsic::ppc_altivec_vmaxsb;
10808
10809 return BuildIntrinsicOp(BifID, X, Y, DAG, dl, VT);
10810}
10811
10812// Custom lowering for fpext v2f32 to v2f64
10813SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
10814
10815 assert(Op.getOpcode() == ISD::FP_EXTEND &&
10816        "Should only be called for ISD::FP_EXTEND");
10817
10818 // FIXME: handle extends from half precision float vectors on P9.
10819 // We only want to custom lower an extend from v2f32 to v2f64.
10820 if (Op.getValueType() != MVT::v2f64 ||
10821 Op.getOperand(0).getValueType() != MVT::v2f32)
10822 return SDValue();
10823
10824 SDLoc dl(Op);
10825 SDValue Op0 = Op.getOperand(0);
10826
10827 switch (Op0.getOpcode()) {
10828 default:
10829 return SDValue();
10830 case ISD::EXTRACT_SUBVECTOR: {
10831 assert(Op0.getNumOperands() == 2 &&
10832        isa<ConstantSDNode>(Op0->getOperand(1)) &&
10833        "Node should have 2 operands with second one being a constant!");
10834
10835 if (Op0.getOperand(0).getValueType() != MVT::v4f32)
10836 return SDValue();
10837
10838 // Custom lower is only done for high or low doubleword.
10839 int Idx = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
10840 if (Idx % 2 != 0)
10841 return SDValue();
10842
10843 // Since input is v4f32, at this point Idx is either 0 or 2.
10844 // Shift to get the doubleword position we want.
10845 int DWord = Idx >> 1;
10846
10847 // High and low word positions are different on little endian.
10848 if (Subtarget.isLittleEndian())
10849 DWord ^= 0x1;
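    // For example, extracting the subvector at Idx == 2 gives DWord = 1; on
    // little-endian the doubleword numbering is flipped, so the XOR above
    // adjusts it to 0.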
10850
10851 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
10852 Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
10853 }
10854 case ISD::FADD:
10855 case ISD::FMUL:
10856 case ISD::FSUB: {
10857 SDValue NewLoad[2];
10858 for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
10859 // Ensure both inputs are loads.
10860 SDValue LdOp = Op0.getOperand(i);
10861 if (LdOp.getOpcode() != ISD::LOAD)
10862 return SDValue();
10863 // Generate new load node.
10864 LoadSDNode *LD = cast<LoadSDNode>(LdOp);
10865 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
10866 NewLoad[i] = DAG.getMemIntrinsicNode(
10867 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
10868 LD->getMemoryVT(), LD->getMemOperand());
10869 }
10870 SDValue NewOp =
10871 DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
10872 NewLoad[1], Op0.getNode()->getFlags());
10873 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
10874 DAG.getConstant(0, dl, MVT::i32));
10875 }
10876 case ISD::LOAD: {
10877 LoadSDNode *LD = cast<LoadSDNode>(Op0);
10878 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
10879 SDValue NewLd = DAG.getMemIntrinsicNode(
10880 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
10881 LD->getMemoryVT(), LD->getMemOperand());
10882 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
10883 DAG.getConstant(0, dl, MVT::i32));
10884 }
10885 }
10886 llvm_unreachable("ERROR: Should return for all cases within switch.");
10887}
10888
10889/// LowerOperation - Provide custom lowering hooks for some operations.
10890///
10891SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
10892 switch (Op.getOpcode()) {
10893 default: llvm_unreachable("Wasn't expecting to be able to lower this!");
10894 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
10895 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
10896 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
10897 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
10898 case ISD::JumpTable: return LowerJumpTable(Op, DAG);
10899 case ISD::SETCC: return LowerSETCC(Op, DAG);
10900 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
10901 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
10902
10903 // Variable argument lowering.
10904 case ISD::VASTART: return LowerVASTART(Op, DAG);
10905 case ISD::VAARG: return LowerVAARG(Op, DAG);
10906 case ISD::VACOPY: return LowerVACOPY(Op, DAG);
10907
10908 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
10909 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
10910 case ISD::GET_DYNAMIC_AREA_OFFSET:
10911 return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
10912
10913 // Exception handling lowering.
10914 case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
10915 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
10916 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
10917
10918 case ISD::LOAD: return LowerLOAD(Op, DAG);
10919 case ISD::STORE: return LowerSTORE(Op, DAG);
10920 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
10921 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
10922 case ISD::STRICT_FP_TO_UINT:
10923 case ISD::STRICT_FP_TO_SINT:
10924 case ISD::FP_TO_UINT:
10925 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
10926 case ISD::STRICT_UINT_TO_FP:
10927 case ISD::STRICT_SINT_TO_FP:
10928 case ISD::UINT_TO_FP:
10929 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
10930 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
10931
10932 // Lower 64-bit shifts.
10933 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
10934 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
10935 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
10936
10937 case ISD::FSHL: return LowerFunnelShift(Op, DAG);
10938 case ISD::FSHR: return LowerFunnelShift(Op, DAG);
10939
10940 // Vector-related lowering.
10941 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
10942 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
10943 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
10944 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
10945 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
10946 case ISD::MUL: return LowerMUL(Op, DAG);
10947 case ISD::ABS: return LowerABS(Op, DAG);
10948 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
10949 case ISD::ROTL: return LowerROTL(Op, DAG);
10950
10951 // For counter-based loop handling.
10952 case ISD::INTRINSIC_W_CHAIN: return SDValue();
10953
10954 case ISD::BITCAST: return LowerBITCAST(Op, DAG);
10955
10956 // Frame & Return address.
10957 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
10958 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
10959
10960 case ISD::INTRINSIC_VOID:
10961 return LowerINTRINSIC_VOID(Op, DAG);
10962 case ISD::BSWAP:
10963 return LowerBSWAP(Op, DAG);
10964 case ISD::ATOMIC_CMP_SWAP:
10965 return LowerATOMIC_CMP_SWAP(Op, DAG);
10966 }
10967}
10968
10969void PPCTargetLowering::LowerOperationWrapper(SDNode *N,
10970 SmallVectorImpl<SDValue> &Results,
10971 SelectionDAG &DAG) const {
10972 SDValue Res = LowerOperation(SDValue(N, 0), DAG);
10973
10974 if (!Res.getNode())
10975 return;
10976
10977 // Take the return value as-is if original node has only one result.
10978 if (N->getNumValues() == 1) {
10979 Results.push_back(Res);
10980 return;
10981 }
10982
10983 // New node should have the same number of results.
10984 assert((N->getNumValues() == Res->getNumValues()) &&
10985        "Lowering returned the wrong number of results!");
10986
10987 for (unsigned i = 0; i < N->getNumValues(); ++i)
10988 Results.push_back(Res.getValue(i));
10989}
10990
10991void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
10992 SmallVectorImpl<SDValue>&Results,
10993 SelectionDAG &DAG) const {
10994 SDLoc dl(N);
10995 switch (N->getOpcode()) {
10996 default:
10997 llvm_unreachable("Do not know how to custom type legalize this operation!");
10998 case ISD::READCYCLECOUNTER: {
10999 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
11000 SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
11001
11002 Results.push_back(
11003 DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
11004 Results.push_back(RTB.getValue(2));
11005 break;
11006 }
11007 case ISD::INTRINSIC_W_CHAIN: {
11008 if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
11009 Intrinsic::loop_decrement)
11010 break;
11011
11012 assert(N->getValueType(0) == MVT::i1 &&
11013        "Unexpected result type for CTR decrement intrinsic");
11014 EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
11015 N->getValueType(0));
11016 SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
11017 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
11018 N->getOperand(1));
11019
11020 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
11021 Results.push_back(NewInt.getValue(1));
11022 break;
11023 }
11024 case ISD::VAARG: {
11025 if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
11026 return;
11027
11028 EVT VT = N->getValueType(0);
11029
11030 if (VT == MVT::i64) {
11031 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
11032
11033 Results.push_back(NewNode);
11034 Results.push_back(NewNode.getValue(1));
11035 }
11036 return;
11037 }
11038 case ISD::STRICT_FP_TO_SINT:
11039 case ISD::STRICT_FP_TO_UINT:
11040 case ISD::FP_TO_SINT:
11041 case ISD::FP_TO_UINT:
11042 // LowerFP_TO_INT() can only handle f32 and f64.
11043 if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
11044 MVT::ppcf128)
11045 return;
11046 Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
11047 return;
11048 case ISD::TRUNCATE: {
11049 if (!N->getValueType(0).isVector())
11050 return;
11051 SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
11052 if (Lowered)
11053 Results.push_back(Lowered);
11054 return;
11055 }
11056 case ISD::FSHL:
11057 case ISD::FSHR:
11058 // Don't handle funnel shifts here.
11059 return;
11060 case ISD::BITCAST:
11061 // Don't handle bitcast here.
11062 return;
11063 case ISD::FP_EXTEND:
11064 SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
11065 if (Lowered)
11066 Results.push_back(Lowered);
11067 return;
11068 }
11069}
11070
11071//===----------------------------------------------------------------------===//
11072// Other Lowering Code
11073//===----------------------------------------------------------------------===//
11074
11075static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
11076 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
11077 Function *Func = Intrinsic::getDeclaration(M, Id);
11078 return Builder.CreateCall(Func, {});
11079}
11080
11081// The mappings for emitLeading/TrailingFence are taken from
11082// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
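// For example, per the two hooks below: a seq_cst operation gets a leading
// sync and a trailing lwsync; an acquire (or stronger) load on PPC64 gets a
// trailing ppc_cfence call instead of the lwsync; a release store gets only a
// leading lwsync.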
11083Instruction *PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
11084 Instruction *Inst,
11085 AtomicOrdering Ord) const {
11086 if (Ord == AtomicOrdering::SequentiallyConsistent)
11087 return callIntrinsic(Builder, Intrinsic::ppc_sync);
11088 if (isReleaseOrStronger(Ord))
11089 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
11090 return nullptr;
11091}
11092
11093Instruction *PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
11094 Instruction *Inst,
11095 AtomicOrdering Ord) const {
11096 if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
11097 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
11098 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
11099 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
11100 if (isa<LoadInst>(Inst) && Subtarget.isPPC64())
11101 return Builder.CreateCall(
11102 Intrinsic::getDeclaration(
11103 Builder.GetInsertBlock()->getParent()->getParent(),
11104 Intrinsic::ppc_cfence, {Inst->getType()}),
11105 {Inst});
11106 // FIXME: Can use isync for rmw operation.
11107 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
11108 }
11109 return nullptr;
11110}
11111
11112MachineBasicBlock *
11113PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
11114 unsigned AtomicSize,
11115 unsigned BinOpcode,
11116 unsigned CmpOpcode,
11117 unsigned CmpPred) const {
11118 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
11119 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11120
11121 auto LoadMnemonic = PPC::LDARX;
11122 auto StoreMnemonic = PPC::STDCX;
11123 switch (AtomicSize) {
11124 default:
11125 llvm_unreachable("Unexpected size of atomic entity");
11126 case 1:
11127 LoadMnemonic = PPC::LBARX;
11128 StoreMnemonic = PPC::STBCX;
11129 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
11130 break;
11131 case 2:
11132 LoadMnemonic = PPC::LHARX;
11133 StoreMnemonic = PPC::STHCX;
11134 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
11135 break;
11136 case 4:
11137 LoadMnemonic = PPC::LWARX;
11138 StoreMnemonic = PPC::STWCX;
11139 break;
11140 case 8:
11141 LoadMnemonic = PPC::LDARX;
11142 StoreMnemonic = PPC::STDCX;
11143 break;
11144 }
11145
11146 const BasicBlock *LLVM_BB = BB->getBasicBlock();
11147 MachineFunction *F = BB->getParent();
11148 MachineFunction::iterator It = ++BB->getIterator();
11149
11150 Register dest = MI.getOperand(0).getReg();
11151 Register ptrA = MI.getOperand(1).getReg();
11152 Register ptrB = MI.getOperand(2).getReg();
11153 Register incr = MI.getOperand(3).getReg();
11154 DebugLoc dl = MI.getDebugLoc();
11155
11156 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
11157 MachineBasicBlock *loop2MBB =
11158 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
11159 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11160 F->insert(It, loopMBB);
11161 if (CmpOpcode)
11162 F->insert(It, loop2MBB);
11163 F->insert(It, exitMBB);
11164 exitMBB->splice(exitMBB->begin(), BB,
11165 std::next(MachineBasicBlock::iterator(MI)), BB->end());
11166 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11167
11168 MachineRegisterInfo &RegInfo = F->getRegInfo();
11169 Register TmpReg = (!BinOpcode) ? incr :
11170 RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
11171 : &PPC::GPRCRegClass);
11172
11173 // thisMBB:
11174 // ...
11175 // fallthrough --> loopMBB
11176 BB->addSuccessor(loopMBB);
11177
11178 // loopMBB:
11179 // l[wd]arx dest, ptr
11180 // add r0, dest, incr
11181 // st[wd]cx. r0, ptr
11182 // bne- loopMBB
11183 // fallthrough --> exitMBB
11184
11185 // For max/min...
11186 // loopMBB:
11187 // l[wd]arx dest, ptr
11188 // cmpl?[wd] incr, dest
11189 // bgt exitMBB
11190 // loop2MBB:
11191 // st[wd]cx. dest, ptr
11192 // bne- loopMBB
11193 // fallthrough --> exitMBB
11194
11195 BB = loopMBB;
11196 BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
11197 .addReg(ptrA).addReg(ptrB);
11198 if (BinOpcode)
11199 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
11200 if (CmpOpcode) {
11201 // Signed comparisons of byte or halfword values must be sign-extended.
11202 if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
11203 Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
11204 BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
11205 ExtReg).addReg(dest);
11206 BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
11207 .addReg(incr).addReg(ExtReg);
11208 } else
11209 BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
11210 .addReg(incr).addReg(dest);
11211
11212 BuildMI(BB, dl, TII->get(PPC::BCC))
11213 .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
11214 BB->addSuccessor(loop2MBB);
11215 BB->addSuccessor(exitMBB);
11216 BB = loop2MBB;
11217 }
11218 BuildMI(BB, dl, TII->get(StoreMnemonic))
11219 .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
11220 BuildMI(BB, dl, TII->get(PPC::BCC))
11221 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
11222 BB->addSuccessor(loopMBB);
11223 BB->addSuccessor(exitMBB);
11224
11225 // exitMBB:
11226 // ...
11227 BB = exitMBB;
11228 return BB;
11229}
11230
11231MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
11232 MachineInstr &MI, MachineBasicBlock *BB,
11233 bool is8bit, // operation
11234 unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
11235 // If we support part-word atomic mnemonics, just use them
11236 if (Subtarget.hasPartwordAtomics())
11237 return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
11238 CmpPred);
11239
11240 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
11241 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11242 // In 64 bit mode we have to use 64 bits for addresses, even though the
11243 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
11244 // registers without caring whether they're 32 or 64, but here we're
11245 // doing actual arithmetic on the addresses.
11246 bool is64bit = Subtarget.isPPC64();
11247 bool isLittleEndian = Subtarget.isLittleEndian();
11248 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
11249
11250 const BasicBlock *LLVM_BB = BB->getBasicBlock();
11251 MachineFunction *F = BB->getParent();
11252 MachineFunction::iterator It = ++BB->getIterator();
11253
11254 Register dest = MI.getOperand(0).getReg();
11255 Register ptrA = MI.getOperand(1).getReg();
11256 Register ptrB = MI.getOperand(2).getReg();
11257 Register incr = MI.getOperand(3).getReg();
11258 DebugLoc dl = MI.getDebugLoc();
11259
11260 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
11261 MachineBasicBlock *loop2MBB =
11262 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
11263 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11264 F->insert(It, loopMBB);
11265 if (CmpOpcode)
11266 F->insert(It, loop2MBB);
11267 F->insert(It, exitMBB);
11268 exitMBB->splice(exitMBB->begin(), BB,
11269 std::next(MachineBasicBlock::iterator(MI)), BB->end());
11270 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11271
11272 MachineRegisterInfo &RegInfo = F->getRegInfo();
11273 const TargetRegisterClass *RC =
11274 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
11275 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
11276
11277 Register PtrReg = RegInfo.createVirtualRegister(RC);
11278 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
11279 Register ShiftReg =
11280 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
11281 Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
11282 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
11283 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
11284 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
11285 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
11286 Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
11287 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
11288 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
11289 Register Ptr1Reg;
11290 Register TmpReg =
11291 (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
11292
11293 // thisMBB:
11294 // ...
11295 // fallthrough --> loopMBB
11296 BB->addSuccessor(loopMBB);
11297
11298 // The 4-byte load must be aligned, while a char or short may be
11299 // anywhere in the word. Hence all this nasty bookkeeping code.
11300 // add ptr1, ptrA, ptrB [copy if ptrA==0]
11301 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
11302 // xori shift, shift1, 24 [16]
11303 // rlwinm ptr, ptr1, 0, 0, 29
11304 // slw incr2, incr, shift
11305 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
11306 // slw mask, mask2, shift
11307 // loopMBB:
11308 // lwarx tmpDest, ptr
11309 // add tmp, tmpDest, incr2
11310 // andc tmp2, tmpDest, mask
11311 // and tmp3, tmp, mask
11312 // or tmp4, tmp3, tmp2
11313 // stwcx. tmp4, ptr
11314 // bne- loopMBB
11315 // fallthrough --> exitMBB
11316 // srw dest, tmpDest, shift
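  // For example, for an 8-bit atomic whose address has (ptr1 & 3) == 1:
  // shift1 = (ptr1 & 3) * 8 = 8; on big-endian shift = 8 ^ 24 = 16, on
  // little-endian shift = 8; ptr = ptr1 & ~3 is the aligned word address;
  // incr2 = incr << shift and mask = 0xFF << shift then select that byte lane
  // inside the 32-bit word operated on by lwarx/stwcx.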
11317 if (ptrA != ZeroReg) {
11318 Ptr1Reg = RegInfo.createVirtualRegister(RC);
11319 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
11320 .addReg(ptrA)
11321 .addReg(ptrB);
11322 } else {
11323 Ptr1Reg = ptrB;
11324 }
11325 // We need to use a 32-bit subregister here to avoid a register class
11326 // mismatch in 64-bit mode.
11327 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
11328 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
11329 .addImm(3)
11330 .addImm(27)
11331 .addImm(is8bit ? 28 : 27);
11332 if (!isLittleEndian)
11333 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
11334 .addReg(Shift1Reg)
11335 .addImm(is8bit ? 24 : 16);
11336 if (is64bit)
11337 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
11338 .addReg(Ptr1Reg)
11339 .addImm(0)
11340 .addImm(61);
11341 else
11342 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
11343 .addReg(Ptr1Reg)
11344 .addImm(0)
11345 .addImm(0)
11346 .addImm(29);
11347 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
11348 if (is8bit)
11349 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
11350 else {
11351 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
11352 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
11353 .addReg(Mask3Reg)
11354 .addImm(65535);
11355 }
11356 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
11357 .addReg(Mask2Reg)
11358 .addReg(ShiftReg);
11359
11360 BB = loopMBB;
11361 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
11362 .addReg(ZeroReg)
11363 .addReg(PtrReg);
11364 if (BinOpcode)
11365 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
11366 .addReg(Incr2Reg)
11367 .addReg(TmpDestReg);
11368 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
11369 .addReg(TmpDestReg)
11370 .addReg(MaskReg);
11371 BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
11372 if (CmpOpcode) {
11373 // For unsigned comparisons, we can directly compare the shifted values.
11374 // For signed comparisons we shift and sign extend.
11375 Register SReg = RegInfo.createVirtualRegister(GPRC);
11376 BuildMI(BB, dl, TII->get(PPC::AND), SReg)
11377 .addReg(TmpDestReg)
11378 .addReg(MaskReg);
11379 unsigned ValueReg = SReg;
11380 unsigned CmpReg = Incr2Reg;
11381 if (CmpOpcode == PPC::CMPW) {
11382 ValueReg = RegInfo.createVirtualRegister(GPRC);
11383 BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
11384 .addReg(SReg)
11385 .addReg(ShiftReg);
11386 Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
11387 BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
11388 .addReg(ValueReg);
11389 ValueReg = ValueSReg;
11390 CmpReg = incr;
11391 }
11392 BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
11393 .addReg(CmpReg)
11394 .addReg(ValueReg);
11395 BuildMI(BB, dl, TII->get(PPC::BCC))
11396 .addImm(CmpPred)
11397 .addReg(PPC::CR0)
11398 .addMBB(exitMBB);
11399 BB->addSuccessor(loop2MBB);
11400 BB->addSuccessor(exitMBB);
11401 BB = loop2MBB;
11402 }
11403 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
11404 BuildMI(BB, dl, TII->get(PPC::STWCX))
11405 .addReg(Tmp4Reg)
11406 .addReg(ZeroReg)
11407 .addReg(PtrReg);
11408 BuildMI(BB, dl, TII->get(PPC::BCC))
11409 .addImm(PPC::PRED_NE)
11410 .addReg(PPC::CR0)
11411 .addMBB(loopMBB);
11412 BB->addSuccessor(loopMBB);
11413 BB->addSuccessor(exitMBB);
11414
11415 // exitMBB:
11416 // ...
11417 BB = exitMBB;
11418 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
11419 .addReg(TmpDestReg)
11420 .addReg(ShiftReg);
11421 return BB;
11422}
11423
11424llvm::MachineBasicBlock *
11425PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
11426 MachineBasicBlock *MBB) const {
11427 DebugLoc DL = MI.getDebugLoc();
11428 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11429 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
11430
11431 MachineFunction *MF = MBB->getParent();
11432 MachineRegisterInfo &MRI = MF->getRegInfo();
11433
11434 const BasicBlock *BB = MBB->getBasicBlock();
11435 MachineFunction::iterator I = ++MBB->getIterator();
11436
11437 Register DstReg = MI.getOperand(0).getReg();
11438 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
11439 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
11440 Register mainDstReg = MRI.createVirtualRegister(RC);
11441 Register restoreDstReg = MRI.createVirtualRegister(RC);
11442
11443 MVT PVT = getPointerTy(MF->getDataLayout());
11444 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
11445        "Invalid Pointer Size!");
11446 // For v = setjmp(buf), we generate
11447 //
11448 // thisMBB:
11449 // SjLjSetup mainMBB
11450 // bl mainMBB
11451 // v_restore = 1
11452 // b sinkMBB
11453 //
11454 // mainMBB:
11455 // buf[LabelOffset] = LR
11456 // v_main = 0
11457 //
11458 // sinkMBB:
11459 // v = phi(main, restore)
11460 //
11461
11462 MachineBasicBlock *thisMBB = MBB;
11463 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
11464 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
11465 MF->insert(I, mainMBB);
11466 MF->insert(I, sinkMBB);
11467
11468 MachineInstrBuilder MIB;
11469
11470 // Transfer the remainder of BB and its successor edges to sinkMBB.
11471 sinkMBB->splice(sinkMBB->begin(), MBB,
11472 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
11473 sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
11474
11475 // Note that the structure of the jmp_buf used here is not compatible
11476 // with that used by libc, and is not designed to be. Specifically, it
11477 // stores only those 'reserved' registers that LLVM does not otherwise
11478 // understand how to spill. Also, by convention, by the time this
11479 // intrinsic is called, Clang has already stored the frame address in the
11480 // first slot of the buffer and stack address in the third. Following the
11481 // X86 target code, we'll store the jump address in the second slot. We also
11482 // need to save the TOC pointer (R2) to handle jumps between shared
11483 // libraries, and that will be stored in the fourth slot. The thread
11484 // identifier (R13) is not affected.
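  // For example, with a 64-bit pointer size the slots written below are at
  // byte offsets 8 (LabelOffset, the jump address), 24 (TOCOffset, R2) and
  // 32 (BPOffset, the base pointer); offsets 0 and 16 hold the frame address
  // and stack pointer already stored by Clang.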
11485
11486 // thisMBB:
11487 const int64_t LabelOffset = 1 * PVT.getStoreSize();
11488 const int64_t TOCOffset = 3 * PVT.getStoreSize();
11489 const int64_t BPOffset = 4 * PVT.getStoreSize();
11490
11491 // Prepare the IP in a register.
11492 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
11493 Register LabelReg = MRI.createVirtualRegister(PtrRC);
11494 Register BufReg = MI.getOperand(1).getReg();
11495
11496 if (Subtarget.is64BitELFABI()) {
11497 setUsesTOCBasePtr(*MBB->getParent());
11498 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
11499 .addReg(PPC::X2)
11500 .addImm(TOCOffset)
11501 .addReg(BufReg)
11502 .cloneMemRefs(MI);
11503 }
11504
11505 // Naked functions never have a base pointer, and so we use r1. For all
11506 // other functions, this decision must be delayed until during PEI.
11507 unsigned BaseReg;
11508 if (MF->getFunction().hasFnAttribute(Attribute::Naked))
11509 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
11510 else
11511 BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
11512
11513 MIB = BuildMI(*thisMBB, MI, DL,
11514 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
11515 .addReg(BaseReg)
11516 .addImm(BPOffset)
11517 .addReg(BufReg)
11518 .cloneMemRefs(MI);
11519
11520 // Setup
11521 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
11522 MIB.addRegMask(TRI->getNoPreservedMask());
11523
11524 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
11525
11526 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
11527 .addMBB(mainMBB);
11528 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
11529
11530 thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
11531 thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
11532
11533 // mainMBB:
11534 // mainDstReg = 0
11535 MIB =
11536 BuildMI(mainMBB, DL,
11537 TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
11538
11539 // Store IP
11540 if (Subtarget.isPPC64()) {
11541 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
11542 .addReg(LabelReg)
11543 .addImm(LabelOffset)
11544 .addReg(BufReg);
11545 } else {
11546 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
11547 .addReg(LabelReg)
11548 .addImm(LabelOffset)
11549 .addReg(BufReg);
11550 }
11551 MIB.cloneMemRefs(MI);
11552
11553 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
11554 mainMBB->addSuccessor(sinkMBB);
11555
11556 // sinkMBB:
11557 BuildMI(*sinkMBB, sinkMBB->begin(), DL,
11558 TII->get(PPC::PHI), DstReg)
11559 .addReg(mainDstReg).addMBB(mainMBB)
11560 .addReg(restoreDstReg).addMBB(thisMBB);
11561
11562 MI.eraseFromParent();
11563 return sinkMBB;
11564}
11565
11566MachineBasicBlock *
11567PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
11568 MachineBasicBlock *MBB) const {
11569 DebugLoc DL = MI.getDebugLoc();
11570 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11571
11572 MachineFunction *MF = MBB->getParent();
11573 MachineRegisterInfo &MRI = MF->getRegInfo();
11574
11575 MVT PVT = getPointerTy(MF->getDataLayout());
11576 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
11577        "Invalid Pointer Size!");
11578
11579 const TargetRegisterClass *RC =
11580 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
11581 Register Tmp = MRI.createVirtualRegister(RC);
11582 // Since FP is only updated here but NOT referenced, it's treated as GPR.
11583 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
11584 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
11585 unsigned BP =
11586 (PVT == MVT::i64)
11587 ? PPC::X30
11588 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
11589 : PPC::R30);
11590
11591 MachineInstrBuilder MIB;
11592
11593 const int64_t LabelOffset = 1 * PVT.getStoreSize();
11594 const int64_t SPOffset = 2 * PVT.getStoreSize();
11595 const int64_t TOCOffset = 3 * PVT.getStoreSize();
11596 const int64_t BPOffset = 4 * PVT.getStoreSize();
11597
11598 Register BufReg = MI.getOperand(0).getReg();
11599
11600 // Reload FP (the jumped-to function may not have had a
11601 // frame pointer, and if so, then its r31 will be restored
11602 // as necessary).
11603 if (PVT == MVT::i64) {
11604 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
11605 .addImm(0)
11606 .addReg(BufReg);
11607 } else {
11608 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
11609 .addImm(0)
11610 .addReg(BufReg);
11611 }
11612 MIB.cloneMemRefs(MI);
11613
11614 // Reload IP
11615 if (PVT == MVT::i64) {
11616 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
11617 .addImm(LabelOffset)
11618 .addReg(BufReg);
11619 } else {
11620 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
11621 .addImm(LabelOffset)
11622 .addReg(BufReg);
11623 }
11624 MIB.cloneMemRefs(MI);
11625
11626 // Reload SP
11627 if (PVT == MVT::i64) {
11628 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
11629 .addImm(SPOffset)
11630 .addReg(BufReg);
11631 } else {
11632 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
11633 .addImm(SPOffset)
11634 .addReg(BufReg);
11635 }
11636 MIB.cloneMemRefs(MI);
11637
11638 // Reload BP
11639 if (PVT == MVT::i64) {
11640 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
11641 .addImm(BPOffset)
11642 .addReg(BufReg);
11643 } else {
11644 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
11645 .addImm(BPOffset)
11646 .addReg(BufReg);
11647 }
11648 MIB.cloneMemRefs(MI);
11649
11650 // Reload TOC
11651 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
11652 setUsesTOCBasePtr(*MBB->getParent());
11653 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
11654 .addImm(TOCOffset)
11655 .addReg(BufReg)
11656 .cloneMemRefs(MI);
11657 }
11658
11659 // Jump
11660 BuildMI(*MBB, MI, DL,
11661 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
11662 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
11663
11664 MI.eraseFromParent();
11665 return MBB;
11666}
11667
11668bool PPCTargetLowering::hasInlineStackProbe(MachineFunction &MF) const {
11669 // If the function specifically requests inline stack probes, emit them.
11670 if (MF.getFunction().hasFnAttribute("probe-stack"))
11671 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
11672 "inline-asm";
11673 return false;
11674}
11675
11676unsigned PPCTargetLowering::getStackProbeSize(MachineFunction &MF) const {
11677 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
11678 unsigned StackAlign = TFI->getStackAlignment();
11679 assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
11680        "Unexpected stack alignment");
11681 // The default stack probe size is 4096 if the function has no
11682 // stack-probe-size attribute.
11683 unsigned StackProbeSize = 4096;
11684 const Function &Fn = MF.getFunction();
11685 if (Fn.hasFnAttribute("stack-probe-size"))
11686 Fn.getFnAttribute("stack-probe-size")
11687 .getValueAsString()
11688 .getAsInteger(0, StackProbeSize);
11689 // Round down to the stack alignment.
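  // For example, with a 16-byte stack alignment an attribute value of 4001
  // becomes 4001 & ~15 = 4000; a value smaller than the alignment rounds down
  // to 0, in which case the alignment itself is returned below.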
11690 StackProbeSize &= ~(StackAlign - 1);
11691 return StackProbeSize ? StackProbeSize : StackAlign;
11692}
11693
11694// Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
11695// into three phases. In the first phase, it uses the pseudo instruction
11696// PREPARE_PROBED_ALLOCA to get the future result of the actual FramePointer
11697// and FinalStackPtr. In the second phase, it generates a loop that probes the
11698// allocated blocks. Finally, it uses the pseudo instruction DYNAREAOFFSET to get
11699// the future result of MaxCallFrameSize so it can calculate the correct data area pointer.
11700MachineBasicBlock *
11701PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
11702 MachineBasicBlock *MBB) const {
11703 const bool isPPC64 = Subtarget.isPPC64();
11704 MachineFunction *MF = MBB->getParent();
11705 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11706 DebugLoc DL = MI.getDebugLoc();
11707 const unsigned ProbeSize = getStackProbeSize(*MF);
11708 const BasicBlock *ProbedBB = MBB->getBasicBlock();
11709 MachineRegisterInfo &MRI = MF->getRegInfo();
11710 // The CFG of the stack probing code looks as follows:
11711 // +-----+
11712 // | MBB |
11713 // +--+--+
11714 // |
11715 // +----v----+
11716 // +--->+ TestMBB +---+
11717 // | +----+----+ |
11718 // | | |
11719 // | +-----v----+ |
11720 // +---+ BlockMBB | |
11721 // +----------+ |
11722 // |
11723 // +---------+ |
11724 // | TailMBB +<--+
11725 // +---------+
11726  // In MBB, calculate the previous frame pointer and the final stack pointer.
11727  // In TestMBB, test whether SP equals the final stack pointer; if so, jump to
11728  // TailMBB. In BlockMBB, update SP atomically and jump back to TestMBB.
11729  // TailMBB is spliced via \p MI.
11730 MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
11731 MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
11732 MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
11733
11734 MachineFunction::iterator MBBIter = ++MBB->getIterator();
11735 MF->insert(MBBIter, TestMBB);
11736 MF->insert(MBBIter, BlockMBB);
11737 MF->insert(MBBIter, TailMBB);
11738
11739 const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
11740 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
11741
11742 Register DstReg = MI.getOperand(0).getReg();
11743 Register NegSizeReg = MI.getOperand(1).getReg();
11744 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
11745 Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11746 Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11747 Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11748
11749  // Since the value of NegSizeReg might be realigned during prologue/epilogue
11750  // insertion, insert a PREPARE_PROBED_ALLOCA pseudo instruction to get the
11751  // actual FramePointer and NegSize.
11752 unsigned ProbeOpc;
11753 if (!MRI.hasOneNonDBGUse(NegSizeReg))
11754 ProbeOpc =
11755 isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
11756 else
11757    // By using PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG, ActualNegSizeReg and
11758    // NegSizeReg will be allocated to the same physical register, avoiding a
11759    // redundant copy when the only use of NegSizeReg is the current MI, which
11760    // will then be replaced by PREPARE_PROBED_ALLOCA.
11761 ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
11762 : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
11763 BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
11764 .addDef(ActualNegSizeReg)
11765 .addReg(NegSizeReg)
11766 .add(MI.getOperand(2))
11767 .add(MI.getOperand(3));
11768
11769  // Calculate the final stack pointer, which equals SP + ActualNegSize.
11770 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
11771 FinalStackPtr)
11772 .addReg(SPReg)
11773 .addReg(ActualNegSizeReg);
11774
11775 // Materialize a scratch register for update.
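      // The scratch register holds -ProbeSize: a single LI if the value fits in
      // a signed 16-bit immediate, otherwise an LIS/ORI pair building the high
      // and low halves.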
11776 int64_t NegProbeSize = -(int64_t)ProbeSize;
11777  assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
11778 Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11779 if (!isInt<16>(NegProbeSize)) {
11780 Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11781 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
11782 .addImm(NegProbeSize >> 16);
11783 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
11784 ScratchReg)
11785 .addReg(TempReg)
11786 .addImm(NegProbeSize & 0xFFFF);
11787 } else
11788 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
11789 .addImm(NegProbeSize);
11790
11791 {
11792 // Probing leading residual part.
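    // NegMod = ActualNegSize - (ActualNegSize / -ProbeSize) * -ProbeSize, i.e.
    // the part of the allocation that is not a multiple of ProbeSize. The
    // stdux/stwux below stores FramePointer at SP + NegMod and updates SP to
    // SP + NegMod, touching that leading residual in a single update.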
11793 Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11794 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
11795 .addReg(ActualNegSizeReg)
11796 .addReg(ScratchReg);
11797 Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11798 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
11799 .addReg(Div)
11800 .addReg(ScratchReg);
11801 Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11802 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
11803 .addReg(Mul)
11804 .addReg(ActualNegSizeReg);
11805 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
11806 .addReg(FramePointer)
11807 .addReg(SPReg)
11808 .addReg(NegMod);
11809 }
11810
11811 {
11812    // The remaining part should be a multiple of ProbeSize.
11813 Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
11814 BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
11815 .addReg(SPReg)
11816 .addReg(FinalStackPtr);
11817 BuildMI(TestMBB, DL, TII->get(PPC::BCC))
11818 .addImm(PPC::PRED_EQ)
11819 .addReg(CmpResult)
11820 .addMBB(TailMBB);
11821 TestMBB->addSuccessor(BlockMBB);
11822 TestMBB->addSuccessor(TailMBB);
11823 }
11824
11825 {
11826 // Touch the block.
11827 // |P...|P...|P...
11828 BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
11829 .addReg(FramePointer)
11830 .addReg(SPReg)
11831 .addReg(ScratchReg);
11832 BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
11833 BlockMBB->addSuccessor(TestMBB);
11834 }
11835
11836  // The calculation of MaxCallFrameSize is deferred to prologue/epilogue
11837  // insertion; use the DYNAREAOFFSET pseudo instruction to get the future result.
11838 Register MaxCallFrameSizeReg =
11839 MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11840 BuildMI(TailMBB, DL,
11841 TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
11842 MaxCallFrameSizeReg)
11843 .add(MI.getOperand(2))
11844 .add(MI.getOperand(3));
11845 BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
11846 .addReg(SPReg)
11847 .addReg(MaxCallFrameSizeReg);
11848
11849 // Splice instructions after MI to TailMBB.
11850 TailMBB->splice(TailMBB->end(), MBB,
11851 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
11852 TailMBB->transferSuccessorsAndUpdatePHIs(MBB);
11853 MBB->addSuccessor(TestMBB);
11854
11855 // Delete the pseudo instruction.
11856 MI.eraseFromParent();
11857
11858 ++NumDynamicAllocaProbed;
11859 return TailMBB;
11860}
11861
11862MachineBasicBlock *
11863PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
11864 MachineBasicBlock *BB) const {
11865 if (MI.getOpcode() == TargetOpcode::STACKMAP ||
11866 MI.getOpcode() == TargetOpcode::PATCHPOINT) {
11867 if (Subtarget.is64BitELFABI() &&
11868 MI.getOpcode() == TargetOpcode::PATCHPOINT &&
11869 !Subtarget.isUsingPCRelativeCalls()) {
11870 // Call lowering should have added an r2 operand to indicate a dependence
11871      // on the TOC base pointer value. It can't, however, because there is no
11872 // way to mark the dependence as implicit there, and so the stackmap code
11873 // will confuse it with a regular operand. Instead, add the dependence
11874 // here.
11875 MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
11876 }
11877
11878 return emitPatchPoint(MI, BB);
11879 }
11880
11881 if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
11882 MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
11883 return emitEHSjLjSetJmp(MI, BB);
11884 } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
11885 MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
11886 return emitEHSjLjLongJmp(MI, BB);
11887 }
11888
11889 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11890
11891 // To "insert" these instructions we actually have to insert their
11892 // control-flow patterns.
11893 const BasicBlock *LLVM_BB = BB->getBasicBlock();
11894 MachineFunction::iterator It = ++BB->getIterator();
11895
11896 MachineFunction *F = BB->getParent();
11897
11898 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
11899 MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 ||
11900 MI.getOpcode() == PPC::SELECT_I8) {
11901 SmallVector<MachineOperand, 2> Cond;
11902 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
11903 MI.getOpcode() == PPC::SELECT_CC_I8)
11904 Cond.push_back(MI.getOperand(4));
11905 else
11906 Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
11907 Cond.push_back(MI.getOperand(1));
11908
11909 DebugLoc dl = MI.getDebugLoc();
11910 TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
11911 MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
11912 } else if (MI.getOpcode() == PPC::SELECT_CC_F4 ||
11913 MI.getOpcode() == PPC::SELECT_CC_F8 ||
11914 MI.getOpcode() == PPC::SELECT_CC_F16 ||
11915 MI.getOpcode() == PPC::SELECT_CC_VRRC ||
11916 MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
11917 MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
11918 MI.getOpcode() == PPC::SELECT_CC_VSRC ||
11919 MI.getOpcode() == PPC::SELECT_CC_SPE4 ||
11920 MI.getOpcode() == PPC::SELECT_CC_SPE ||
11921 MI.getOpcode() == PPC::SELECT_F4 ||
11922 MI.getOpcode() == PPC::SELECT_F8 ||
11923 MI.getOpcode() == PPC::SELECT_F16 ||
11924 MI.getOpcode() == PPC::SELECT_SPE ||
11925 MI.getOpcode() == PPC::SELECT_SPE4 ||
11926 MI.getOpcode() == PPC::SELECT_VRRC ||
11927 MI.getOpcode() == PPC::SELECT_VSFRC ||
11928 MI.getOpcode() == PPC::SELECT_VSSRC ||
11929 MI.getOpcode() == PPC::SELECT_VSRC) {
11930 // The incoming instruction knows the destination vreg to set, the
11931 // condition code register to branch on, the true/false values to
11932 // select between, and a branch opcode to use.
11933
11934 // thisMBB:
11935 // ...
11936 // TrueVal = ...
11937 // cmpTY ccX, r1, r2
11938 // bCC copy1MBB
11939 // fallthrough --> copy0MBB
11940 MachineBasicBlock *thisMBB = BB;
11941 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
11942 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
11943 DebugLoc dl = MI.getDebugLoc();
11944 F->insert(It, copy0MBB);
11945 F->insert(It, sinkMBB);
11946
11947 // Transfer the remainder of BB and its successor edges to sinkMBB.
11948 sinkMBB->splice(sinkMBB->begin(), BB,
11949 std::next(MachineBasicBlock::iterator(MI)), BB->end());
11950 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
11951
11952 // Next, add the true and fallthrough blocks as its successors.
11953 BB->addSuccessor(copy0MBB);
11954 BB->addSuccessor(sinkMBB);
11955
11956 if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||
11957 MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||
11958 MI.getOpcode() == PPC::SELECT_F16 ||
11959 MI.getOpcode() == PPC::SELECT_SPE4 ||
11960 MI.getOpcode() == PPC::SELECT_SPE ||
11961 MI.getOpcode() == PPC::SELECT_VRRC ||
11962 MI.getOpcode() == PPC::SELECT_VSFRC ||
11963 MI.getOpcode() == PPC::SELECT_VSSRC ||
11964 MI.getOpcode() == PPC::SELECT_VSRC) {
11965 BuildMI(BB, dl, TII->get(PPC::BC))
11966 .addReg(MI.getOperand(1).getReg())
11967 .addMBB(sinkMBB);
11968 } else {
11969 unsigned SelectPred = MI.getOperand(4).getImm();
11970 BuildMI(BB, dl, TII->get(PPC::BCC))
11971 .addImm(SelectPred)
11972 .addReg(MI.getOperand(1).getReg())
11973 .addMBB(sinkMBB);
11974 }
11975
11976 // copy0MBB:
11977 // %FalseValue = ...
11978 // # fallthrough to sinkMBB
11979 BB = copy0MBB;
11980
11981 // Update machine-CFG edges
11982 BB->addSuccessor(sinkMBB);
11983
11984 // sinkMBB:
11985 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
11986 // ...
11987 BB = sinkMBB;
11988 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
11989 .addReg(MI.getOperand(3).getReg())
11990 .addMBB(copy0MBB)
11991 .addReg(MI.getOperand(2).getReg())
11992 .addMBB(thisMBB);
11993 } else if (MI.getOpcode() == PPC::ReadTB) {
11994 // To read the 64-bit time-base register on a 32-bit target, we read the
11995 // two halves. Should the counter have wrapped while it was being read, we
11996 // need to try again.
11997 // ...
11998 // readLoop:
11999 // mfspr Rx,TBU # load from TBU
12000 // mfspr Ry,TB # load from TB
12001 // mfspr Rz,TBU # load from TBU
12002 // cmpw crX,Rx,Rz # check if 'old'='new'
12003 // bne readLoop # branch if they're not equal
12004 // ...
12005
12006 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
12007 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12008 DebugLoc dl = MI.getDebugLoc();
12009 F->insert(It, readMBB);
12010 F->insert(It, sinkMBB);
12011
12012 // Transfer the remainder of BB and its successor edges to sinkMBB.
12013 sinkMBB->splice(sinkMBB->begin(), BB,
12014 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12015 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
12016
12017 BB->addSuccessor(readMBB);
12018 BB = readMBB;
12019
12020 MachineRegisterInfo &RegInfo = F->getRegInfo();
12021 Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
12022 Register LoReg = MI.getOperand(0).getReg();
12023 Register HiReg = MI.getOperand(1).getReg();
12024
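    // SPR 269 is the upper half of the time base (TBU) and SPR 268 is the
    // lower half (TB), matching the pseudocode above.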
12025 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
12026 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
12027 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
12028
12029 Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12030
12031 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
12032 .addReg(HiReg)
12033 .addReg(ReadAgainReg);
12034 BuildMI(BB, dl, TII->get(PPC::BCC))
12035 .addImm(PPC::PRED_NE)
12036 .addReg(CmpReg)
12037 .addMBB(readMBB);
12038
12039 BB->addSuccessor(readMBB);
12040 BB->addSuccessor(sinkMBB);
12041 } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
12042 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
12043 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
12044 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
12045 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
12046 BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
12047 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
12048 BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
12049
12050 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
12051 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
12052 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
12053 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
12054 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
12055 BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
12056 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
12057 BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
12058
12059 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
12060 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
12061 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
12062 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
12063 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
12064 BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
12065 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
12066 BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
12067
12068 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
12069 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
12070 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
12071 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
12072 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
12073 BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
12074 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
12075 BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
12076
12077 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
12078 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
12079 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
12080 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
12081 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
12082 BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
12083 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
12084 BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
12085
12086 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
12087 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
12088 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
12089 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
12090 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
12091 BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
12092 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
12093 BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
12094
12095 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
12096 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE);
12097 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
12098 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE);
12099 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
12100 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE);
12101 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
12102 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE);
12103
12104 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
12105 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE);
12106 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
12107 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE);
12108 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
12109 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE);
12110 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
12111 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE);
12112
12113 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
12114 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE);
12115 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
12116 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE);
12117 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
12118 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE);
12119 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
12120 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE);
12121
12122 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
12123 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE);
12124 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
12125 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE);
12126 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
12127 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE);
12128 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
12129 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE);
12130
12131 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
12132 BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
12133 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
12134 BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
12135 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
12136 BB = EmitAtomicBinary(MI, BB, 4, 0);
12137 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
12138 BB = EmitAtomicBinary(MI, BB, 8, 0);
12139 else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
12140 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
12141 (Subtarget.hasPartwordAtomics() &&
12142 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
12143 (Subtarget.hasPartwordAtomics() &&
12144 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
12145 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
12146
12147 auto LoadMnemonic = PPC::LDARX;
12148 auto StoreMnemonic = PPC::STDCX;
12149 switch (MI.getOpcode()) {
12150 default:
12151      llvm_unreachable("Compare and swap of unknown size");
12152 case PPC::ATOMIC_CMP_SWAP_I8:
12153 LoadMnemonic = PPC::LBARX;
12154 StoreMnemonic = PPC::STBCX;
12155      assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
12156 break;
12157 case PPC::ATOMIC_CMP_SWAP_I16:
12158 LoadMnemonic = PPC::LHARX;
12159 StoreMnemonic = PPC::STHCX;
12160      assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
12161 break;
12162 case PPC::ATOMIC_CMP_SWAP_I32:
12163 LoadMnemonic = PPC::LWARX;
12164 StoreMnemonic = PPC::STWCX;
12165 break;
12166 case PPC::ATOMIC_CMP_SWAP_I64:
12167 LoadMnemonic = PPC::LDARX;
12168 StoreMnemonic = PPC::STDCX;
12169 break;
12170 }
12171 Register dest = MI.getOperand(0).getReg();
12172 Register ptrA = MI.getOperand(1).getReg();
12173 Register ptrB = MI.getOperand(2).getReg();
12174 Register oldval = MI.getOperand(3).getReg();
12175 Register newval = MI.getOperand(4).getReg();
12176 DebugLoc dl = MI.getDebugLoc();
12177
12178 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
12179 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
12180 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
12181 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12182 F->insert(It, loop1MBB);
12183 F->insert(It, loop2MBB);
12184 F->insert(It, midMBB);
12185 F->insert(It, exitMBB);
12186 exitMBB->splice(exitMBB->begin(), BB,
12187 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12188 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12189
12190 // thisMBB:
12191 // ...
12192 // fallthrough --> loopMBB
12193 BB->addSuccessor(loop1MBB);
12194
12195 // loop1MBB:
12196 // l[bhwd]arx dest, ptr
12197 // cmp[wd] dest, oldval
12198 // bne- midMBB
12199 // loop2MBB:
12200 // st[bhwd]cx. newval, ptr
12201 // bne- loopMBB
12202 // b exitBB
12203 // midMBB:
12204 // st[bhwd]cx. dest, ptr
12205 // exitBB:
12206 BB = loop1MBB;
12207 BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
12208 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
12209 .addReg(oldval)
12210 .addReg(dest);
12211 BuildMI(BB, dl, TII->get(PPC::BCC))
12212 .addImm(PPC::PRED_NE)
12213 .addReg(PPC::CR0)
12214 .addMBB(midMBB);
12215 BB->addSuccessor(loop2MBB);
12216 BB->addSuccessor(midMBB);
12217
12218 BB = loop2MBB;
12219 BuildMI(BB, dl, TII->get(StoreMnemonic))
12220 .addReg(newval)
12221 .addReg(ptrA)
12222 .addReg(ptrB);
12223 BuildMI(BB, dl, TII->get(PPC::BCC))
12224 .addImm(PPC::PRED_NE)
12225 .addReg(PPC::CR0)
12226 .addMBB(loop1MBB);
12227 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
12228 BB->addSuccessor(loop1MBB);
12229 BB->addSuccessor(exitMBB);
12230
12231 BB = midMBB;
12232 BuildMI(BB, dl, TII->get(StoreMnemonic))
12233 .addReg(dest)
12234 .addReg(ptrA)
12235 .addReg(ptrB);
12236 BB->addSuccessor(exitMBB);
12237
12238 // exitMBB:
12239 // ...
12240 BB = exitMBB;
12241 } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
12242 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
12243 // We must use 64-bit registers for addresses when targeting 64-bit,
12244 // since we're actually doing arithmetic on them. Other registers
12245 // can be 32-bit.
12246 bool is64bit = Subtarget.isPPC64();
12247 bool isLittleEndian = Subtarget.isLittleEndian();
12248 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
12249
12250 Register dest = MI.getOperand(0).getReg();
12251 Register ptrA = MI.getOperand(1).getReg();
12252 Register ptrB = MI.getOperand(2).getReg();
12253 Register oldval = MI.getOperand(3).getReg();
12254 Register newval = MI.getOperand(4).getReg();
12255 DebugLoc dl = MI.getDebugLoc();
12256
12257 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
12258 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
12259 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
12260 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12261 F->insert(It, loop1MBB);
12262 F->insert(It, loop2MBB);
12263 F->insert(It, midMBB);
12264 F->insert(It, exitMBB);
12265 exitMBB->splice(exitMBB->begin(), BB,
12266 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12267 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12268
12269 MachineRegisterInfo &RegInfo = F->getRegInfo();
12270 const TargetRegisterClass *RC =
12271 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12272 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
12273
12274 Register PtrReg = RegInfo.createVirtualRegister(RC);
12275 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
12276 Register ShiftReg =
12277 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
12278 Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
12279 Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
12280 Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
12281 Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
12282 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
12283 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
12284 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
12285 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
12286 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
12287 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
12288 Register Ptr1Reg;
12289 Register TmpReg = RegInfo.createVirtualRegister(GPRC);
12290 Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
12291 // thisMBB:
12292 // ...
12293 // fallthrough --> loopMBB
12294 BB->addSuccessor(loop1MBB);
12295
12296 // The 4-byte load must be aligned, while a char or short may be
12297 // anywhere in the word. Hence all this nasty bookkeeping code.
12298 // add ptr1, ptrA, ptrB [copy if ptrA==0]
12299 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
12300 // xori shift, shift1, 24 [16]
12301 // rlwinm ptr, ptr1, 0, 0, 29
12302 // slw newval2, newval, shift
12303 // slw oldval2, oldval,shift
12304 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
12305 // slw mask, mask2, shift
12306 // and newval3, newval2, mask
12307 // and oldval3, oldval2, mask
12308 // loop1MBB:
12309 // lwarx tmpDest, ptr
12310 // and tmp, tmpDest, mask
12311 // cmpw tmp, oldval3
12312 // bne- midMBB
12313 // loop2MBB:
12314 // andc tmp2, tmpDest, mask
12315 // or tmp4, tmp2, newval3
12316 // stwcx. tmp4, ptr
12317 // bne- loop1MBB
12318 // b exitBB
12319 // midMBB:
12320 // stwcx. tmpDest, ptr
12321 // exitBB:
12322 // srw dest, tmpDest, shift
12323 if (ptrA != ZeroReg) {
12324 Ptr1Reg = RegInfo.createVirtualRegister(RC);
12325 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
12326 .addReg(ptrA)
12327 .addReg(ptrB);
12328 } else {
12329 Ptr1Reg = ptrB;
12330 }
12331
12332    // We need to use the 32-bit subregister to avoid a register-class mismatch
12333    // in 64-bit mode.
12334 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
12335 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
12336 .addImm(3)
12337 .addImm(27)
12338 .addImm(is8bit ? 28 : 27);
12339 if (!isLittleEndian)
12340 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
12341 .addReg(Shift1Reg)
12342 .addImm(is8bit ? 24 : 16);
12343 if (is64bit)
12344 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
12345 .addReg(Ptr1Reg)
12346 .addImm(0)
12347 .addImm(61);
12348 else
12349 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
12350 .addReg(Ptr1Reg)
12351 .addImm(0)
12352 .addImm(0)
12353 .addImm(29);
12354 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
12355 .addReg(newval)
12356 .addReg(ShiftReg);
12357 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
12358 .addReg(oldval)
12359 .addReg(ShiftReg);
12360 if (is8bit)
12361 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
12362 else {
12363 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
12364 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
12365 .addReg(Mask3Reg)
12366 .addImm(65535);
12367 }
12368 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
12369 .addReg(Mask2Reg)
12370 .addReg(ShiftReg);
12371 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
12372 .addReg(NewVal2Reg)
12373 .addReg(MaskReg);
12374 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
12375 .addReg(OldVal2Reg)
12376 .addReg(MaskReg);
12377
12378 BB = loop1MBB;
12379 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
12380 .addReg(ZeroReg)
12381 .addReg(PtrReg);
12382 BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
12383 .addReg(TmpDestReg)
12384 .addReg(MaskReg);
12385 BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
12386 .addReg(TmpReg)
12387 .addReg(OldVal3Reg);
12388 BuildMI(BB, dl, TII->get(PPC::BCC))
12389 .addImm(PPC::PRED_NE)
12390 .addReg(PPC::CR0)
12391 .addMBB(midMBB);
12392 BB->addSuccessor(loop2MBB);
12393 BB->addSuccessor(midMBB);
12394
12395 BB = loop2MBB;
12396 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
12397 .addReg(TmpDestReg)
12398 .addReg(MaskReg);
12399 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
12400 .addReg(Tmp2Reg)
12401 .addReg(NewVal3Reg);
12402 BuildMI(BB, dl, TII->get(PPC::STWCX))
12403 .addReg(Tmp4Reg)
12404 .addReg(ZeroReg)
12405 .addReg(PtrReg);
12406 BuildMI(BB, dl, TII->get(PPC::BCC))
12407 .addImm(PPC::PRED_NE)
12408 .addReg(PPC::CR0)
12409 .addMBB(loop1MBB);
12410 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
12411 BB->addSuccessor(loop1MBB);
12412 BB->addSuccessor(exitMBB);
12413
12414 BB = midMBB;
12415 BuildMI(BB, dl, TII->get(PPC::STWCX))
12416 .addReg(TmpDestReg)
12417 .addReg(ZeroReg)
12418 .addReg(PtrReg);
12419 BB->addSuccessor(exitMBB);
12420
12421 // exitMBB:
12422 // ...
12423 BB = exitMBB;
12424 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
12425 .addReg(TmpReg)
12426 .addReg(ShiftReg);
12427 } else if (MI.getOpcode() == PPC::FADDrtz) {
12428 // This pseudo performs an FADD with rounding mode temporarily forced
12429 // to round-to-zero. We emit this via custom inserter since the FPSCR
12430 // is not modeled at the SelectionDAG level.
12431 Register Dest = MI.getOperand(0).getReg();
12432 Register Src1 = MI.getOperand(1).getReg();
12433 Register Src2 = MI.getOperand(2).getReg();
12434 DebugLoc dl = MI.getDebugLoc();
12435
12436 MachineRegisterInfo &RegInfo = F->getRegInfo();
12437 Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
12438
12439 // Save FPSCR value.
12440 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
12441
12442 // Set rounding mode to round-to-zero.
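    // MTFSB1 31 sets the low bit of the RN field and MTFSB0 30 clears the high
    // bit, giving RN = 01 (round to zero) in the encoding listed for SETRNDi
    // below.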
12443 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
12444 .addImm(31)
12445 .addReg(PPC::RM, RegState::ImplicitDefine);
12446
12447 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
12448 .addImm(30)
12449 .addReg(PPC::RM, RegState::ImplicitDefine);
12450
12451 // Perform addition.
12452 auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
12453 .addReg(Src1)
12454 .addReg(Src2);
12455 if (MI.getFlag(MachineInstr::NoFPExcept))
12456 MIB.setMIFlag(MachineInstr::NoFPExcept);
12457
12458 // Restore FPSCR value.
12459 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
12460 } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
12461 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
12462 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
12463 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
12464 unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
12465 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
12466 ? PPC::ANDI8_rec
12467 : PPC::ANDI_rec;
12468 bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
12469 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
12470
12471 MachineRegisterInfo &RegInfo = F->getRegInfo();
12472 Register Dest = RegInfo.createVirtualRegister(
12473 Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
12474
12475 DebugLoc Dl = MI.getDebugLoc();
12476 BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
12477 .addReg(MI.getOperand(1).getReg())
12478 .addImm(1);
12479 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12480 MI.getOperand(0).getReg())
12481 .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
12482 } else if (MI.getOpcode() == PPC::TCHECK_RET) {
12483 DebugLoc Dl = MI.getDebugLoc();
12484 MachineRegisterInfo &RegInfo = F->getRegInfo();
12485 Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12486 BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
12487 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12488 MI.getOperand(0).getReg())
12489 .addReg(CRReg);
12490 } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
12491 DebugLoc Dl = MI.getDebugLoc();
12492 unsigned Imm = MI.getOperand(1).getImm();
12493 BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
12494 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12495 MI.getOperand(0).getReg())
12496 .addReg(PPC::CR0EQ);
12497 } else if (MI.getOpcode() == PPC::SETRNDi) {
12498 DebugLoc dl = MI.getDebugLoc();
12499 Register OldFPSCRReg = MI.getOperand(0).getReg();
12500
12501 // Save FPSCR value.
12502 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
12503
12504    // The floating-point rounding mode is in bits 62:63 of FPSCR, and has
12505 // the following settings:
12506 // 00 Round to nearest
12507 // 01 Round to 0
12508 // 10 Round to +inf
12509 // 11 Round to -inf
12510
12511    // When the operand is an immediate, use its two least significant bits to
12512    // set bits 62:63 of FPSCR.
12513 unsigned Mode = MI.getOperand(1).getImm();
12514 BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
12515 .addImm(31)
12516 .addReg(PPC::RM, RegState::ImplicitDefine);
12517
12518 BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
12519 .addImm(30)
12520 .addReg(PPC::RM, RegState::ImplicitDefine);
12521 } else if (MI.getOpcode() == PPC::SETRND) {
12522 DebugLoc dl = MI.getDebugLoc();
12523
12524    // Copy a register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg,
12525    // or from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
12526    // If the target doesn't have DirectMove, we have to go through the stack,
12527    // because the target lacks instructions such as mtvsrd and mfvsrd that
12528    // could do this conversion directly.
12529 auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
12530 if (Subtarget.hasDirectMove()) {
12531 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
12532 .addReg(SrcReg);
12533 } else {
12534 // Use stack to do the register copy.
12535 unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
12536 MachineRegisterInfo &RegInfo = F->getRegInfo();
12537 const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
12538 if (RC == &PPC::F8RCRegClass) {
12539          // Copy register from F8RCRegClass to G8RCRegClass.
12540          assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
12541                 "Unsupported RegClass.");
12542
12543 StoreOp = PPC::STFD;
12544 LoadOp = PPC::LD;
12545 } else {
12546          // Copy register from G8RCRegClass to F8RCRegClass.
12547          assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
12548                 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
12549                 "Unsupported RegClass.");
12550 }
12551
12552 MachineFrameInfo &MFI = F->getFrameInfo();
12553 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
12554
12555 MachineMemOperand *MMOStore = F->getMachineMemOperand(
12556 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
12557 MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
12558 MFI.getObjectAlign(FrameIdx));
12559
12560 // Store the SrcReg into the stack.
12561 BuildMI(*BB, MI, dl, TII->get(StoreOp))
12562 .addReg(SrcReg)
12563 .addImm(0)
12564 .addFrameIndex(FrameIdx)
12565 .addMemOperand(MMOStore);
12566
12567 MachineMemOperand *MMOLoad = F->getMachineMemOperand(
12568 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
12569 MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
12570 MFI.getObjectAlign(FrameIdx));
12571
12572 // Load from the stack where SrcReg is stored, and save to DestReg,
12573 // so we have done the RegClass conversion from RegClass::SrcReg to
12574 // RegClass::DestReg.
12575 BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
12576 .addImm(0)
12577 .addFrameIndex(FrameIdx)
12578 .addMemOperand(MMOLoad);
12579 }
12580 };
12581
12582 Register OldFPSCRReg = MI.getOperand(0).getReg();
12583
12584 // Save FPSCR value.
12585 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
12586
12587    // When the operand is a gprc register, use its two least significant bits
12588    // and the mtfsf instruction to set bits 62:63 of FPSCR.
12589 //
12590 // copy OldFPSCRTmpReg, OldFPSCRReg
12591 // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
12592 // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
12593 // copy NewFPSCRReg, NewFPSCRTmpReg
12594 // mtfsf 255, NewFPSCRReg
12595 MachineOperand SrcOp = MI.getOperand(1);
12596 MachineRegisterInfo &RegInfo = F->getRegInfo();
12597 Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12598
12599 copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
12600
12601 Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12602 Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12603
12604    // The first operand of INSERT_SUBREG should be a register that has
12605    // subregisters. We only care about its register class, so we can use an
12606    // IMPLICIT_DEF register.
12607 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
12608 BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
12609 .addReg(ImDefReg)
12610 .add(SrcOp)
12611 .addImm(1);
12612
12613 Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12614 BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
12615 .addReg(OldFPSCRTmpReg)
12616 .addReg(ExtSrcReg)
12617 .addImm(0)
12618 .addImm(62);
12619
12620 Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
12621 copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
12622
12623    // The mask 255 means that bits 32:63 of NewFPSCRReg are written to bits
12624    // 32:63 of FPSCR.
12625 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
12626 .addImm(255)
12627 .addReg(NewFPSCRReg)
12628 .addImm(0)
12629 .addImm(0);
12630 } else if (MI.getOpcode() == PPC::SETFLM) {
12631 DebugLoc Dl = MI.getDebugLoc();
12632
12633    // The result of setflm is the previous FPSCR content, so we need to save it first.
12634 Register OldFPSCRReg = MI.getOperand(0).getReg();
12635 BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
12636
12637 // Put bits in 32:63 to FPSCR.
12638 Register NewFPSCRReg = MI.getOperand(1).getReg();
12639 BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
12640 .addImm(255)
12641 .addReg(NewFPSCRReg)
12642 .addImm(0)
12643 .addImm(0);
12644 } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
12645 MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
12646 return emitProbedAlloca(MI, BB);
12647 } else {
12648    llvm_unreachable("Unexpected instr type to insert");
12649 }
12650
12651 MI.eraseFromParent(); // The pseudo instruction is gone now.
12652 return BB;
12653}
12654
12655//===----------------------------------------------------------------------===//
12656// Target Optimization Hooks
12657//===----------------------------------------------------------------------===//
12658
12659static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
12660 // For the estimates, convergence is quadratic, so we essentially double the
12661 // number of digits correct after every iteration. For both FRE and FRSQRTE,
12662 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
12663 // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
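  // Starting from 2^-5, three iterations reach 2^-40 (enough for float's 23
  // bits) and four reach 2^-80 (enough for double's 52 bits); starting from
  // 2^-14, one and two iterations suffice, matching the counts below.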
12664 int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
12665 if (VT.getScalarType() == MVT::f64)
12666 RefinementSteps++;
12667 return RefinementSteps;
12668}
12669
12670SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
12671 int Enabled, int &RefinementSteps,
12672 bool &UseOneConstNR,
12673 bool Reciprocal) const {
12674 EVT VT = Operand.getValueType();
12675 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
12676 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
12677 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
12678 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
12679 if (RefinementSteps == ReciprocalEstimate::Unspecified)
12680 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
12681
12682 // The Newton-Raphson computation with a single constant does not provide
12683 // enough accuracy on some CPUs.
12684 UseOneConstNR = !Subtarget.needsTwoConstNR();
12685 return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
12686 }
12687 return SDValue();
12688}
12689
12690SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
12691 int Enabled,
12692 int &RefinementSteps) const {
12693 EVT VT = Operand.getValueType();
12694 if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
12695 (VT == MVT::f64 && Subtarget.hasFRE()) ||
12696 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
12697 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
12698 if (RefinementSteps == ReciprocalEstimate::Unspecified)
12699 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
12700 return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
12701 }
12702 return SDValue();
12703}
12704
12705unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
12706 // Note: This functionality is used only when unsafe-fp-math is enabled, and
12707 // on cores with reciprocal estimates (which are used when unsafe-fp-math is
12708 // enabled for division), this functionality is redundant with the default
12709 // combiner logic (once the division -> reciprocal/multiply transformation
12710 // has taken place). As a result, this matters more for older cores than for
12711 // newer ones.
12712
12713  // Combine multiple FDIVs with the same divisor into multiple FMULs by the
12714  // reciprocal if there are two or more FDIVs (for embedded cores with only
12715  // one FP pipeline) or three or more FDIVs (for generic OOO cores).
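  // For example, on a generic core the three divisions a/d, b/d and c/d are
  // rewritten as r = 1.0/d followed by a*r, b*r and c*r.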
12716 switch (Subtarget.getCPUDirective()) {
12717 default:
12718 return 3;
12719 case PPC::DIR_440:
12720 case PPC::DIR_A2:
12721 case PPC::DIR_E500:
12722 case PPC::DIR_E500mc:
12723 case PPC::DIR_E5500:
12724 return 2;
12725 }
12726}
12727
12728// isConsecutiveLSLoc needs to work even if all adds have not yet been
12729// collapsed, and so we need to look through chains of them.
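// For example, (add (add X, 8), 16) yields Base = X with Offset increased by 24
// after the recursive calls below.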
12730static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
12731 int64_t& Offset, SelectionDAG &DAG) {
12732 if (DAG.isBaseWithConstantOffset(Loc)) {
12733 Base = Loc.getOperand(0);
12734 Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
12735
12736 // The base might itself be a base plus an offset, and if so, accumulate
12737 // that as well.
12738 getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
12739 }
12740}
12741
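// Return true if Loc accesses memory exactly Dist * Bytes away from Base's
// pointer, checking frame indices, base-plus-constant-offset chains, and
// global-address-plus-offset forms in turn.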
12742static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
12743 unsigned Bytes, int Dist,
12744 SelectionDAG &DAG) {
12745 if (VT.getSizeInBits() / 8 != Bytes)
12746 return false;
12747
12748 SDValue BaseLoc = Base->getBasePtr();
12749 if (Loc.getOpcode() == ISD::FrameIndex) {
12750 if (BaseLoc.getOpcode() != ISD::FrameIndex)
12751 return false;
12752 const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
12753 int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
12754 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
12755 int FS = MFI.getObjectSize(FI);
12756 int BFS = MFI.getObjectSize(BFI);
12757 if (FS != BFS || FS != (int)Bytes) return false;
12758 return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
12759 }
12760
12761 SDValue Base1 = Loc, Base2 = BaseLoc;
12762 int64_t Offset1 = 0, Offset2 = 0;
12763 getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
12764 getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
12765 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
12766 return true;
12767
12768 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12769 const GlobalValue *GV1 = nullptr;
12770 const GlobalValue *GV2 = nullptr;
12771 Offset1 = 0;
12772 Offset2 = 0;
12773 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
12774 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
12775 if (isGA1 && isGA2 && GV1 == GV2)
12776 return Offset1 == (Offset2 + Dist*Bytes);
12777 return false;
12778}
12779
12780// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
12781// not enforce equality of the chain operands.
12782static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
12783 unsigned Bytes, int Dist,
12784 SelectionDAG &DAG) {
12785 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
12786 EVT VT = LS->getMemoryVT();
12787 SDValue Loc = LS->getBasePtr();
12788 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
12789 }
12790
12791 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
12792 EVT VT;
12793 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
12794 default: return false;
12795 case Intrinsic::ppc_altivec_lvx:
12796 case Intrinsic::ppc_altivec_lvxl:
12797 case Intrinsic::ppc_vsx_lxvw4x:
12798 case Intrinsic::ppc_vsx_lxvw4x_be:
12799 VT = MVT::v4i32;
12800 break;
12801 case Intrinsic::ppc_vsx_lxvd2x:
12802 case Intrinsic::ppc_vsx_lxvd2x_be:
12803 VT = MVT::v2f64;
12804 break;
12805 case Intrinsic::ppc_altivec_lvebx:
12806 VT = MVT::i8;
12807 break;
12808 case Intrinsic::ppc_altivec_lvehx:
12809 VT = MVT::i16;
12810 break;
12811 case Intrinsic::ppc_altivec_lvewx:
12812 VT = MVT::i32;
12813 break;
12814 }
12815
12816 return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
12817 }
12818
12819 if (N->getOpcode() == ISD::INTRINSIC_VOID) {
12820 EVT VT;
12821 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
12822 default: return false;
12823 case Intrinsic::ppc_altivec_stvx:
12824 case Intrinsic::ppc_altivec_stvxl:
12825 case Intrinsic::ppc_vsx_stxvw4x:
12826 VT = MVT::v4i32;
12827 break;
12828 case Intrinsic::ppc_vsx_stxvd2x:
12829 VT = MVT::v2f64;
12830 break;
12831 case Intrinsic::ppc_vsx_stxvw4x_be:
12832 VT = MVT::v4i32;
12833 break;
12834 case Intrinsic::ppc_vsx_stxvd2x_be:
12835 VT = MVT::v2f64;
12836 break;
12837 case Intrinsic::ppc_altivec_stvebx:
12838 VT = MVT::i8;
12839 break;
12840 case Intrinsic::ppc_altivec_stvehx:
12841 VT = MVT::i16;
12842 break;
12843 case Intrinsic::ppc_altivec_stvewx:
12844 VT = MVT::i32;
12845 break;
12846 }
12847
12848 return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
12849 }
12850
12851 return false;
12852}
12853
12854// Return true if there is a nearby consecutive load to the one provided
12855// (regardless of alignment). We search up and down the chain, looking through
12856// token factors and other loads (but nothing else). A true result therefore
12857// indicates that it is safe to create a new consecutive load adjacent to the
12858// load provided.
12859static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
12860 SDValue Chain = LD->getChain();
12861 EVT VT = LD->getMemoryVT();
12862
12863 SmallSet<SDNode *, 16> LoadRoots;
12864 SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
12865 SmallSet<SDNode *, 16> Visited;
12866
12867 // First, search up the chain, branching to follow all token-factor operands.
12868 // If we find a consecutive load, then we're done, otherwise, record all
12869 // nodes just above the top-level loads and token factors.
12870 while (!Queue.empty()) {
12871 SDNode *ChainNext = Queue.pop_back_val();
12872 if (!Visited.insert(ChainNext).second)
12873 continue;
12874
12875 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
12876 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
12877 return true;
12878
12879 if (!Visited.count(ChainLD->getChain().getNode()))
12880 Queue.push_back(ChainLD->getChain().getNode());
12881 } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
12882 for (const SDUse &O : ChainNext->ops())
12883 if (!Visited.count(O.getNode()))
12884 Queue.push_back(O.getNode());
12885 } else
12886 LoadRoots.insert(ChainNext);
12887 }
12888
12889 // Second, search down the chain, starting from the top-level nodes recorded
12890 // in the first phase. These top-level nodes are the nodes just above all
12891  // loads and token factors. Starting with their uses, recursively look through
12892 // all loads (just the chain uses) and token factors to find a consecutive
12893 // load.
12894 Visited.clear();
12895 Queue.clear();
12896
12897 for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
12898 IE = LoadRoots.end(); I != IE; ++I) {
12899 Queue.push_back(*I);
12900
12901 while (!Queue.empty()) {
12902 SDNode *LoadRoot = Queue.pop_back_val();
12903 if (!Visited.insert(LoadRoot).second)
12904 continue;
12905
12906 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
12907 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
12908 return true;
12909
12910 for (SDNode::use_iterator UI = LoadRoot->use_begin(),
12911 UE = LoadRoot->use_end(); UI != UE; ++UI)
12912 if (((isa<MemSDNode>(*UI) &&
12913 cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
12914 UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
12915 Queue.push_back(*UI);
12916 }
12917 }
12918
12919 return false;
12920}
12921
12922/// This function is called when we have proved that a SETCC node can be replaced
12923/// by subtraction (and other supporting instructions) so that the result of the
12924/// comparison is kept in a GPR instead of a CR. This function is purely for
12925/// codegen purposes and has some flags to guide the codegen process.
12926static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
12927 bool Swap, SDLoc &DL, SelectionDAG &DAG) {
12928  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
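  // For example, with 32-bit unsigned operands zero-extended to 64 bits,
  // (Op0 - Op1) >> 63 is 1 exactly when Op0 < Op1, so SETULT needs neither a
  // swap nor a complement, SETUGT swaps the operands, and SETULE/SETUGE
  // additionally complement the bit (see ConvertSETCCToSubtract below).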
12929
12930  // Zero-extend the operands to the largest legal integer type. They must
12931  // originally be of a strictly smaller size.
12932 auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
12933 DAG.getConstant(Size, DL, MVT::i32));
12934 auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
12935 DAG.getConstant(Size, DL, MVT::i32));
12936
12937 // Swap if needed. Depends on the condition code.
12938 if (Swap)
12939 std::swap(Op0, Op1);
12940
12941 // Subtract extended integers.
12942 auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
12943
12944 // Move the sign bit to the least significant position and zero out the rest.
12945 // Now the least significant bit carries the result of original comparison.
12946 auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
12947 DAG.getConstant(Size - 1, DL, MVT::i32));
12948 auto Final = Shifted;
12949
12950 // Complement the result if needed. Based on the condition code.
12951 if (Complement)
12952 Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
12953 DAG.getConstant(1, DL, MVT::i64));
12954
12955 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
12956}
12957
12958SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
12959 DAGCombinerInfo &DCI) const {
12960  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
12961
12962 SelectionDAG &DAG = DCI.DAG;
12963 SDLoc DL(N);
12964
12965  // The size of the integers being compared has a critical role in the
12966  // following analysis, so we prefer to do this when all types are legal.
12967 if (!DCI.isAfterLegalizeDAG())
12968 return SDValue();
12969
12970  // If all users of the SETCC extend its value to a legal integer type,
12971  // then we replace the SETCC with a subtraction.
12972 for (SDNode::use_iterator UI = N->use_begin(),
12973 UE = N->use_end(); UI != UE; ++UI) {
12974 if (UI->getOpcode() != ISD::ZERO_EXTEND)
12975 return SDValue();
12976 }
12977
12978 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
12979 auto OpSize = N->getOperand(0).getValueSizeInBits();
12980
12981 unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
12982
12983 if (OpSize < Size) {
12984 switch (CC) {
12985 default: break;
12986 case ISD::SETULT:
12987 return generateEquivalentSub(N, Size, false, false, DL, DAG);
12988 case ISD::SETULE:
12989 return generateEquivalentSub(N, Size, true, true, DL, DAG);
12990 case ISD::SETUGT:
12991 return generateEquivalentSub(N, Size, false, true, DL, DAG);
12992 case ISD::SETUGE:
12993 return generateEquivalentSub(N, Size, true, false, DL, DAG);
12994 }
12995 }
12996
12997 return SDValue();
12998}
12999
13000SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
13001 DAGCombinerInfo &DCI) const {
13002 SelectionDAG &DAG = DCI.DAG;
13003 SDLoc dl(N);
13004
13005  assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
13006 // If we're tracking CR bits, we need to be careful that we don't have:
13007 // trunc(binary-ops(zext(x), zext(y)))
13008 // or
13009  // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...))
13010 // such that we're unnecessarily moving things into GPRs when it would be
13011 // better to keep them in CR bits.
13012
13013 // Note that trunc here can be an actual i1 trunc, or can be the effective
13014 // truncation that comes from a setcc or select_cc.
13015 if (N->getOpcode() == ISD::TRUNCATE &&
13016 N->getValueType(0) != MVT::i1)
13017 return SDValue();
13018
13019 if (N->getOperand(0).getValueType() != MVT::i32 &&
13020 N->getOperand(0).getValueType() != MVT::i64)
13021 return SDValue();
13022
13023 if (N->getOpcode() == ISD::SETCC ||
13024 N->getOpcode() == ISD::SELECT_CC) {
13025 // If we're looking at a comparison, then we need to make sure that the
13026    // high bits (all except for the first) don't affect the result.
13027 ISD::CondCode CC =
13028 cast<CondCodeSDNode>(N->getOperand(
13029 N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
13030 unsigned OpBits = N->getOperand(0).getValueSizeInBits();
13031
13032 if (ISD::isSignedIntSetCC(CC)) {
13033 if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
13034 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
13035 return SDValue();
13036 } else if (ISD::isUnsignedIntSetCC(CC)) {
13037 if (!DAG.MaskedValueIsZero(N->getOperand(0),
13038 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
13039 !DAG.MaskedValueIsZero(N->getOperand(1),
13040 APInt::getHighBitsSet(OpBits, OpBits-1)))
13041 return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
13042 : SDValue());
13043 } else {
13044 // This is neither a signed nor an unsigned comparison, just make sure
13045 // that the high bits are equal.
13046 KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
13047 KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
13048
13049 // We don't really care about what is known about the first bit (if
13050 // anything), so clear it in all masks prior to comparing them.
13051 Op1Known.Zero.clearBit(0); Op1Known.One.clearBit(0);
13052 Op2Known.Zero.clearBit(0); Op2Known.One.clearBit(0);
13053
13054 if (Op1Known.Zero != Op2Known.Zero || Op1Known.One != Op2Known.One)
13055 return SDValue();
13056 }
13057 }
13058
13059 // We now know that the higher-order bits are irrelevant; we just need to
13060 // make sure that all of the intermediate operations are bit operations, and
13061 // all inputs are extensions.
13062 if (N->getOperand(0).getOpcode() != ISD::AND &&
13063 N->getOperand(0).getOpcode() != ISD::OR &&
13064 N->getOperand(0).getOpcode() != ISD::XOR &&
13065 N->getOperand(0).getOpcode() != ISD::SELECT &&
13066 N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
13067 N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
13068 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
13069 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
13070 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
13071 return SDValue();
13072
13073 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
13074 N->getOperand(1).getOpcode() != ISD::AND &&
13075 N->getOperand(1).getOpcode() != ISD::OR &&
13076 N->getOperand(1).getOpcode() != ISD::XOR &&
13077 N->getOperand(1).getOpcode() != ISD::SELECT &&
13078 N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
13079 N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
13080 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
13081 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
13082 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
13083 return SDValue();
13084
13085 SmallVector<SDValue, 4> Inputs;
13086 SmallVector<SDValue, 8> BinOps, PromOps;
13087 SmallPtrSet<SDNode *, 16> Visited;
13088
13089 for (unsigned i = 0; i < 2; ++i) {
13090 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
13091 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
13092 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
13093 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
13094 isa<ConstantSDNode>(N->getOperand(i)))
13095 Inputs.push_back(N->getOperand(i));
13096 else
13097 BinOps.push_back(N->getOperand(i));
13098
13099 if (N->getOpcode() == ISD::TRUNCATE)
13100 break;
13101 }
13102
13103 // Visit all inputs, collect all binary operations (and, or, xor and
13104 // select) that are all fed by extensions.
13105 while (!BinOps.empty()) {
13106 SDValue BinOp = BinOps.back();
13107 BinOps.pop_back();
13108
13109 if (!Visited.insert(BinOp.getNode()).second)
13110 continue;
13111
13112 PromOps.push_back(BinOp);
13113
13114 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
13115 // The condition of the select is not promoted.
13116 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
13117 continue;
13118 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
13119 continue;
13120
13121 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
13122 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
13123 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
13124 BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
13125 isa<ConstantSDNode>(BinOp.getOperand(i))) {
13126 Inputs.push_back(BinOp.getOperand(i));
13127 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
13128 BinOp.getOperand(i).getOpcode() == ISD::OR ||
13129 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
13130 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
13131 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
13132 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
13133 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
13134 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
13135 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
13136 BinOps.push_back(BinOp.getOperand(i));
13137 } else {
13138 // We have an input that is not an extension or another binary
13139 // operation; we'll abort this transformation.
13140 return SDValue();
13141 }
13142 }
13143 }
13144
13145 // Make sure that this is a self-contained cluster of operations (which
13146 // is not quite the same thing as saying that everything has only one
13147 // use).
13148 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13149 if (isa<ConstantSDNode>(Inputs[i]))
13150 continue;
13151
13152 for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
13153 UE = Inputs[i].getNode()->use_end();
13154 UI != UE; ++UI) {
13155 SDNode *User = *UI;
13156 if (User != N && !Visited.count(User))
13157 return SDValue();
13158
13159 // Make sure that we're not going to promote the non-output-value
13160 // operand(s) or SELECT or SELECT_CC.
13161 // FIXME: Although we could sometimes handle this, and it does occur in
13162 // practice that one of the condition inputs to the select is also one of
13163 // the outputs, we currently can't deal with this.
13164 if (User->getOpcode() == ISD::SELECT) {
13165 if (User->getOperand(0) == Inputs[i])
13166 return SDValue();
13167 } else if (User->getOpcode() == ISD::SELECT_CC) {
13168 if (User->getOperand(0) == Inputs[i] ||
13169 User->getOperand(1) == Inputs[i])
13170 return SDValue();
13171 }
13172 }
13173 }
13174
13175 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
13176 for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
13177 UE = PromOps[i].getNode()->use_end();
13178 UI != UE; ++UI) {
13179 SDNode *User = *UI;
13180 if (User != N && !Visited.count(User))
13181 return SDValue();
13182
13183 // Make sure that we're not going to promote the non-output-value
13184 // operand(s) or SELECT or SELECT_CC.
13185 // FIXME: Although we could sometimes handle this, and it does occur in
13186 // practice that one of the condition inputs to the select is also one of
13187 // the outputs, we currently can't deal with this.
13188 if (User->getOpcode() == ISD::SELECT) {
13189 if (User->getOperand(0) == PromOps[i])
13190 return SDValue();
13191 } else if (User->getOpcode() == ISD::SELECT_CC) {
13192 if (User->getOperand(0) == PromOps[i] ||
13193 User->getOperand(1) == PromOps[i])
13194 return SDValue();
13195 }
13196 }
13197 }
13198
13199 // Replace all inputs with the extension operand.
13200 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13201 // Constants may have users outside the cluster of to-be-promoted nodes,
13202 // and so we need to replace those as we do the promotions.
13203 if (isa<ConstantSDNode>(Inputs[i]))
13204 continue;
13205 else
13206 DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
13207 }
13208
13209 std::list<HandleSDNode> PromOpHandles;
13210 for (auto &PromOp : PromOps)
13211 PromOpHandles.emplace_back(PromOp);
13212
13213 // Replace all operations (these are all the same, but have a different
13214 // (i1) return type). DAG.getNode will validate that the types of
13215 // a binary operator match, so go through the list in reverse so that
13216 // we've likely promoted both operands first. Any intermediate truncations or
13217 // extensions disappear.
13218 while (!PromOpHandles.empty()) {
13219 SDValue PromOp = PromOpHandles.back().getValue();
13220 PromOpHandles.pop_back();
13221
13222 if (PromOp.getOpcode() == ISD::TRUNCATE ||
13223 PromOp.getOpcode() == ISD::SIGN_EXTEND ||
13224 PromOp.getOpcode() == ISD::ZERO_EXTEND ||
13225 PromOp.getOpcode() == ISD::ANY_EXTEND) {
13226 if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
13227 PromOp.getOperand(0).getValueType() != MVT::i1) {
13228 // The operand is not yet ready (see comment below).
13229 PromOpHandles.emplace_front(PromOp);
13230 continue;
13231 }
13232
13233 SDValue RepValue = PromOp.getOperand(0);
13234 if (isa<ConstantSDNode>(RepValue))
13235 RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
13236
13237 DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
13238 continue;
13239 }
13240
13241 unsigned C;
13242 switch (PromOp.getOpcode()) {
13243 default: C = 0; break;
13244 case ISD::SELECT: C = 1; break;
13245 case ISD::SELECT_CC: C = 2; break;
13246 }
13247
13248 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
13249 PromOp.getOperand(C).getValueType() != MVT::i1) ||
13250 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
13251 PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
13252 // The to-be-promoted operands of this node have not yet been
13253 // promoted (this should be rare because we're going through the
13254 // list backward, but if one of the operands has several users in
13255 // this cluster of to-be-promoted nodes, it is possible).
13256 PromOpHandles.emplace_front(PromOp);
13257 continue;
13258 }
13259
13260 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
13261 PromOp.getNode()->op_end());
13262
13263 // If there are any constant inputs, make sure they're replaced now.
13264 for (unsigned i = 0; i < 2; ++i)
13265 if (isa<ConstantSDNode>(Ops[C+i]))
13266 Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
13267
13268 DAG.ReplaceAllUsesOfValueWith(PromOp,
13269 DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
13270 }
13271
13272 // Now we're left with the initial truncation itself.
13273 if (N->getOpcode() == ISD::TRUNCATE)
13274 return N->getOperand(0);
13275
13276 // Otherwise, this is a comparison. The operands to be compared have just
13277 // changed type (to i1), but everything else is the same.
13278 return SDValue(N, 0);
13279}
13280
13281SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
13282 DAGCombinerInfo &DCI) const {
13283 SelectionDAG &DAG = DCI.DAG;
13284 SDLoc dl(N);
13285
13286 // If we're tracking CR bits, we need to be careful that we don't have:
13287 // zext(binary-ops(trunc(x), trunc(y)))
13288 // or
13289 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
13290 // such that we're unnecessarily moving things into CR bits that can more
13291 // efficiently stay in GPRs. Note that if we're not certain that the high
13292 // bits are set as required by the final extension, we still may need to do
13293 // some masking to get the proper behavior.
13294
13295 // This same functionality is important on PPC64 when dealing with
13296 // 32-to-64-bit extensions; these occur often when 32-bit values are used as
13297 // the return values of functions. Because it is so similar, it is handled
13298 // here as well.
13299
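// Illustrative sketch of the 32-to-64-bit case on PPC64: a pattern such as
//   %z = zext (and (trunc i64 %a to i32), (trunc i64 %b to i32)) to i64
// can normally be folded to "and i64 %a, %b", possibly followed by a masking
// AND or a shift pair when the high bits are not already in the state the
// final extension requires (see ReallyNeedsExt below).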
13300 if (N->getValueType(0) != MVT::i32 &&
13301 N->getValueType(0) != MVT::i64)
13302 return SDValue();
13303
13304 if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
13305 (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
13306 return SDValue();
13307
13308 if (N->getOperand(0).getOpcode() != ISD::AND &&
13309 N->getOperand(0).getOpcode() != ISD::OR &&
13310 N->getOperand(0).getOpcode() != ISD::XOR &&
13311 N->getOperand(0).getOpcode() != ISD::SELECT &&
13312 N->getOperand(0).getOpcode() != ISD::SELECT_CC)
13313 return SDValue();
13314
13315 SmallVector<SDValue, 4> Inputs;
13316 SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
13317 SmallPtrSet<SDNode *, 16> Visited;
13318
13319 // Visit all inputs, collect all binary operations (and, or, xor and
13320 // select) that are all fed by truncations.
13321 while (!BinOps.empty()) {
13322 SDValue BinOp = BinOps.back();
13323 BinOps.pop_back();
13324
13325 if (!Visited.insert(BinOp.getNode()).second)
13326 continue;
13327
13328 PromOps.push_back(BinOp);
13329
13330 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
13331 // The condition of the select is not promoted.
13332 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
13333 continue;
13334 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
13335 continue;
13336
13337 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
13338 isa<ConstantSDNode>(BinOp.getOperand(i))) {
13339 Inputs.push_back(BinOp.getOperand(i));
13340 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
13341 BinOp.getOperand(i).getOpcode() == ISD::OR ||
13342 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
13343 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
13344 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
13345 BinOps.push_back(BinOp.getOperand(i));
13346 } else {
13347 // We have an input that is not a truncation or another binary
13348 // operation; we'll abort this transformation.
13349 return SDValue();
13350 }
13351 }
13352 }
13353
13354 // The operands of a select that must be truncated when the select is
13355 // promoted because the operand is actually part of the to-be-promoted set.
13356 DenseMap<SDNode *, EVT> SelectTruncOp[2];
13357
13358 // Make sure that this is a self-contained cluster of operations (which
13359 // is not quite the same thing as saying that everything has only one
13360 // use).
13361 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13362 if (isa<ConstantSDNode>(Inputs[i]))
13363 continue;
13364
13365 for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
13366 UE = Inputs[i].getNode()->use_end();
13367 UI != UE; ++UI) {
13368 SDNode *User = *UI;
13369 if (User != N && !Visited.count(User))
13370 return SDValue();
13371
13372 // If we're going to promote the non-output-value operand(s) or SELECT or
13373 // SELECT_CC, record them for truncation.
13374 if (User->getOpcode() == ISD::SELECT) {
13375 if (User->getOperand(0) == Inputs[i])
13376 SelectTruncOp[0].insert(std::make_pair(User,
13377 User->getOperand(0).getValueType()));
13378 } else if (User->getOpcode() == ISD::SELECT_CC) {
13379 if (User->getOperand(0) == Inputs[i])
13380 SelectTruncOp[0].insert(std::make_pair(User,
13381 User->getOperand(0).getValueType()));
13382 if (User->getOperand(1) == Inputs[i])
13383 SelectTruncOp[1].insert(std::make_pair(User,
13384 User->getOperand(1).getValueType()));
13385 }
13386 }
13387 }
13388
13389 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
13390 for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
13391 UE = PromOps[i].getNode()->use_end();
13392 UI != UE; ++UI) {
13393 SDNode *User = *UI;
13394 if (User != N && !Visited.count(User))
13395 return SDValue();
13396
13397 // If we're going to promote the non-output-value operand(s) or SELECT or
13398 // SELECT_CC, record them for truncation.
13399 if (User->getOpcode() == ISD::SELECT) {
13400 if (User->getOperand(0) == PromOps[i])
13401 SelectTruncOp[0].insert(std::make_pair(User,
13402 User->getOperand(0).getValueType()));
13403 } else if (User->getOpcode() == ISD::SELECT_CC) {
13404 if (User->getOperand(0) == PromOps[i])
13405 SelectTruncOp[0].insert(std::make_pair(User,
13406 User->getOperand(0).getValueType()));
13407 if (User->getOperand(1) == PromOps[i])
13408 SelectTruncOp[1].insert(std::make_pair(User,
13409 User->getOperand(1).getValueType()));
13410 }
13411 }
13412 }
13413
13414 unsigned PromBits = N->getOperand(0).getValueSizeInBits();
13415 bool ReallyNeedsExt = false;
13416 if (N->getOpcode() != ISD::ANY_EXTEND) {
13417 // If any of the inputs is not already sign/zero extended as required,
13418 // then we'll still need to do that at the end.
13419 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13420 if (isa<ConstantSDNode>(Inputs[i]))
13421 continue;
13422
13423 unsigned OpBits =
13424 Inputs[i].getOperand(0).getValueSizeInBits();
13425 assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
13426
13427 if ((N->getOpcode() == ISD::ZERO_EXTEND &&
13428 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
13429 APInt::getHighBitsSet(OpBits,
13430 OpBits-PromBits))) ||
13431 (N->getOpcode() == ISD::SIGN_EXTEND &&
13432 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
13433 (OpBits-(PromBits-1)))) {
13434 ReallyNeedsExt = true;
13435 break;
13436 }
13437 }
13438 }
13439
13440 // Replace all inputs, either with the truncation operand, or a
13441 // truncation or extension to the final output type.
13442 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13443 // Constant inputs need to be replaced with the to-be-promoted nodes that
13444 // use them because they might have users outside of the cluster of
13445 // promoted nodes.
13446 if (isa<ConstantSDNode>(Inputs[i]))
13447 continue;
13448
13449 SDValue InSrc = Inputs[i].getOperand(0);
13450 if (Inputs[i].getValueType() == N->getValueType(0))
13451 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
13452 else if (N->getOpcode() == ISD::SIGN_EXTEND)
13453 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13454 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
13455 else if (N->getOpcode() == ISD::ZERO_EXTEND)
13456 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13457 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
13458 else
13459 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13460 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
13461 }
13462
13463 std::list<HandleSDNode> PromOpHandles;
13464 for (auto &PromOp : PromOps)
13465 PromOpHandles.emplace_back(PromOp);
13466
13467 // Replace all operations (these are all the same, but have a different
13468 // (promoted) return type). DAG.getNode will validate that the types of
13469 // a binary operator match, so go through the list in reverse so that
13470 // we've likely promoted both operands first.
13471 while (!PromOpHandles.empty()) {
13472 SDValue PromOp = PromOpHandles.back().getValue();
13473 PromOpHandles.pop_back();
13474
13475 unsigned C;
13476 switch (PromOp.getOpcode()) {
13477 default: C = 0; break;
13478 case ISD::SELECT: C = 1; break;
13479 case ISD::SELECT_CC: C = 2; break;
13480 }
13481
13482 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
13483 PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
13484 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
13485 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
13486 // The to-be-promoted operands of this node have not yet been
13487 // promoted (this should be rare because we're going through the
13488 // list backward, but if one of the operands has several users in
13489 // this cluster of to-be-promoted nodes, it is possible).
13490 PromOpHandles.emplace_front(PromOp);
13491 continue;
13492 }
13493
13494 // For SELECT and SELECT_CC nodes, we do a similar check for any
13495 // to-be-promoted comparison inputs.
13496 if (PromOp.getOpcode() == ISD::SELECT ||
13497 PromOp.getOpcode() == ISD::SELECT_CC) {
13498 if ((SelectTruncOp[0].count(PromOp.getNode()) &&
13499 PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
13500 (SelectTruncOp[1].count(PromOp.getNode()) &&
13501 PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
13502 PromOpHandles.emplace_front(PromOp);
13503 continue;
13504 }
13505 }
13506
13507 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
13508 PromOp.getNode()->op_end());
13509
13510 // If this node has constant inputs, then they'll need to be promoted here.
13511 for (unsigned i = 0; i < 2; ++i) {
13512 if (!isa<ConstantSDNode>(Ops[C+i]))
13513 continue;
13514 if (Ops[C+i].getValueType() == N->getValueType(0))
13515 continue;
13516
13517 if (N->getOpcode() == ISD::SIGN_EXTEND)
13518 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13519 else if (N->getOpcode() == ISD::ZERO_EXTEND)
13520 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13521 else
13522 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13523 }
13524
13525 // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
13526 // truncate them again to the original value type.
13527 if (PromOp.getOpcode() == ISD::SELECT ||
13528 PromOp.getOpcode() == ISD::SELECT_CC) {
13529 auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
13530 if (SI0 != SelectTruncOp[0].end())
13531 Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
13532 auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
13533 if (SI1 != SelectTruncOp[1].end())
13534 Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
13535 }
13536
13537 DAG.ReplaceAllUsesOfValueWith(PromOp,
13538 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
13539 }
13540
13541 // Now we're left with the initial extension itself.
13542 if (!ReallyNeedsExt)
13543 return N->getOperand(0);
13544
13545 // To zero extend, just mask off everything except for the first bit (in the
13546 // i1 case).
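// Concretely (sketch): for an i1 value promoted into an i32, the
// zero-extension below amounts to "and V, 1" and the sign-extension path
// further down to "sra (shl V, 31), 31"; in general the mask keeps the low
// PromBits bits and the shift amount is the output width minus PromBits.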
13547 if (N->getOpcode() == ISD::ZERO_EXTEND)
13548 return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
13549 DAG.getConstant(APInt::getLowBitsSet(
13550 N->getValueSizeInBits(0), PromBits),
13551 dl, N->getValueType(0)));
13552
13553 assert(N->getOpcode() == ISD::SIGN_EXTEND &&
13554 "Invalid extension type");
13555 EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
13556 SDValue ShiftCst =
13557 DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
13558 return DAG.getNode(
13559 ISD::SRA, dl, N->getValueType(0),
13560 DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
13561 ShiftCst);
13562}
13563
13564SDValue PPCTargetLowering::combineSetCC(SDNode *N,
13565 DAGCombinerInfo &DCI) const {
13566 assert(N->getOpcode() == ISD::SETCC &&
13567 "Should be called with a SETCC node");
13568
13569 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13570 if (CC == ISD::SETNE || CC == ISD::SETEQ) {
13571 SDValue LHS = N->getOperand(0);
13572 SDValue RHS = N->getOperand(1);
13573
13574 // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
13575 if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
13576 LHS.hasOneUse())
13577 std::swap(LHS, RHS);
13578
13579 // x == 0-y --> x+y == 0
13580 // x != 0-y --> x+y != 0
13581 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
13582 RHS.hasOneUse()) {
13583 SDLoc DL(N);
13584 SelectionDAG &DAG = DCI.DAG;
13585 EVT VT = N->getValueType(0);
13586 EVT OpVT = LHS.getValueType();
13587 SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
13588 return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
13589 }
13590 }
13591
13592 return DAGCombineTruncBoolExt(N, DCI);
13593}
13594
13595// Is this an extending load from an f32 to an f64?
13596static bool isFPExtLoad(SDValue Op) {
13597 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
13598 return LD->getExtensionType() == ISD::EXTLOAD &&
13599 Op.getValueType() == MVT::f64;
13600 return false;
13601}
13602
13603/// Reduces the number of fp-to-int conversion when building a vector.
13604///
13605/// If this vector is built out of floating to integer conversions,
13606/// transform it to a vector built out of floating point values followed by a
13607/// single floating to integer conversion of the vector.
13608/// Namely (build_vector (fptosi $A), (fptosi $B), ...)
13609/// becomes (fptosi (build_vector ($A, $B, ...)))
13610SDValue PPCTargetLowering::
13611combineElementTruncationToVectorTruncation(SDNode *N,
13612 DAGCombinerInfo &DCI) const {
13613 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
13614 "Should be called with a BUILD_VECTOR node")
13615
13616 SelectionDAG &DAG = DCI.DAG;
13617 SDLoc dl(N);
13618
13619 SDValue FirstInput = N->getOperand(0);
13620 assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
13621 "The input operand must be an fp-to-int conversion.");
13622
13623 // This combine happens after legalization so the fp_to_[su]i nodes are
13624 // already converted to PPCISD nodes.
13625 unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
13626 if (FirstConversion == PPCISD::FCTIDZ ||
13627 FirstConversion == PPCISD::FCTIDUZ ||
13628 FirstConversion == PPCISD::FCTIWZ ||
13629 FirstConversion == PPCISD::FCTIWUZ) {
13630 bool IsSplat = true;
13631 bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
13632 FirstConversion == PPCISD::FCTIWUZ;
13633 EVT SrcVT = FirstInput.getOperand(0).getValueType();
13634 SmallVector<SDValue, 4> Ops;
13635 EVT TargetVT = N->getValueType(0);
13636 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
13637 SDValue NextOp = N->getOperand(i);
13638 if (NextOp.getOpcode() != PPCISD::MFVSR)
13639 return SDValue();
13640 unsigned NextConversion = NextOp.getOperand(0).getOpcode();
13641 if (NextConversion != FirstConversion)
13642 return SDValue();
13643 // If we are converting to 32-bit integers, we need to add an FP_ROUND.
13644 // This is not valid if the input was originally double precision. It is
13645 // also not profitable to do unless this is an extending load, in which
13646 // case doing this combine will allow us to combine consecutive loads.
13647 if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
13648 return SDValue();
13649 if (N->getOperand(i) != FirstInput)
13650 IsSplat = false;
13651 }
13652
13653 // If this is a splat, we leave it as-is since there will be only a single
13654 // fp-to-int conversion followed by a splat of the integer. This is better
13655 // for 32-bit and smaller ints and neutral for 64-bit ints.
13656 if (IsSplat)
13657 return SDValue();
13658
13659 // Now that we know we have the right type of node, get its operands
13660 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
13661 SDValue In = N->getOperand(i).getOperand(0);
13662 if (Is32Bit) {
13663 // For 32-bit values, we need to add an FP_ROUND node (if we made it
13664 // here, we know that all inputs are extending loads so this is safe).
13665 if (In.isUndef())
13666 Ops.push_back(DAG.getUNDEF(SrcVT));
13667 else {
13668 SDValue Trunc = DAG.getNode(ISD::FP_ROUND, dl,
13669 MVT::f32, In.getOperand(0),
13670 DAG.getIntPtrConstant(1, dl));
13671 Ops.push_back(Trunc);
13672 }
13673 } else
13674 Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
13675 }
13676
13677 unsigned Opcode;
13678 if (FirstConversion == PPCISD::FCTIDZ ||
13679 FirstConversion == PPCISD::FCTIWZ)
13680 Opcode = ISD::FP_TO_SINT;
13681 else
13682 Opcode = ISD::FP_TO_UINT;
13683
13684 EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
13685 SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
13686 return DAG.getNode(Opcode, dl, TargetVT, BV);
13687 }
13688 return SDValue();
13689}
13690
13691/// Reduce the number of loads when building a vector.
13692///
13693/// Building a vector out of multiple loads can be converted to a load
13694/// of the vector type if the loads are consecutive. If the loads are
13695/// consecutive but in descending order, a shuffle is added at the end
13696/// to reorder the vector.
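/// A sketch of both shapes (element addresses are illustrative only):
///   (build_vector (load p), (load p+4), (load p+8), (load p+12))
///     --> (load <4 x i32> p)
///   (build_vector (load p+12), (load p+8), (load p+4), (load p))
///     --> (vector_shuffle (load <4 x i32> p), undef, <3,2,1,0>)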
13697static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
13698 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
13699 "Should be called with a BUILD_VECTOR node");
13700
13701 SDLoc dl(N);
13702
13703 // Return early for non byte-sized types, as they can't be consecutive.
13704 if (!N->getValueType(0).getVectorElementType().isByteSized())
13705 return SDValue();
13706
13707 bool InputsAreConsecutiveLoads = true;
13708 bool InputsAreReverseConsecutive = true;
13709 unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
13710 SDValue FirstInput = N->getOperand(0);
13711 bool IsRoundOfExtLoad = false;
13712
13713 if (FirstInput.getOpcode() == ISD::FP_ROUND &&
13714 FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
13715 LoadSDNode *LD = dyn_cast<LoadSDNode>(FirstInput.getOperand(0));
13716 IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD;
13717 }
13718 // Not a build vector of (possibly fp_rounded) loads.
13719 if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
13720 N->getNumOperands() == 1)
13721 return SDValue();
13722
13723 for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
13724 // If any inputs are fp_round(extload), they all must be.
13725 if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
13726 return SDValue();
13727
13728 SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
13729 N->getOperand(i);
13730 if (NextInput.getOpcode() != ISD::LOAD)
13731 return SDValue();
13732
13733 SDValue PreviousInput =
13734 IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
13735 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput);
13736 LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput);
13737
13738 // If any inputs are fp_round(extload), they all must be.
13739 if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
13740 return SDValue();
13741
13742 if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG))
13743 InputsAreConsecutiveLoads = false;
13744 if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG))
13745 InputsAreReverseConsecutive = false;
13746
13747 // Exit early if the loads are neither consecutive nor reverse consecutive.
13748 if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
13749 return SDValue();
13750 }
13751
13752 assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
13753 "The loads cannot be both consecutive and reverse consecutive.");
13754
13755 SDValue FirstLoadOp =
13756 IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput;
13757 SDValue LastLoadOp =
13758 IsRoundOfExtLoad ? N->getOperand(N->getNumOperands()-1).getOperand(0) :
13759 N->getOperand(N->getNumOperands()-1);
13760
13761 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp);
13762 LoadSDNode *LDL = dyn_cast<LoadSDNode>(LastLoadOp);
13763 if (InputsAreConsecutiveLoads) {
13764 assert(LD1 && "Input needs to be a LoadSDNode.");
13765 return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(),
13766 LD1->getBasePtr(), LD1->getPointerInfo(),
13767 LD1->getAlignment());
13768 }
13769 if (InputsAreReverseConsecutive) {
13770 assert(LDL && "Input needs to be a LoadSDNode.");
13771 SDValue Load = DAG.getLoad(N->getValueType(0), dl, LDL->getChain(),
13772 LDL->getBasePtr(), LDL->getPointerInfo(),
13773 LDL->getAlignment());
13774 SmallVector<int, 16> Ops;
13775 for (int i = N->getNumOperands() - 1; i >= 0; i--)
13776 Ops.push_back(i);
13777
13778 return DAG.getVectorShuffle(N->getValueType(0), dl, Load,
13779 DAG.getUNDEF(N->getValueType(0)), Ops);
13780 }
13781 return SDValue();
13782}
13783
13784// This function adds the required vector_shuffle needed to get
13785// the elements of the vector extract in the correct position
13786// as specified by the CorrectElems encoding.
13787static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
13788 SDValue Input, uint64_t Elems,
13789 uint64_t CorrectElems) {
13790 SDLoc dl(N);
13791
13792 unsigned NumElems = Input.getValueType().getVectorNumElements();
13793 SmallVector<int, 16> ShuffleMask(NumElems, -1);
13794
13795 // Knowing the element indices being extracted from the original
13796 // vector and the order in which they're being inserted, just put
13797 // them at element indices required for the instruction.
13798 for (unsigned i = 0; i < N->getNumOperands(); i++) {
13799 if (DAG.getDataLayout().isLittleEndian())
13800 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
13801 else
13802 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
13803 CorrectElems = CorrectElems >> 8;
13804 Elems = Elems >> 8;
13805 }
13806
13807 SDValue Shuffle =
13808 DAG.getVectorShuffle(Input.getValueType(), dl, Input,
13809 DAG.getUNDEF(Input.getValueType()), ShuffleMask);
13810
13811 EVT VT = N->getValueType(0);
13812 SDValue Conv = DAG.getBitcast(VT, Shuffle);
13813
13814 EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
13815 Input.getValueType().getVectorElementType(),
13816 VT.getVectorNumElements());
13817 return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
13818 DAG.getValueType(ExtVT));
13819}
13820
13821// Look for build vector patterns where input operands come from sign
13822// extended vector_extract elements of specific indices. If the correct indices
13823// aren't used, add a vector shuffle to fix up the indices and create
13824// SIGN_EXTEND_INREG node which selects the vector sign extend instructions
13825// during instruction selection.
13826static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
13827 // This array encodes the indices that the vector sign extend instructions
13828 // extract from when extending from one type to another for both BE and LE.
13829 // The right nibble of each byte corresponds to the LE indices,
13830 // and the left nibble of each byte corresponds to the BE indices.
13831 // For example: 0x3074B8FC byte->word
13832 // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
13833 // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
13834 // For example: 0x000070F8 byte->double word
13835 // For LE: the allowed indices are: 0x0,0x8
13836 // For BE: the allowed indices are: 0x7,0xF
13837 uint64_t TargetElems[] = {
13838 0x3074B8FC, // b->w
13839 0x000070F8, // b->d
13840 0x10325476, // h->w
13841 0x00003074, // h->d
13842 0x00001032, // w->d
13843 };
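// Worked example (sketch): on little-endian, a byte-to-word build_vector whose
// operands sign-extend extracts at indices 0, 4, 8 and 12, in that order,
// accumulates Elems == 0x0004080C below, which equals
// 0x3074B8FC & 0x0F0F0F0F0F0F0F0F, so no shuffle is needed; any other index
// pattern falls through to addShuffleForVecExtend.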
13844
13845 uint64_t Elems = 0;
13846 int Index;
13847 SDValue Input;
13848
13849 auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
13850 if (!Op)
13851 return false;
13852 if (Op.getOpcode() != ISD::SIGN_EXTEND &&
13853 Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
13854 return false;
13855
13856 // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
13857 // of the right width.
13858 SDValue Extract = Op.getOperand(0);
13859 if (Extract.getOpcode() == ISD::ANY_EXTEND)
13860 Extract = Extract.getOperand(0);
13861 if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
13862 return false;
13863
13864 ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
13865 if (!ExtOp)
13866 return false;
13867
13868 Index = ExtOp->getZExtValue();
13869 if (Input && Input != Extract.getOperand(0))
13870 return false;
13871
13872 if (!Input)
13873 Input = Extract.getOperand(0);
13874
13875 Elems = Elems << 8;
13876 Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
13877 Elems |= Index;
13878
13879 return true;
13880 };
13881
13882 // If the build vector operands aren't sign extended vector extracts
13883 // of the same input vector, then return.
13884 for (unsigned i = 0; i < N->getNumOperands(); i++) {
13885 if (!isSExtOfVecExtract(N->getOperand(i))) {
13886 return SDValue();
13887 }
13888 }
13889
13890 // If the vector extract indices are not correct, add the appropriate
13891 // vector_shuffle.
13892 int TgtElemArrayIdx;
13893 int InputSize = Input.getValueType().getScalarSizeInBits();
13894 int OutputSize = N->getValueType(0).getScalarSizeInBits();
13895 if (InputSize + OutputSize == 40)
13896 TgtElemArrayIdx = 0;
13897 else if (InputSize + OutputSize == 72)
13898 TgtElemArrayIdx = 1;
13899 else if (InputSize + OutputSize == 48)
13900 TgtElemArrayIdx = 2;
13901 else if (InputSize + OutputSize == 80)
13902 TgtElemArrayIdx = 3;
13903 else if (InputSize + OutputSize == 96)
13904 TgtElemArrayIdx = 4;
13905 else
13906 return SDValue();
13907
13908 uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
13909 CorrectElems = DAG.getDataLayout().isLittleEndian()
13910 ? CorrectElems & 0x0F0F0F0F0F0F0F0F
13911 : CorrectElems & 0xF0F0F0F0F0F0F0F0;
13912 if (Elems != CorrectElems) {
13913 return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
13914 }
13915
13916 // Regular lowering will catch cases where a shuffle is not needed.
13917 return SDValue();
13918}
13919
13920// Look for the pattern of a load from a narrow width to i128, feeding
13921// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
13922// (LXVRZX). This node represents a zero extending load that will be matched
13923// to the Load VSX Vector Rightmost instructions.
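// Sketch of the intended rewrite (operand order as built below):
//   (v1i128 build_vector (zextload <narrow type> %p to i128))
//     --> PPCISD::LXVRZX chain, %p, <bit width of the narrow type>
// e.g. a zero-extending i32 load would pass 32 as the width operand.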
13924static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
13925 SDLoc DL(N);
13926
13927 // This combine is only eligible for a BUILD_VECTOR of v1i128.
13928 if (N->getValueType(0) != MVT::v1i128)
13929 return SDValue();
13930
13931 SDValue Operand = N->getOperand(0);
13932 // Proceed with the transformation if the operand to the BUILD_VECTOR
13933 // is a load instruction.
13934 if (Operand.getOpcode() != ISD::LOAD)
13935 return SDValue();
13936
13937 LoadSDNode *LD = dyn_cast<LoadSDNode>(Operand);
13938 EVT MemoryType = LD->getMemoryVT();
13939
13940 // This transformation is only valid if we are loading either a byte,
13941 // halfword, word, or doubleword.
13942 bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
13943 MemoryType == MVT::i32 || MemoryType == MVT::i64;
13944
13945 // Ensure that the load from the narrow width is being zero extended to i128.
13946 if (!ValidLDType ||
13947 (LD->getExtensionType() != ISD::ZEXTLOAD &&
13948 LD->getExtensionType() != ISD::EXTLOAD))
13949 return SDValue();
13950
13951 SDValue LoadOps[] = {
13952 LD->getChain(), LD->getBasePtr(),
13953 DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
13954
13955 return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
13956 DAG.getVTList(MVT::v1i128, MVT::Other),
13957 LoadOps, MemoryType, LD->getMemOperand());
13958}
13959
13960SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
13961 DAGCombinerInfo &DCI) const {
13962 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
13963 "Should be called with a BUILD_VECTOR node");
13964
13965 SelectionDAG &DAG = DCI.DAG;
13966 SDLoc dl(N);
13967
13968 if (!Subtarget.hasVSX())
13969 return SDValue();
13970
13971 // The target independent DAG combiner will leave a build_vector of
13972 // float-to-int conversions intact. We can generate MUCH better code for
13973 // a float-to-int conversion of a vector of floats.
13974 SDValue FirstInput = N->getOperand(0);
13975 if (FirstInput.getOpcode() == PPCISD::MFVSR) {
13976 SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
13977 if (Reduced)
13978 return Reduced;
13979 }
13980
13981 // If we're building a vector out of consecutive loads, just load that
13982 // vector type.
13983 SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
13984 if (Reduced)
13985 return Reduced;
13986
13987 // If we're building a vector out of extended elements from another vector
13988 // we have P9 vector integer extend instructions. The code assumes legal
13989 // input types (i.e. it can't handle things like v4i16) so do not run before
13990 // legalization.
13991 if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
13992 Reduced = combineBVOfVecSExt(N, DAG);
13993 if (Reduced)
13994 return Reduced;
13995 }
13996
13997 // On Power10, the Load VSX Vector Rightmost instructions can be utilized
13998 // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
13999 // is a load from <valid narrow width> to i128.
14000 if (Subtarget.isISA3_1()) {
14001 SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
14002 if (BVOfZLoad)
14003 return BVOfZLoad;
14004 }
14005
14006 if (N->getValueType(0) != MVT::v2f64)
14007 return SDValue();
14008
14009 // Looking for:
14010 // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
14011 if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
14012 FirstInput.getOpcode() != ISD::UINT_TO_FP)
14013 return SDValue();
14014 if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
14015 N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
14016 return SDValue();
14017 if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
14018 return SDValue();
14019
14020 SDValue Ext1 = FirstInput.getOperand(0);
14021 SDValue Ext2 = N->getOperand(1).getOperand(0);
14022 if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
14023 Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
14024 return SDValue();
14025
14026 ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
14027 ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
14028 if (!Ext1Op || !Ext2Op)
14029 return SDValue();
14030 if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
14031 Ext1.getOperand(0) != Ext2.getOperand(0))
14032 return SDValue();
14033
14034 int FirstElem = Ext1Op->getZExtValue();
14035 int SecondElem = Ext2Op->getZExtValue();
14036 int SubvecIdx;
14037 if (FirstElem == 0 && SecondElem == 1)
14038 SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
14039 else if (FirstElem == 2 && SecondElem == 3)
14040 SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
14041 else
14042 return SDValue();
14043
14044 SDValue SrcVec = Ext1.getOperand(0);
14045 auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
14046 PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
14047 return DAG.getNode(NodeType, dl, MVT::v2f64,
14048 SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
14049}
14050
14051SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
14052 DAGCombinerInfo &DCI) const {
14053 assert((N->getOpcode() == ISD::SINT_TO_FP ||
14054 N->getOpcode() == ISD::UINT_TO_FP) &&
14055 "Need an int -> FP conversion node here");
14056
14057 if (useSoftFloat() || !Subtarget.has64BitSupport())
14058 return SDValue();
14059
14060 SelectionDAG &DAG = DCI.DAG;
14061 SDLoc dl(N);
14062 SDValue Op(N, 0);
14063
14064 // Don't handle ppc_fp128 here or conversions that are out-of-range capable
14065 // from the hardware.
14066 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
14067 return SDValue();
14068 if (!Op.getOperand(0).getValueType().isSimple())
14069 return SDValue();
14070 if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
14071 Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
14072 return SDValue();
14073
14074 SDValue FirstOperand(Op.getOperand(0));
14075 bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
14076 (FirstOperand.getValueType() == MVT::i8 ||
14077 FirstOperand.getValueType() == MVT::i16);
14078 if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
14079 bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
14080 bool DstDouble = Op.getValueType() == MVT::f64;
14081 unsigned ConvOp = Signed ?
14082 (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
14083 (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
14084 SDValue WidthConst =
14085 DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
14086 dl, false);
14087 LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
14088 SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
14089 SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
14090 DAG.getVTList(MVT::f64, MVT::Other),
14091 Ops, MVT::i8, LDN->getMemOperand());
14092
14093 // For signed conversion, we need to sign-extend the value in the VSR
14094 if (Signed) {
14095 SDValue ExtOps[] = { Ld, WidthConst };
14096 SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
14097 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
14098 } else
14099 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
14100 }
14101
14102
14103 // For i32 intermediate values, unfortunately, the conversion functions
14104 // leave the upper 32 bits of the value undefined. Within the set of
14105 // scalar instructions, we have no method for zero- or sign-extending the
14106 // value. Thus, we cannot handle i32 intermediate values here.
14107 if (Op.getOperand(0).getValueType() == MVT::i32)
14108 return SDValue();
14109
14110 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
14111 "UINT_TO_FP is supported only with FPCVT");
14112
14113 // If we have FCFIDS, then use it when converting to single-precision.
14114 // Otherwise, convert to double-precision and then round.
14115 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
14116 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
14117 : PPCISD::FCFIDS)
14118 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
14119 : PPCISD::FCFID);
14120 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
14121 ? MVT::f32
14122 : MVT::f64;
14123
14124 // If we're converting from a float, to an int, and back to a float again,
14125 // then we don't need the store/load pair at all.
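// For example (sketch): (f64 sint_to_fp (fp_to_sint f64 %x)) becomes
// FCFID(FCTIDZ(%x)), keeping the value in floating-point registers for the
// entire round trip; the f32 variants add an FP_EXTEND on the way in and,
// without FPCVT, an FP_ROUND on the way out.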
14126 if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
14127 Subtarget.hasFPCVT()) ||
14128 (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
14129 SDValue Src = Op.getOperand(0).getOperand(0);
14130 if (Src.getValueType() == MVT::f32) {
14131 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
14132 DCI.AddToWorklist(Src.getNode());
14133 } else if (Src.getValueType() != MVT::f64) {
14134 // Make sure that we don't pick up a ppc_fp128 source value.
14135 return SDValue();
14136 }
14137
14138 unsigned FCTOp =
14139 Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
14140 PPCISD::FCTIDUZ;
14141
14142 SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
14143 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
14144
14145 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
14146 FP = DAG.getNode(ISD::FP_ROUND, dl,
14147 MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
14148 DCI.AddToWorklist(FP.getNode());
14149 }
14150
14151 return FP;
14152 }
14153
14154 return SDValue();
14155}
14156
14157// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
14158// builtins) into loads with swaps.
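// On little-endian subtargets lxvd2x returns the two doublewords in
// big-endian element order, so (sketch) a plain vector load becomes
//   (v2f64 PPCISD::XXSWAPD (PPCISD::LXVD2X chain, base))
// with a bitcast appended when the requested type is not v2f64.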
14159SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
14160 DAGCombinerInfo &DCI) const {
14161 SelectionDAG &DAG = DCI.DAG;
14162 SDLoc dl(N);
14163 SDValue Chain;
14164 SDValue Base;
14165 MachineMemOperand *MMO;
14166
14167 switch (N->getOpcode()) {
14168 default:
14169 llvm_unreachable("Unexpected opcode for little endian VSX load");
14170 case ISD::LOAD: {
14171 LoadSDNode *LD = cast<LoadSDNode>(N);
14172 Chain = LD->getChain();
14173 Base = LD->getBasePtr();
14174 MMO = LD->getMemOperand();
14175 // If the MMO suggests this isn't a load of a full vector, leave
14176 // things alone. For a built-in, we have to make the change for
14177 // correctness, so if there is a size problem that will be a bug.
14178 if (MMO->getSize() < 16)
14179 return SDValue();
14180 break;
14181 }
14182 case ISD::INTRINSIC_W_CHAIN: {
14183 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
14184 Chain = Intrin->getChain();
14185 // Similarly to the store case below, Intrin->getBasePtr() doesn't get
14186 // us what we want. Get operand 2 instead.
14187 Base = Intrin->getOperand(2);
14188 MMO = Intrin->getMemOperand();
14189 break;
14190 }
14191 }
14192
14193 MVT VecTy = N->getValueType(0).getSimpleVT();
14194
14195 // Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is
14196 // aligned and the type is a vector with elements up to 4 bytes
14197 if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&
14198 VecTy.getScalarSizeInBits() <= 32) {
14199 return SDValue();
14200 }
14201
14202 SDValue LoadOps[] = { Chain, Base };
14203 SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
14204 DAG.getVTList(MVT::v2f64, MVT::Other),
14205 LoadOps, MVT::v2f64, MMO);
14206
14207 DCI.AddToWorklist(Load.getNode());
14208 Chain = Load.getValue(1);
14209 SDValue Swap = DAG.getNode(
14210 PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
14211 DCI.AddToWorklist(Swap.getNode());
14212
14213 // Add a bitcast if the resulting load type doesn't match v2f64.
14214 if (VecTy != MVT::v2f64) {
14215 SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
14216 DCI.AddToWorklist(N.getNode());
14217 // Package {bitcast value, swap's chain} to match Load's shape.
14218 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
14219 N, Swap.getValue(1));
14220 }
14221
14222 return Swap;
14223}
14224
14225// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
14226// builtins) into stores with swaps.
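// The store direction mirrors the load case above (sketch): the source is
// bitcast to v2f64 if needed, swapped with PPCISD::XXSWAPD, and then stored
// via PPCISD::STXVD2X.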
14227SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
14228 DAGCombinerInfo &DCI) const {
14229 SelectionDAG &DAG = DCI.DAG;
14230 SDLoc dl(N);
14231 SDValue Chain;
14232 SDValue Base;
14233 unsigned SrcOpnd;
14234 MachineMemOperand *MMO;
14235
14236 switch (N->getOpcode()) {
14237 default:
14238 llvm_unreachable("Unexpected opcode for little endian VSX store")::llvm::llvm_unreachable_internal("Unexpected opcode for little endian VSX store"
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 14238)
;
14239 case ISD::STORE: {
14240 StoreSDNode *ST = cast<StoreSDNode>(N);
14241 Chain = ST->getChain();
14242 Base = ST->getBasePtr();
14243 MMO = ST->getMemOperand();
14244 SrcOpnd = 1;
14245 // If the MMO suggests this isn't a store of a full vector, leave
14246 // things alone. For a built-in, we have to make the change for
14247 // correctness, so if there is a size problem that will be a bug.
14248 if (MMO->getSize() < 16)
14249 return SDValue();
14250 break;
14251 }
14252 case ISD::INTRINSIC_VOID: {
14253 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
14254 Chain = Intrin->getChain();
14255 // Intrin->getBasePtr() oddly does not get what we want.
14256 Base = Intrin->getOperand(3);
14257 MMO = Intrin->getMemOperand();
14258 SrcOpnd = 2;
14259 break;
14260 }
14261 }
14262
14263 SDValue Src = N->getOperand(SrcOpnd);
14264 MVT VecTy = Src.getValueType().getSimpleVT();
14265
14266 // Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the store is
14267 // aligned and the type is a vector with elements up to 4 bytes.
14268 if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&
14269 VecTy.getScalarSizeInBits() <= 32) {
14270 return SDValue();
14271 }
14272
14273 // All stores are done as v2f64, with a possible bitcast.
14274 if (VecTy != MVT::v2f64) {
14275 Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
14276 DCI.AddToWorklist(Src.getNode());
14277 }
14278
14279 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
14280 DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
14281 DCI.AddToWorklist(Swap.getNode());
14282 Chain = Swap.getValue(1);
14283 SDValue StoreOps[] = { Chain, Swap, Base };
14284 SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
14285 DAG.getVTList(MVT::Other),
14286 StoreOps, VecTy, MMO);
14287 DCI.AddToWorklist(Store.getNode());
14288 return Store;
14289}
14290
14291// Handle DAG combine for STORE (FP_TO_INT F).
14292SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
14293 DAGCombinerInfo &DCI) const {
14294
14295 SelectionDAG &DAG = DCI.DAG;
14296 SDLoc dl(N);
14297 unsigned Opcode = N->getOperand(1).getOpcode();
14298
14299 assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT)
14300 && "Not a FP_TO_INT Instruction!");
14301
14302 SDValue Val = N->getOperand(1).getOperand(0);
14303 EVT Op1VT = N->getOperand(1).getValueType();
14304 EVT ResVT = Val.getValueType();
14305
14306 if (!isTypeLegal(ResVT))
14307 return SDValue();
14308
14309 // Only perform combine for conversion to i64/i32 or power9 i16/i8.
14310 bool ValidTypeForStoreFltAsInt =
14311 (Op1VT == MVT::i32 || Op1VT == MVT::i64 ||
14312 (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
14313
14314 if (ResVT == MVT::ppcf128 || !Subtarget.hasP8Vector() ||
14315 cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
14316 return SDValue();
14317
14318 // Extend f32 values to f64
14319 if (ResVT.getScalarSizeInBits() == 32) {
14320 Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
14321 DCI.AddToWorklist(Val.getNode());
14322 }
14323
14324 // Set signed or unsigned conversion opcode.
14325 unsigned ConvOpcode = (Opcode == ISD::FP_TO_SINT) ?
14326 PPCISD::FP_TO_SINT_IN_VSR :
14327 PPCISD::FP_TO_UINT_IN_VSR;
14328
14329 Val = DAG.getNode(ConvOpcode,
14330 dl, ResVT == MVT::f128 ? MVT::f128 : MVT::f64, Val);
14331 DCI.AddToWorklist(Val.getNode());
14332
14333 // Set number of bytes being converted.
14334 unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
14335 SDValue Ops[] = { N->getOperand(0), Val, N->getOperand(2),
14336 DAG.getIntPtrConstant(ByteSize, dl, false),
14337 DAG.getValueType(Op1VT) };
14338
14339 Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
14340 DAG.getVTList(MVT::Other), Ops,
14341 cast<StoreSDNode>(N)->getMemoryVT(),
14342 cast<StoreSDNode>(N)->getMemOperand());
14343
14344 DCI.AddToWorklist(Val.getNode());
14345 return Val;
14346}
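// [Editor's sketch, not part of PPCISelLowering.cpp] The source pattern this
// combine targets: a float-to-int conversion whose only use is a store, which
// can then be done with a direct VSR integer store (such as stfiwx/stxsiwx)
// instead of converting, moving to a GPR and storing. Whether the combine
// fires depends on the subtarget checks above; the name is illustrative only.
static void store_converted(float F, int *Out) {
  *Out = static_cast<int>(F); // store (fp_to_sint f32 %F), handled above
}
// [End of editor's sketch]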
14347
14348static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
14349 // Check that the source of the element keeps flipping
14350 // (i.e. Mask[i] < NumElts -> Mask[i+1] >= NumElts).
14351 bool PrevElemFromFirstVec = Mask[0] < NumElts;
14352 for (int i = 1, e = Mask.size(); i < e; i++) {
14353 if (PrevElemFromFirstVec && Mask[i] < NumElts)
14354 return false;
14355 if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
14356 return false;
14357 PrevElemFromFirstVec = !PrevElemFromFirstVec;
14358 }
14359 return true;
14360}
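// [Editor's sketch, not part of PPCISelLowering.cpp] A standalone model of the
// alternating-mask check above, assuming plain int masks with no undef (-1)
// entries. The helper name and the sample masks are illustrative only.
#include <cassert>
#include <vector>
static bool alternates(const std::vector<int> &Mask, int NumElts) {
  bool PrevFromFirst = Mask[0] < NumElts;
  for (size_t i = 1; i < Mask.size(); ++i) {
    bool FromFirst = Mask[i] < NumElts;
    if (FromFirst == PrevFromFirst) // two picks in a row from the same vector
      return false;
    PrevFromFirst = FromFirst;
  }
  return true;
}
static void test_alternates() {
  assert(alternates({0, 5, 2, 7}, 4));  // vec0, vec1, vec0, vec1
  assert(!alternates({0, 1, 6, 7}, 4)); // consecutive picks from the same vector
}
// [End of editor's sketch]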
14361
14362static bool isSplatBV(SDValue Op) {
14363 if (Op.getOpcode() != ISD::BUILD_VECTOR)
14364 return false;
14365 SDValue FirstOp;
14366
14367 // Find first non-undef input.
14368 for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
14369 FirstOp = Op.getOperand(i);
14370 if (!FirstOp.isUndef())
14371 break;
14372 }
14373
14374 // All inputs are undef or the same as the first non-undef input.
14375 for (int i = 1, e = Op.getNumOperands(); i < e; i++)
14376 if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
14377 return false;
14378 return true;
14379}
14380
14381static SDValue isScalarToVec(SDValue Op) {
14382 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
14383 return Op;
14384 if (Op.getOpcode() != ISD::BITCAST)
14385 return SDValue();
14386 Op = Op.getOperand(0);
14387 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
14388 return Op;
14389 return SDValue();
14390}
14391
14392static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
14393 int LHSMaxIdx, int RHSMinIdx,
14394 int RHSMaxIdx, int HalfVec) {
14395 for (int i = 0, e = ShuffV.size(); i < e; i++) {
14396 int Idx = ShuffV[i];
14397 if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
14398 ShuffV[i] += HalfVec;
14399 }
14400 return;
14401}
14402
14403// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
14404// the original is:
14405// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
14406// In such a case, just change the shuffle mask to extract the element
14407// from the permuted index.
14408static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG) {
14409 SDLoc dl(OrigSToV);
14410 EVT VT = OrigSToV.getValueType();
14411 assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
14412 "Expecting a SCALAR_TO_VECTOR here");
14413 SDValue Input = OrigSToV.getOperand(0);
14414
14415 if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
14416 ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
14417 SDValue OrigVector = Input.getOperand(0);
14418
14419 // Can't handle non-const element indices or different vector types
14420 // for the input to the extract and the output of the scalar_to_vector.
14421 if (Idx && VT == OrigVector.getValueType()) {
14422 SmallVector<int, 16> NewMask(VT.getVectorNumElements(), -1);
14423 NewMask[VT.getVectorNumElements() / 2] = Idx->getZExtValue();
14424 return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
14425 }
14426 }
14427 return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
14428 OrigSToV.getOperand(0));
14429}
14430
14431// On little endian subtargets, combine shuffles such as:
14432// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
14433// into:
14434// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
14435// because the latter can be matched to a single instruction merge.
14436// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
14437// to put the value into element zero. Adjust the shuffle mask so that the
14438// vector can remain in permuted form (to prevent a swap prior to a shuffle).
14439SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
14440 SelectionDAG &DAG) const {
14441 SDValue LHS = SVN->getOperand(0);
14442 SDValue RHS = SVN->getOperand(1);
14443 auto Mask = SVN->getMask();
14444 int NumElts = LHS.getValueType().getVectorNumElements();
14445 SDValue Res(SVN, 0);
14446 SDLoc dl(SVN);
14447
14448 // None of these combines are useful on big endian systems since the ISA
14449 // already has a big endian bias.
14450 if (!Subtarget.isLittleEndian() || !Subtarget.hasVSX())
14451 return Res;
14452
14453 // If this is not a shuffle of a shuffle and the first element comes from
14454 // the second vector, canonicalize to the commuted form. This will make it
14455 // more likely to match one of the single instruction patterns.
14456 if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
14457 RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
14458 std::swap(LHS, RHS);
14459 Res = DAG.getCommutedVectorShuffle(*SVN);
14460 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
14461 }
14462
14463 // Adjust the shuffle mask if either input vector comes from a
14464 // SCALAR_TO_VECTOR and keep the respective input vector in permuted
14465 // form (to prevent the need for a swap).
14466 SmallVector<int, 16> ShuffV(Mask.begin(), Mask.end());
14467 SDValue SToVLHS = isScalarToVec(LHS);
14468 SDValue SToVRHS = isScalarToVec(RHS);
14469 if (SToVLHS || SToVRHS) {
14470 int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
14471 : SToVRHS.getValueType().getVectorNumElements();
14472 int NumEltsOut = ShuffV.size();
14473
14474 // Initially assume that neither input is permuted. These will be adjusted
14475 // accordingly if either input is.
14476 int LHSMaxIdx = -1;
14477 int RHSMinIdx = -1;
14478 int RHSMaxIdx = -1;
14479 int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
14480
14481 // Get the permuted scalar to vector nodes for the source(s) that come from
14482 // ISD::SCALAR_TO_VECTOR.
14483 if (SToVLHS) {
14484 // Set up the values for the shuffle vector fixup.
14485 LHSMaxIdx = NumEltsOut / NumEltsIn;
14486 SToVLHS = getSToVPermuted(SToVLHS, DAG);
14487 if (SToVLHS.getValueType() != LHS.getValueType())
14488 SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
14489 LHS = SToVLHS;
14490 }
14491 if (SToVRHS) {
14492 RHSMinIdx = NumEltsOut;
14493 RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
14494 SToVRHS = getSToVPermuted(SToVRHS, DAG);
14495 if (SToVRHS.getValueType() != RHS.getValueType())
14496 SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
14497 RHS = SToVRHS;
14498 }
14499
14500 // Fix up the shuffle mask to reflect where the desired element actually is.
14501 // The minimum and maximum indices that correspond to element zero for both
14502 // the LHS and RHS are computed and will control which shuffle mask entries
14503 // are to be changed. For example, if the RHS is permuted, any shuffle mask
14504 // entries in the range [RHSMinIdx,RHSMaxIdx) will be incremented by
14505 // HalfVec to refer to the corresponding element in the permuted vector.
14506 fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
14507 HalfVec);
14508 Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
14509
14510 // We may have simplified away the shuffle. We won't be able to do anything
14511 // further with it here.
14512 if (!isa<ShuffleVectorSDNode>(Res))
14513 return Res;
14514 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
14515 }
14516
14517 // The common case after we commuted the shuffle is that the RHS is a splat
14518 // and we have elements coming in from the splat at indices that are not
14519 // conducive to using a merge.
14520 // Example:
14521 // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
14522 if (!isSplatBV(RHS))
14523 return Res;
14524
14525 // We are looking for a mask such that all even elements are from
14526 // one vector and all odd elements from the other.
14527 if (!isAlternatingShuffMask(Mask, NumElts))
14528 return Res;
14529
14530 // Adjust the mask so we are pulling in the same index from the splat
14531 // as the index from the interesting vector in consecutive elements.
14532 // Example (even elements from first vector):
14533 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
14534 if (Mask[0] < NumElts)
14535 for (int i = 1, e = Mask.size(); i < e; i += 2)
14536 ShuffV[i] = (ShuffV[i - 1] + NumElts);
14537 // Example (odd elements from first vector):
14538 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
14539 else
14540 for (int i = 0, e = Mask.size(); i < e; i += 2)
14541 ShuffV[i] = (ShuffV[i + 1] + NumElts);
14542
14543 // If the RHS has undefs, we need to remove them since we may have created
14544 // a shuffle that adds those instead of the splat value.
14545 SDValue SplatVal = cast<BuildVectorSDNode>(RHS.getNode())->getSplatValue();
14546 RHS = DAG.getSplatBuildVector(RHS.getValueType(), dl, SplatVal);
14547
14548 Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
14549 return Res;
14550}
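// [Editor's sketch, not part of PPCISelLowering.cpp] The splat-index rewrite at
// the end of combineVectorShuffle, modelled on a plain int mask. Because the
// RHS is a splat, any of its lanes is interchangeable, so each splat entry is
// rewritten to use the same index as its LHS neighbour, letting the shuffle
// match a single merge instruction. Names and sample values are illustrative.
#include <cassert>
#include <vector>
static void adjust_for_splat_rhs(std::vector<int> &ShuffV, int NumElts) {
  if (ShuffV[0] < NumElts) {          // even lanes come from the first vector
    for (size_t i = 1; i < ShuffV.size(); i += 2)
      ShuffV[i] = ShuffV[i - 1] + NumElts;
  } else {                            // odd lanes come from the first vector
    for (size_t i = 0; i + 1 < ShuffV.size(); i += 2)
      ShuffV[i] = ShuffV[i + 1] + NumElts;
  }
}
static void test_adjust_for_splat_rhs() {
  std::vector<int> M = {0, 9, 1, 11, 2, 13, 3, 15};
  adjust_for_splat_rhs(M, 8);
  assert((M == std::vector<int>{0, 8, 1, 9, 2, 10, 3, 11}));
}
// [End of editor's sketch]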
14551
14552SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
14553 LSBaseSDNode *LSBase,
14554 DAGCombinerInfo &DCI) const {
14555 assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
14556 "Not a reverse memop pattern!");
14557
14558 auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
14559 auto Mask = SVN->getMask();
14560 int i = 0;
14561 auto I = Mask.rbegin();
14562 auto E = Mask.rend();
14563
14564 for (; I != E; ++I) {
14565 if (*I != i)
14566 return false;
14567 i++;
14568 }
14569 return true;
14570 };
14571
14572 SelectionDAG &DAG = DCI.DAG;
14573 EVT VT = SVN->getValueType(0);
14574
14575 if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
14576 return SDValue();
14577
14578 // Before P9, we have the PPCVSXSwapRemoval pass to adjust the element order.
14579 // See the comment in PPCVSXSwapRemoval.cpp.
14580 // This combine conflicts with that pass, so we do not do it here.
14581 if (!Subtarget.hasP9Vector())
14582 return SDValue();
14583
14584 if (!IsElementReverse(SVN))
14585 return SDValue();
14586
14587 if (LSBase->getOpcode() == ISD::LOAD) {
14588 SDLoc dl(SVN);
14589 SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
14590 return DAG.getMemIntrinsicNode(
14591 PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
14592 LSBase->getMemoryVT(), LSBase->getMemOperand());
14593 }
14594
14595 if (LSBase->getOpcode() == ISD::STORE) {
14596 SDLoc dl(LSBase);
14597 SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
14598 LSBase->getBasePtr()};
14599 return DAG.getMemIntrinsicNode(
14600 PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
14601 LSBase->getMemoryVT(), LSBase->getMemOperand());
14602 }
14603
14604 llvm_unreachable("Expected a load or store node here");
14605}
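// [Editor's sketch, not part of PPCISelLowering.cpp] The IsElementReverse check
// above accepts exactly the mask <N-1, N-2, ..., 1, 0>. A standalone model with
// an illustrative name:
#include <cassert>
#include <vector>
static bool is_element_reverse(const std::vector<int> &Mask) {
  int Expected = static_cast<int>(Mask.size()) - 1;
  for (int Elt : Mask)
    if (Elt != Expected--)
      return false;
  return true;
}
static void test_is_element_reverse() {
  assert(is_element_reverse({3, 2, 1, 0}));
  assert(!is_element_reverse({0, 1, 2, 3}));
}
// [End of editor's sketch]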
14606
14607SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
14608 DAGCombinerInfo &DCI) const {
14609 SelectionDAG &DAG = DCI.DAG;
14610 SDLoc dl(N);
14611 switch (N->getOpcode()) {
14612 default: break;
14613 case ISD::ADD:
14614 return combineADD(N, DCI);
14615 case ISD::SHL:
14616 return combineSHL(N, DCI);
14617 case ISD::SRA:
14618 return combineSRA(N, DCI);
14619 case ISD::SRL:
14620 return combineSRL(N, DCI);
14621 case ISD::MUL:
14622 return combineMUL(N, DCI);
14623 case ISD::FMA:
14624 case PPCISD::FNMSUB:
14625 return combineFMALike(N, DCI);
14626 case PPCISD::SHL:
14627 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
14628 return N->getOperand(0);
14629 break;
14630 case PPCISD::SRL:
14631 if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
14632 return N->getOperand(0);
14633 break;
14634 case PPCISD::SRA:
14635 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
14636 if (C->isNullValue() || // 0 >>s V -> 0.
14637 C->isAllOnesValue()) // -1 >>s V -> -1.
14638 return N->getOperand(0);
14639 }
14640 break;
14641 case ISD::SIGN_EXTEND:
14642 case ISD::ZERO_EXTEND:
14643 case ISD::ANY_EXTEND:
14644 return DAGCombineExtBoolTrunc(N, DCI);
14645 case ISD::TRUNCATE:
14646 return combineTRUNCATE(N, DCI);
14647 case ISD::SETCC:
14648 if (SDValue CSCC = combineSetCC(N, DCI))
14649 return CSCC;
14650 LLVM_FALLTHROUGH;
14651 case ISD::SELECT_CC:
14652 return DAGCombineTruncBoolExt(N, DCI);
14653 case ISD::SINT_TO_FP:
14654 case ISD::UINT_TO_FP:
14655 return combineFPToIntToFP(N, DCI);
14656 case ISD::VECTOR_SHUFFLE:
14657 if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
14658 LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
14659 return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
14660 }
14661 return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
14662 case ISD::STORE: {
14663
14664 EVT Op1VT = N->getOperand(1).getValueType();
14665 unsigned Opcode = N->getOperand(1).getOpcode();
14666
14667 if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT) {
14668 SDValue Val = combineStoreFPToInt(N, DCI);
14669 if (Val)
14670 return Val;
14671 }
14672
14673 if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
14674 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
14675 SDValue Val = combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
14676 if (Val)
14677 return Val;
14678 }
14679
14680 // Turn STORE (BSWAP) -> sthbrx/stwbrx.
14681 if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
14682 N->getOperand(1).getNode()->hasOneUse() &&
14683 (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
14684 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
14685
14686 // STBRX can only handle simple types and it makes no sense to store fewer
14687 // than two bytes in byte-reversed order.
14688 EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
14689 if (mVT.isExtended() || mVT.getSizeInBits() < 16)
14690 break;
14691
14692 SDValue BSwapOp = N->getOperand(1).getOperand(0);
14693 // Do an any-extend to 32-bits if this is a half-word input.
14694 if (BSwapOp.getValueType() == MVT::i16)
14695 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
14696
14697 // If the type of the BSWAP operand is wider than the stored memory width,
14698 // it needs to be shifted right before the STBRX.
14699 if (Op1VT.bitsGT(mVT)) {
14700 int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
14701 BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
14702 DAG.getConstant(Shift, dl, MVT::i32));
14703 // Need to truncate if this is a bswap of i64 stored as i32/i16.
14704 if (Op1VT == MVT::i64)
14705 BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
14706 }
14707
14708 SDValue Ops[] = {
14709 N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
14710 };
14711 return
14712 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
14713 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
14714 cast<StoreSDNode>(N)->getMemOperand());
14715 }
14716
14717 // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
14718 // This increases the chance of CSE for constant construction.
14719 if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
14720 isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
14721 // Need to sign-extend to 64 bits to handle negative values.
14722 EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
14723 uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
14724 MemVT.getSizeInBits());
14725 SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
14726
14727 // DAG.getTruncStore() can't be used here because it doesn't accept
14728 // the general (base + offset) addressing mode.
14729 // So we use UpdateNodeOperands and setTruncatingStore instead.
14730 DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
14731 N->getOperand(3));
14732 cast<StoreSDNode>(N)->setTruncatingStore(true);
14733 return SDValue(N, 0);
14734 }
14735
14736 // For little endian, VSX stores require generating xxswapd/stxvd2x.
14737 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
14738 if (Op1VT.isSimple()) {
14739 MVT StoreVT = Op1VT.getSimpleVT();
14740 if (Subtarget.needsSwapsForVSXMemOps() &&
14741 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
14742 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
14743 return expandVSXStoreForLE(N, DCI);
14744 }
14745 break;
14746 }
14747 case ISD::LOAD: {
14748 LoadSDNode *LD = cast<LoadSDNode>(N);
14749 EVT VT = LD->getValueType(0);
14750
14751 // For little endian, VSX loads require generating lxvd2x/xxswapd.
14752 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
14753 if (VT.isSimple()) {
14754 MVT LoadVT = VT.getSimpleVT();
14755 if (Subtarget.needsSwapsForVSXMemOps() &&
14756 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
14757 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
14758 return expandVSXLoadForLE(N, DCI);
14759 }
14760
14761 // We sometimes end up with a 64-bit integer load, from which we extract
14762 // two single-precision floating-point numbers. This happens with
14763 // std::complex<float>, and other similar structures, because of the way we
14764 // canonicalize structure copies. However, if we lack direct moves,
14765 // then the final bitcasts from the extracted integer values to the
14766 // floating-point numbers turn into store/load pairs. Even with direct moves,
14767 // just loading the two floating-point numbers is likely better.
14768 auto ReplaceTwoFloatLoad = [&]() {
14769 if (VT != MVT::i64)
14770 return false;
14771
14772 if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
14773 LD->isVolatile())
14774 return false;
14775
14776 // We're looking for a sequence like this:
14777 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
14778 // t16: i64 = srl t13, Constant:i32<32>
14779 // t17: i32 = truncate t16
14780 // t18: f32 = bitcast t17
14781 // t19: i32 = truncate t13
14782 // t20: f32 = bitcast t19
14783
14784 if (!LD->hasNUsesOfValue(2, 0))
14785 return false;
14786
14787 auto UI = LD->use_begin();
14788 while (UI.getUse().getResNo() != 0) ++UI;
14789 SDNode *Trunc = *UI++;
14790 while (UI.getUse().getResNo() != 0) ++UI;
14791 SDNode *RightShift = *UI;
14792 if (Trunc->getOpcode() != ISD::TRUNCATE)
14793 std::swap(Trunc, RightShift);
14794
14795 if (Trunc->getOpcode() != ISD::TRUNCATE ||
14796 Trunc->getValueType(0) != MVT::i32 ||
14797 !Trunc->hasOneUse())
14798 return false;
14799 if (RightShift->getOpcode() != ISD::SRL ||
14800 !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
14801 RightShift->getConstantOperandVal(1) != 32 ||
14802 !RightShift->hasOneUse())
14803 return false;
14804
14805 SDNode *Trunc2 = *RightShift->use_begin();
14806 if (Trunc2->getOpcode() != ISD::TRUNCATE ||
14807 Trunc2->getValueType(0) != MVT::i32 ||
14808 !Trunc2->hasOneUse())
14809 return false;
14810
14811 SDNode *Bitcast = *Trunc->use_begin();
14812 SDNode *Bitcast2 = *Trunc2->use_begin();
14813
14814 if (Bitcast->getOpcode() != ISD::BITCAST ||
14815 Bitcast->getValueType(0) != MVT::f32)
14816 return false;
14817 if (Bitcast2->getOpcode() != ISD::BITCAST ||
14818 Bitcast2->getValueType(0) != MVT::f32)
14819 return false;
14820
14821 if (Subtarget.isLittleEndian())
14822 std::swap(Bitcast, Bitcast2);
14823
14824 // Bitcast has the second float (in memory-layout order) and Bitcast2
14825 // has the first one.
14826
14827 SDValue BasePtr = LD->getBasePtr();
14828 if (LD->isIndexed()) {
14829 assert(LD->getAddressingMode() == ISD::PRE_INC &&
14830 "Non-pre-inc AM on PPC?");
14831 BasePtr =
14832 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
14833 LD->getOffset());
14834 }
14835
14836 auto MMOFlags =
14837 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
14838 SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
14839 LD->getPointerInfo(), LD->getAlignment(),
14840 MMOFlags, LD->getAAInfo());
14841 SDValue AddPtr =
14842 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
14843 BasePtr, DAG.getIntPtrConstant(4, dl));
14844 SDValue FloatLoad2 = DAG.getLoad(
14845 MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
14846 LD->getPointerInfo().getWithOffset(4),
14847 MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo());
14848
14849 if (LD->isIndexed()) {
14850 // Note that DAGCombine should re-form any pre-increment load(s) from
14851 // what is produced here if that makes sense.
14852 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
14853 }
14854
14855 DCI.CombineTo(Bitcast2, FloatLoad);
14856 DCI.CombineTo(Bitcast, FloatLoad2);
14857
14858 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
14859 SDValue(FloatLoad2.getNode(), 1));
14860 return true;
14861 };
14862
14863 if (ReplaceTwoFloatLoad())
14864 return SDValue(N, 0);
14865
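// [Editor's sketch, not part of PPCISelLowering.cpp] The kind of source that
// tends to produce the i64-load-plus-two-f32-bitcast pattern handled by
// ReplaceTwoFloatLoad above; whether the pattern actually appears depends on
// how the structure copy is canonicalized. The helper name is illustrative.
#include <complex>
static std::complex<float> copy_complex(const std::complex<float> *P) {
  return *P; // may lower to: load i64; srl/truncate; two bitcasts to f32
}
// [End of editor's sketch]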
14866 EVT MemVT = LD->getMemoryVT();
14867 Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
14868 Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
14869 if (LD->isUnindexed() && VT.isVector() &&
14870 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
14871 // P8 and later hardware should just use LOAD.
14872 !Subtarget.hasP8Vector() &&
14873 (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
14874 VT == MVT::v4f32))) &&
14875 LD->getAlign() < ABIAlignment) {
14876 // This is a type-legal unaligned Altivec load.
14877 SDValue Chain = LD->getChain();
14878 SDValue Ptr = LD->getBasePtr();
14879 bool isLittleEndian = Subtarget.isLittleEndian();
14880
14881 // This implements the loading of unaligned vectors as described in
14882 // the venerable Apple Velocity Engine overview. Specifically:
14883 // https://developer.apple.com/hardwaredrivers/ve/alignment.html
14884 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
14885 //
14886 // The general idea is to expand a sequence of one or more unaligned
14887 // loads into an alignment-based permutation-control instruction (lvsl
14888 // or lvsr), a series of regular vector loads (which always truncate
14889 // their input address to an aligned address), and a series of
14890 // permutations. The results of these permutations are the requested
14891 // loaded values. The trick is that the last "extra" load is not taken
14892 // from the address you might suspect (sizeof(vector) bytes after the
14893 // last requested load), but rather sizeof(vector) - 1 bytes after the
14894 // last requested vector. The point of this is to avoid a page fault if
14895 // the base address happened to be aligned. This works because if the
14896 // base address is aligned, then adding less than a full vector length
14897 // will cause the last vector in the sequence to be (re)loaded.
14898 // Otherwise, the next vector will be fetched as you might suspect was
14899 // necessary.
14900
14901 // We might be able to reuse the permutation generation from
14902 // a different base address offset from this one by an aligned amount.
14903 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
14904 // optimization later.
14905 Intrinsic::ID Intr, IntrLD, IntrPerm;
14906 MVT PermCntlTy, PermTy, LDTy;
14907 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
14908 : Intrinsic::ppc_altivec_lvsl;
14909 IntrLD = Intrinsic::ppc_altivec_lvx;
14910 IntrPerm = Intrinsic::ppc_altivec_vperm;
14911 PermCntlTy = MVT::v16i8;
14912 PermTy = MVT::v4i32;
14913 LDTy = MVT::v4i32;
14914
14915 SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
14916
14917 // Create the new MMO for the new base load. It is like the original MMO,
14918 // but represents an area in memory almost twice the vector size centered
14919 // on the original address. If the address is unaligned, we might start
14920 // reading up to (sizeof(vector)-1) bytes below the address of the
14921 // original unaligned load.
14922 MachineFunction &MF = DAG.getMachineFunction();
14923 MachineMemOperand *BaseMMO =
14924 MF.getMachineMemOperand(LD->getMemOperand(),
14925 -(long)MemVT.getStoreSize()+1,
14926 2*MemVT.getStoreSize()-1);
14927
14928 // Create the new base load.
14929 SDValue LDXIntID =
14930 DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
14931 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
14932 SDValue BaseLoad =
14933 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
14934 DAG.getVTList(PermTy, MVT::Other),
14935 BaseLoadOps, LDTy, BaseMMO);
14936
14937 // Note that the value of IncOffset (which is provided to the next
14938 // load's pointer info offset value, and thus used to calculate the
14939 // alignment), and the value of IncValue (which is actually used to
14940 // increment the pointer value) are different! This is because we
14941 // require the next load to appear to be aligned, even though it
14942 // is actually offset from the base pointer by a lesser amount.
14943 int IncOffset = VT.getSizeInBits() / 8;
14944 int IncValue = IncOffset;
14945
14946 // Walk (both up and down) the chain looking for another load at the real
14947 // (aligned) offset (the alignment of the other load does not matter in
14948 // this case). If found, then do not use the offset reduction trick, as
14949 // that will prevent the loads from being later combined (as they would
14950 // otherwise be duplicates).
14951 if (!findConsecutiveLoad(LD, DAG))
14952 --IncValue;
14953
14954 SDValue Increment =
14955 DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
14956 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
14957
14958 MachineMemOperand *ExtraMMO =
14959 MF.getMachineMemOperand(LD->getMemOperand(),
14960 1, 2*MemVT.getStoreSize()-1);
14961 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
14962 SDValue ExtraLoad =
14963 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
14964 DAG.getVTList(PermTy, MVT::Other),
14965 ExtraLoadOps, LDTy, ExtraMMO);
14966
14967 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
14968 BaseLoad.getValue(1), ExtraLoad.getValue(1));
14969
14970 // Because vperm has a big-endian bias, we must reverse the order
14971 // of the input vectors and complement the permute control vector
14972 // when generating little endian code. We have already handled the
14973 // latter by using lvsr instead of lvsl, so just reverse BaseLoad
14974 // and ExtraLoad here.
14975 SDValue Perm;
14976 if (isLittleEndian)
14977 Perm = BuildIntrinsicOp(IntrPerm,
14978 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
14979 else
14980 Perm = BuildIntrinsicOp(IntrPerm,
14981 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
14982
14983 if (VT != PermTy)
14984 Perm = Subtarget.hasAltivec()
14985 ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
14986 : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
14987 DAG.getTargetConstant(1, dl, MVT::i64));
14988 // second argument is 1 because this rounding
14989 // is always exact.
14990
14991 // The output of the permutation is our loaded result, the TokenFactor is
14992 // our new chain.
14993 DCI.CombineTo(N, Perm, TF);
14994 return SDValue(N, 0);
14995 }
14996 }
14997 break;
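// [Editor's sketch, not part of PPCISelLowering.cpp] A scalar model of the
// lvsl/lvsr + lvx + vperm expansion described in the unaligned Altivec load
// comment above: read the two aligned 16-byte blocks that cover the request,
// then select the wanted bytes. The second block is taken from Addr + 15, not
// Addr + 16, so an already-aligned Addr re-reads the same block instead of
// touching the next page. Like lvx, this reads whole aligned blocks, so it can
// inspect bytes beyond the 16 that were requested. Names are illustrative only.
#include <cstdint>
#include <cstring>
static void model_unaligned_vec_load(const uint8_t *Addr, uint8_t Out[16]) {
  uintptr_t P = reinterpret_cast<uintptr_t>(Addr);
  const uint8_t *Lo = reinterpret_cast<const uint8_t *>(P & ~uintptr_t(15));
  const uint8_t *Hi = reinterpret_cast<const uint8_t *>((P + 15) & ~uintptr_t(15));
  uint8_t Both[32];
  std::memcpy(Both, Lo, 16);             // first aligned block (lvx)
  std::memcpy(Both + 16, Hi, 16);        // "extra" aligned block (lvx)
  std::memcpy(Out, Both + (P & 15), 16); // byte select (lvsl/lvsr + vperm)
}
// [End of editor's sketch]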
14998 case ISD::INTRINSIC_WO_CHAIN: {
14999 bool isLittleEndian = Subtarget.isLittleEndian();
15000 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
15001 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
15002 : Intrinsic::ppc_altivec_lvsl);
15003 if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
15004 SDValue Add = N->getOperand(1);
15005
15006 int Bits = 4 /* 16 byte alignment */;
15007
15008 if (DAG.MaskedValueIsZero(Add->getOperand(1),
15009 APInt::getAllOnesValue(Bits /* alignment */)
15010 .zext(Add.getScalarValueSizeInBits()))) {
15011 SDNode *BasePtr = Add->getOperand(0).getNode();
15012 for (SDNode::use_iterator UI = BasePtr->use_begin(),
15013 UE = BasePtr->use_end();
15014 UI != UE; ++UI) {
15015 if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
15016 cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==
15017 IID) {
15018 // We've found another LVSL/LVSR, and this address is an aligned
15019 // multiple of that one. The results will be the same, so use the
15020 // one we've just found instead.
15021
15022 return SDValue(*UI, 0);
15023 }
15024 }
15025 }
15026
15027 if (isa<ConstantSDNode>(Add->getOperand(1))) {
15028 SDNode *BasePtr = Add->getOperand(0).getNode();
15029 for (SDNode::use_iterator UI = BasePtr->use_begin(),
15030 UE = BasePtr->use_end(); UI != UE; ++UI) {
15031 if (UI->getOpcode() == ISD::ADD &&
15032 isa<ConstantSDNode>(UI->getOperand(1)) &&
15033 (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
15034 cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) %
15035 (1ULL << Bits) == 0) {
15036 SDNode *OtherAdd = *UI;
15037 for (SDNode::use_iterator VI = OtherAdd->use_begin(),
15038 VE = OtherAdd->use_end(); VI != VE; ++VI) {
15039 if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
15040 cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) {
15041 return SDValue(*VI, 0);
15042 }
15043 }
15044 }
15045 }
15046 }
15047 }
15048
15049 // Combine vmaxsw/h/b(a, a's negation) to abs(a)
15050 // Expose the vabsduw/h/b opportunity for downstream optimization.
15051 if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
15052 (IID == Intrinsic::ppc_altivec_vmaxsw ||
15053 IID == Intrinsic::ppc_altivec_vmaxsh ||
15054 IID == Intrinsic::ppc_altivec_vmaxsb)) {
15055 SDValue V1 = N->getOperand(1);
15056 SDValue V2 = N->getOperand(2);
15057 if ((V1.getSimpleValueType() == MVT::v4i32 ||
15058 V1.getSimpleValueType() == MVT::v8i16 ||
15059 V1.getSimpleValueType() == MVT::v16i8) &&
15060 V1.getSimpleValueType() == V2.getSimpleValueType()) {
15061 // (0-a, a)
15062 if (V1.getOpcode() == ISD::SUB &&
15063 ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
15064 V1.getOperand(1) == V2) {
15065 return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
15066 }
15067 // (a, 0-a)
15068 if (V2.getOpcode() == ISD::SUB &&
15069 ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
15070 V2.getOperand(1) == V1) {
15071 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
15072 }
15073 // (x-y, y-x)
15074 if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
15075 V1.getOperand(0) == V2.getOperand(1) &&
15076 V1.getOperand(1) == V2.getOperand(0)) {
15077 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
15078 }
15079 }
15080 }
15081 }
15082
15083 break;
15084 case ISD::INTRINSIC_W_CHAIN:
15085 // For little endian, VSX loads require generating lxvd2x/xxswapd.
15086 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
15087 if (Subtarget.needsSwapsForVSXMemOps()) {
15088 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
15089 default:
15090 break;
15091 case Intrinsic::ppc_vsx_lxvw4x:
15092 case Intrinsic::ppc_vsx_lxvd2x:
15093 return expandVSXLoadForLE(N, DCI);
15094 }
15095 }
15096 break;
15097 case ISD::INTRINSIC_VOID:
15098 // For little endian, VSX stores require generating xxswapd/stxvd2x.
15099 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
15100 if (Subtarget.needsSwapsForVSXMemOps()) {
15101 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
15102 default:
15103 break;
15104 case Intrinsic::ppc_vsx_stxvw4x:
15105 case Intrinsic::ppc_vsx_stxvd2x:
15106 return expandVSXStoreForLE(N, DCI);
15107 }
15108 }
15109 break;
15110 case ISD::BSWAP:
15111 // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
15112 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
15113 N->getOperand(0).hasOneUse() &&
15114 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
15115 (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
15116 N->getValueType(0) == MVT::i64))) {
15117 SDValue Load = N->getOperand(0);
15118 LoadSDNode *LD = cast<LoadSDNode>(Load);
15119 // Create the byte-swapping load.
15120 SDValue Ops[] = {
15121 LD->getChain(), // Chain
15122 LD->getBasePtr(), // Ptr
15123 DAG.getValueType(N->getValueType(0)) // VT
15124 };
15125 SDValue BSLoad =
15126 DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
15127 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
15128 MVT::i64 : MVT::i32, MVT::Other),
15129 Ops, LD->getMemoryVT(), LD->getMemOperand());
15130
15131 // If this is an i16 load, insert the truncate.
15132 SDValue ResVal = BSLoad;
15133 if (N->getValueType(0) == MVT::i16)
15134 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
15135
15136 // First, combine the bswap away. This makes the value produced by the
15137 // load dead.
15138 DCI.CombineTo(N, ResVal);
15139
15140 // Next, combine the load away; we give it a bogus result value but a real
15141 // chain result. The result value is dead because the bswap is dead.
15142 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
15143
15144 // Return N so it doesn't get rechecked!
15145 return SDValue(N, 0);
15146 }
15147 break;
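// [Editor's sketch, not part of PPCISelLowering.cpp] What the PPCISD::LBRX node
// created above computes: the load and the byte reversal are fused, so the
// separate bswap disappears. A scalar model for the 32-bit (lwbrx) case, with
// an illustrative name:
#include <cstdint>
#include <cstring>
static uint32_t model_lwbrx(const uint8_t *Mem) {
  uint32_t V;
  std::memcpy(&V, Mem, 4);
  return __builtin_bswap32(V); // load combined with the byte reversal
}
// [End of editor's sketch]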
15148 case PPCISD::VCMP:
15149 // If a VCMPo node already exists with exactly the same operands as this
15150 // node, use its result instead of this node (VCMPo computes both a CR6 and
15151 // a normal output).
15152 //
15153 if (!N->getOperand(0).hasOneUse() &&
15154 !N->getOperand(1).hasOneUse() &&
15155 !N->getOperand(2).hasOneUse()) {
15156
15157 // Scan all of the users of the LHS, looking for VCMPo's that match.
15158 SDNode *VCMPoNode = nullptr;
15159
15160 SDNode *LHSN = N->getOperand(0).getNode();
15161 for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
15162 UI != E; ++UI)
15163 if (UI->getOpcode() == PPCISD::VCMPo &&
15164 UI->getOperand(1) == N->getOperand(1) &&
15165 UI->getOperand(2) == N->getOperand(2) &&
15166 UI->getOperand(0) == N->getOperand(0)) {
15167 VCMPoNode = *UI;
15168 break;
15169 }
15170
15171 // If there is no VCMPo node, or if the flag value has a single use, don't
15172 // transform this.
15173 if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
15174 break;
15175
15176 // Look at the (necessarily single) use of the flag value. If it has a
15177 // chain, this transformation is more complex. Note that multiple things
15178 // could use the value result, which we should ignore.
15179 SDNode *FlagUser = nullptr;
15180 for (SDNode::use_iterator UI = VCMPoNode->use_begin();
15181 FlagUser == nullptr; ++UI) {
15182 assert(UI != VCMPoNode->use_end() && "Didn't find user!");
15183 SDNode *User = *UI;
15184 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
15185 if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
15186 FlagUser = User;
15187 break;
15188 }
15189 }
15190 }
15191
15192 // If the user is a MFOCRF instruction, we know this is safe.
15193 // Otherwise we give up for right now.
15194 if (FlagUser->getOpcode() == PPCISD::MFOCRF)
15195 return SDValue(VCMPoNode, 0);
15196 }
15197 break;
15198 case ISD::BRCOND: {
15199 SDValue Cond = N->getOperand(1);
15200 SDValue Target = N->getOperand(2);
15201
15202 if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
15203 cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
15204 Intrinsic::loop_decrement) {
15205
15206 // We now need to make the intrinsic dead (it cannot be instruction
15207 // selected).
15208 DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
15209 assert(Cond.getNode()->hasOneUse() &&
15210 "Counter decrement has more than one use");
15211
15212 return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
15213 N->getOperand(0), Target);
15214 }
15215 }
15216 break;
15217 case ISD::BR_CC: {
15218 // If this is a branch on an altivec predicate comparison, lower this so
15219 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
15220 // lowering is done pre-legalize, because the legalizer lowers the predicate
15221 // compare down to code that is difficult to reassemble.
15222 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
15223 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
15224
15225 // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
15226 // value. If so, pass-through the AND to get to the intrinsic.
15227 if (LHS.getOpcode() == ISD::AND &&
15228 LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
15229 cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
15230 Intrinsic::loop_decrement &&
15231 isa<ConstantSDNode>(LHS.getOperand(1)) &&
15232 !isNullConstant(LHS.getOperand(1)))
15233 LHS = LHS.getOperand(0);
15234
15235 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
15236 cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
15237 Intrinsic::loop_decrement &&
15238 isa<ConstantSDNode>(RHS)) {
15239 assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
15240 "Counter decrement comparison is not EQ or NE");
15241
15242 unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
15243 bool isBDNZ = (CC == ISD::SETEQ && Val) ||
15244 (CC == ISD::SETNE && !Val);
15245
15246 // We now need to make the intrinsic dead (it cannot be instruction
15247 // selected).
15248 DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
15249 assert(LHS.getNode()->hasOneUse() &&
15250 "Counter decrement has more than one use");
15251
15252 return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
15253 N->getOperand(0), N->getOperand(4));
15254 }
15255
15256 int CompareOpc;
15257 bool isDot;
15258
15259 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
15260 isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
15261 getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
15262 assert(isDot && "Can't compare against a vector result!");
15263
15264 // If this is a comparison against something other than 0/1, then we know
15265 // that the condition is never/always true.
15266 unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
15267 if (Val != 0 && Val != 1) {
15268 if (CC == ISD::SETEQ) // Cond never true, remove branch.
15269 return N->getOperand(0);
15270 // Always !=, turn it into an unconditional branch.
15271 return DAG.getNode(ISD::BR, dl, MVT::Other,
15272 N->getOperand(0), N->getOperand(4));
15273 }
15274
15275 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
15276
15277 // Create the PPCISD altivec 'dot' comparison node.
15278 SDValue Ops[] = {
15279 LHS.getOperand(2), // LHS of compare
15280 LHS.getOperand(3), // RHS of compare
15281 DAG.getConstant(CompareOpc, dl, MVT::i32)
15282 };
15283 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
15284 SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
15285
15286 // Unpack the result based on how the target uses it.
15287 PPC::Predicate CompOpc;
15288 switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
15289 default: // Can't happen, don't crash on invalid number though.
15290 case 0: // Branch on the value of the EQ bit of CR6.
15291 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
15292 break;
15293 case 1: // Branch on the inverted value of the EQ bit of CR6.
15294 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
15295 break;
15296 case 2: // Branch on the value of the LT bit of CR6.
15297 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
15298 break;
15299 case 3: // Branch on the inverted value of the LT bit of CR6.
15300 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
15301 break;
15302 }
15303
15304 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
15305 DAG.getConstant(CompOpc, dl, MVT::i32),
15306 DAG.getRegister(PPC::CR6, MVT::i32),
15307 N->getOperand(4), CompNode.getValue(1));
15308 }
15309 break;
15310 }
15311 case ISD::BUILD_VECTOR:
15312 return DAGCombineBuildVector(N, DCI);
15313 case ISD::ABS:
15314 return combineABS(N, DCI);
15315 case ISD::VSELECT:
15316 return combineVSelect(N, DCI);
15317 }
15318
15319 return SDValue();
15320}
15321
15322SDValue
15323PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
15324 SelectionDAG &DAG,
15325 SmallVectorImpl<SDNode *> &Created) const {
15326 // fold (sdiv X, pow2)
15327 EVT VT = N->getValueType(0);
15328 if (VT == MVT::i64 && !Subtarget.isPPC64())
15329 return SDValue();
15330 if ((VT != MVT::i32 && VT != MVT::i64) ||
15331 !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
15332 return SDValue();
15333
15334 SDLoc DL(N);
15335 SDValue N0 = N->getOperand(0);
15336
15337 bool IsNegPow2 = (-Divisor).isPowerOf2();
15338 unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
15339 SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
15340
15341 SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
15342 Created.push_back(Op.getNode());
15343
15344 if (IsNegPow2) {
15345 Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
15346 Created.push_back(Op.getNode());
15347 }
15348
15349 return Op;
15350}
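// [Editor's sketch, not part of PPCISelLowering.cpp] What the PPCISD::SRA_ADDZE
// node built above computes for sdiv by a power of two: the arithmetic shift
// rounds toward negative infinity, srawi/sradi record in CA that a negative
// value lost nonzero bits, and addze adds that carry back in, giving the
// round-toward-zero quotient C requires. Names are illustrative only.
#include <cassert>
#include <cstdint>
static int32_t sdiv_pow2(int32_t X, unsigned Lg2) {
  int32_t Shifted = X >> Lg2;                   // srawi: rounds toward -inf
  uint32_t Lost = static_cast<uint32_t>(X) & ((1u << Lg2) - 1u);
  int32_t Carry = (X < 0 && Lost != 0) ? 1 : 0; // the CA bit
  return Shifted + Carry;                       // addze
}
static void test_sdiv_pow2() {
  assert(sdiv_pow2(-7, 1) == -7 / 2); // both are -3
  assert(sdiv_pow2(7, 2) == 7 / 4);   // both are 1
}
// [End of editor's sketch]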
15351
15352//===----------------------------------------------------------------------===//
15353// Inline Assembly Support
15354//===----------------------------------------------------------------------===//
15355
15356void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
15357 KnownBits &Known,
15358 const APInt &DemandedElts,
15359 const SelectionDAG &DAG,
15360 unsigned Depth) const {
15361 Known.resetAll();
15362 switch (Op.getOpcode()) {
15363 default: break;
15364 case PPCISD::LBRX: {
15365 // lhbrx is known to have the top bits cleared out.
15366 if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
15367 Known.Zero = 0xFFFF0000;
15368 break;
15369 }
15370 case ISD::INTRINSIC_WO_CHAIN: {
15371 switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
15372 default: break;
15373 case Intrinsic::ppc_altivec_vcmpbfp_p:
15374 case Intrinsic::ppc_altivec_vcmpeqfp_p:
15375 case Intrinsic::ppc_altivec_vcmpequb_p:
15376 case Intrinsic::ppc_altivec_vcmpequh_p:
15377 case Intrinsic::ppc_altivec_vcmpequw_p:
15378 case Intrinsic::ppc_altivec_vcmpequd_p:
15379 case Intrinsic::ppc_altivec_vcmpequq_p:
15380 case Intrinsic::ppc_altivec_vcmpgefp_p:
15381 case Intrinsic::ppc_altivec_vcmpgtfp_p:
15382 case Intrinsic::ppc_altivec_vcmpgtsb_p:
15383 case Intrinsic::ppc_altivec_vcmpgtsh_p:
15384 case Intrinsic::ppc_altivec_vcmpgtsw_p:
15385 case Intrinsic::ppc_altivec_vcmpgtsd_p:
15386 case Intrinsic::ppc_altivec_vcmpgtsq_p:
15387 case Intrinsic::ppc_altivec_vcmpgtub_p:
15388 case Intrinsic::ppc_altivec_vcmpgtuh_p:
15389 case Intrinsic::ppc_altivec_vcmpgtuw_p:
15390 case Intrinsic::ppc_altivec_vcmpgtud_p:
15391 case Intrinsic::ppc_altivec_vcmpgtuq_p:
15392 Known.Zero = ~1U; // All bits but the low one are known to be zero.
15393 break;
15394 }
15395 }
15396 }
15397}
15398
15399Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
15400 switch (Subtarget.getCPUDirective()) {
15401 default: break;
15402 case PPC::DIR_970:
15403 case PPC::DIR_PWR4:
15404 case PPC::DIR_PWR5:
15405 case PPC::DIR_PWR5X:
15406 case PPC::DIR_PWR6:
15407 case PPC::DIR_PWR6X:
15408 case PPC::DIR_PWR7:
15409 case PPC::DIR_PWR8:
15410 case PPC::DIR_PWR9:
15411 case PPC::DIR_PWR10:
15412 case PPC::DIR_PWR_FUTURE: {
15413 if (!ML)
15414 break;
15415
15416 if (!DisableInnermostLoopAlign32) {
15417 // If the nested loop is an innermost loop, prefer a 32-byte alignment,
15418 // so that we can decrease cache misses and branch-prediction misses.
15419 // Actual alignment of the loop will depend on the hotness check and other
15420 // logic in alignBlocks.
15421 if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
15422 return Align(32);
15423 }
15424
15425 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
15426
15427 // For small loops (between 5 and 8 instructions), align to a 32-byte
15428 // boundary so that the entire loop fits in one instruction-cache line.
15429 uint64_t LoopSize = 0;
15430 for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
15431 for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) {
15432 LoopSize += TII->getInstSizeInBytes(*J);
15433 if (LoopSize > 32)
15434 break;
15435 }
15436
15437 if (LoopSize > 16 && LoopSize <= 32)
15438 return Align(32);
15439
15440 break;
15441 }
15442 }
15443
15444 return TargetLowering::getPrefLoopAlignment(ML);
15445}
15446
15447/// getConstraintType - Given a constraint, return the type of
15448/// constraint it is for this target.
15449PPCTargetLowering::ConstraintType
15450PPCTargetLowering::getConstraintType(StringRef Constraint) const {
15451 if (Constraint.size() == 1) {
15452 switch (Constraint[0]) {
15453 default: break;
15454 case 'b':
15455 case 'r':
15456 case 'f':
15457 case 'd':
15458 case 'v':
15459 case 'y':
15460 return C_RegisterClass;
15461 case 'Z':
15462 // FIXME: While Z does indicate a memory constraint, it specifically
15463 // indicates an r+r address (used in conjunction with the 'y' modifier
15464 // in the replacement string). Currently, we're forcing the base
15465 // register to be r0 in the asm printer (which is interpreted as zero)
15466 // and forming the complete address in the second register. This is
15467 // suboptimal.
15468 return C_Memory;
15469 }
15470 } else if (Constraint == "wc") { // individual CR bits.
15471 return C_RegisterClass;
15472 } else if (Constraint == "wa" || Constraint == "wd" ||
15473 Constraint == "wf" || Constraint == "ws" ||
15474 Constraint == "wi" || Constraint == "ww") {
15475 return C_RegisterClass; // VSX registers.
15476 }
15477 return TargetLowering::getConstraintType(Constraint);
15478}
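// [Editor's sketch, not part of PPCISelLowering.cpp] How the 'Z' constraint
// discussed above is typically used from C: "Z" supplies an r+r-addressable
// memory operand and the "%y" operand modifier prints it in the "rA,rB" form
// that indexed instructions such as lwbrx expect. The helper name is
// illustrative only.
static inline unsigned load_word_byte_reversed(const unsigned *Ptr) {
  unsigned Val;
  __asm__("lwbrx %0, %y1" : "=r"(Val) : "Z"(*Ptr));
  return Val;
}
// [End of editor's sketch]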
15479
15480/// Examine constraint type and operand type and determine a weight value.
15481/// This object must already have been set up with the operand type
15482/// and the current alternative constraint selected.
15483TargetLowering::ConstraintWeight
15484PPCTargetLowering::getSingleConstraintMatchWeight(
15485 AsmOperandInfo &info, const char *constraint) const {
15486 ConstraintWeight weight = CW_Invalid;
15487 Value *CallOperandVal = info.CallOperandVal;
15488 // If we don't have a value, we can't do a match,
15489 // but allow it at the lowest weight.
15490 if (!CallOperandVal)
15491 return CW_Default;
15492 Type *type = CallOperandVal->getType();
15493
15494 // Look at the constraint type.
15495 if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
15496 return CW_Register; // an individual CR bit.
15497 else if ((StringRef(constraint) == "wa" ||
15498 StringRef(constraint) == "wd" ||
15499 StringRef(constraint) == "wf") &&
15500 type->isVectorTy())
15501 return CW_Register;
15502 else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
15503 return CW_Register; // just holds 64-bit integer data.
15504 else if (StringRef(constraint) == "ws" && type->isDoubleTy())
15505 return CW_Register;
15506 else if (StringRef(constraint) == "ww" && type->isFloatTy())
15507 return CW_Register;
15508
15509 switch (*constraint) {
15510 default:
15511 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
15512 break;
15513 case 'b':
15514 if (type->isIntegerTy())
15515 weight = CW_Register;
15516 break;
15517 case 'f':
15518 if (type->isFloatTy())
15519 weight = CW_Register;
15520 break;
15521 case 'd':
15522 if (type->isDoubleTy())
15523 weight = CW_Register;
15524 break;
15525 case 'v':
15526 if (type->isVectorTy())
15527 weight = CW_Register;
15528 break;
15529 case 'y':
15530 weight = CW_Register;
15531 break;
15532 case 'Z':
15533 weight = CW_Memory;
15534 break;
15535 }
15536 return weight;
15537}
15538
15539std::pair<unsigned, const TargetRegisterClass *>
15540PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
15541 StringRef Constraint,
15542 MVT VT) const {
15543 if (Constraint.size() == 1) {
15544 // GCC RS6000 Constraint Letters
15545 switch (Constraint[0]) {
15546 case 'b': // R1-R31
15547 if (VT == MVT::i64 && Subtarget.isPPC64())
15548 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
15549 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
15550 case 'r': // R0-R31
15551 if (VT == MVT::i64 && Subtarget.isPPC64())
15552 return std::make_pair(0U, &PPC::G8RCRegClass);
15553 return std::make_pair(0U, &PPC::GPRCRegClass);
15554 // 'd' and 'f' constraints are both defined to be "the floating point
15555 // registers", where one is for 32-bit and the other for 64-bit. We don't
15556 // really care overly much here so just give them all the same reg classes.
15557 case 'd':
15558 case 'f':
15559 if (Subtarget.hasSPE()) {
15560 if (VT == MVT::f32 || VT == MVT::i32)
15561 return std::make_pair(0U, &PPC::GPRCRegClass);
15562 if (VT == MVT::f64 || VT == MVT::i64)
15563 return std::make_pair(0U, &PPC::SPERCRegClass);
15564 } else {
15565 if (VT == MVT::f32 || VT == MVT::i32)
15566 return std::make_pair(0U, &PPC::F4RCRegClass);
15567 if (VT == MVT::f64 || VT == MVT::i64)
15568 return std::make_pair(0U, &PPC::F8RCRegClass);
15569 }
15570 break;
15571 case 'v':
15572 if (Subtarget.hasAltivec())
15573 return std::make_pair(0U, &PPC::VRRCRegClass);
15574 break;
15575 case 'y': // crrc
15576 return std::make_pair(0U, &PPC::CRRCRegClass);
15577 }
15578 } else if (Constraint == "wc" && Subtarget.useCRBits()) {
15579 // An individual CR bit.
15580 return std::make_pair(0U, &PPC::CRBITRCRegClass);
15581 } else if ((Constraint == "wa" || Constraint == "wd" ||
15582 Constraint == "wf" || Constraint == "wi") &&
15583 Subtarget.hasVSX()) {
15584 return std::make_pair(0U, &PPC::VSRCRegClass);
15585 } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
15586 if (VT == MVT::f32 && Subtarget.hasP8Vector())
15587 return std::make_pair(0U, &PPC::VSSRCRegClass);
15588 else
15589 return std::make_pair(0U, &PPC::VSFRCRegClass);
15590 }
15591
15592 // If we name a VSX register, we can't defer to the base class because it
15593 // will not recognize the correct register (their names will be VSL{0-31}
15594 // and V{0-31} so they won't match). So we match them here.
15595 if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
15596 int VSNum = atoi(Constraint.data() + 3);
15597 assert(VSNum >= 0 && VSNum <= 63 &&
15598 "Attempted to access a vsr out of range");
15599 if (VSNum < 32)
15600 return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
15601 return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
15602 }
15603 std::pair<unsigned, const TargetRegisterClass *> R =
15604 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
15605
15606 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
15607 // (which we call X[0-9]+). If a 64-bit value has been requested, and a
15608 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
15609 // register.
15610 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
15611 // the AsmName field from *RegisterInfo.td, then this would not be necessary.
15612 if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
15613 PPC::GPRCRegClass.contains(R.first))
15614 return std::make_pair(TRI->getMatchingSuperReg(R.first,
15615 PPC::sub_32, &PPC::G8RCRegClass),
15616 &PPC::G8RCRegClass);
15617
15618 // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
15619 if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {
15620 R.first = PPC::CR0;
15621 R.second = &PPC::CRRCRegClass;
15622 }
15623
15624 return R;
15625}
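// Worked example for the explicit VSX-name path above (illustrative): the
// constraint string "{vs3}" gives VSNum = 3 (< 32), so the pair
// (PPC::VSL3, &PPC::VSRCRegClass) is returned; "{vs34}" gives VSNum = 34, so
// (PPC::V2, &PPC::VSRCRegClass) is returned since 34 - 32 = 2.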
15626
15627/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
15628/// vector. If it is invalid, don't add anything to Ops.
15629void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
15630 std::string &Constraint,
15631 std::vector<SDValue>&Ops,
15632 SelectionDAG &DAG) const {
15633 SDValue Result;
15634
15635 // Only support length 1 constraints.
15636 if (Constraint.length() > 1) return;
15637
15638 char Letter = Constraint[0];
15639 switch (Letter) {
15640 default: break;
15641 case 'I':
15642 case 'J':
15643 case 'K':
15644 case 'L':
15645 case 'M':
15646 case 'N':
15647 case 'O':
15648 case 'P': {
15649 ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
15650 if (!CST) return; // Must be an immediate to match.
15651 SDLoc dl(Op);
15652 int64_t Value = CST->getSExtValue();
15653 EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
15654 // numbers are printed as such.
15655 switch (Letter) {
15656 default: llvm_unreachable("Unknown constraint letter!");
15657 case 'I': // "I" is a signed 16-bit constant.
15658 if (isInt<16>(Value))
15659 Result = DAG.getTargetConstant(Value, dl, TCVT);
15660 break;
15661 case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
15662 if (isShiftedUInt<16, 16>(Value))
15663 Result = DAG.getTargetConstant(Value, dl, TCVT);
15664 break;
15665 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
15666 if (isShiftedInt<16, 16>(Value))
15667 Result = DAG.getTargetConstant(Value, dl, TCVT);
15668 break;
15669 case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
15670 if (isUInt<16>(Value))
15671 Result = DAG.getTargetConstant(Value, dl, TCVT);
15672 break;
15673 case 'M': // "M" is a constant that is greater than 31.
15674 if (Value > 31)
15675 Result = DAG.getTargetConstant(Value, dl, TCVT);
15676 break;
15677 case 'N': // "N" is a positive constant that is an exact power of two.
15678 if (Value > 0 && isPowerOf2_64(Value))
15679 Result = DAG.getTargetConstant(Value, dl, TCVT);
15680 break;
15681 case 'O': // "O" is the constant zero.
15682 if (Value == 0)
15683 Result = DAG.getTargetConstant(Value, dl, TCVT);
15684 break;
15685 case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
15686 if (isInt<16>(-Value))
15687 Result = DAG.getTargetConstant(Value, dl, TCVT);
15688 break;
15689 }
15690 break;
15691 }
15692 }
15693
15694 if (Result.getNode()) {
15695 Ops.push_back(Result);
15696 return;
15697 }
15698
15699 // Handle standard constraint letters.
15700 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
15701}
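// A minimal sketch of the immediate constraints handled above (hypothetical
// user code, names assumed): 'I' accepts any signed 16-bit constant, so the
// literal 42 below is lowered to a target constant; a value such as 100000
// would fail the isInt<16> check, nothing would be added for it here, and the
// generic handling would be tried instead.
//
//   int add_imm(int x) {
//     int r;
//     __asm__("addi %0, %1, %2" : "=r"(r) : "r"(x), "I"(42));
//     return r;
//   }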
15702
15703// isLegalAddressingMode - Return true if the addressing mode represented
15704// by AM is legal for this target, for a load/store of the specified type.
15705bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
15706 const AddrMode &AM, Type *Ty,
15707 unsigned AS,
15708 Instruction *I) const {
15709 // Vector-type r+i form has been supported since Power9 as the DQ form. We don't
15710 // check the offset against the DQ-form requirement (off % 16 == 0), because on
15711 // PowerPC the imm form is preferred and the offset can be adjusted to the imm
15712 // form later in the PPCLoopInstrFormPrep pass. Also, in LSR, one LSRUse checks
15713 // legal addressing modes with its min and max offsets, so we should be a little
15714 // aggressive and accept other offsets for that LSRUse.
15715 if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
15716 return false;
15717
15718 // PPC allows a sign-extended 16-bit immediate field.
15719 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
15720 return false;
15721
15722 // No global is ever allowed as a base.
15723 if (AM.BaseGV)
15724 return false;
15725
15726 // PPC only supports r+r,
15727 switch (AM.Scale) {
15728 case 0: // "r+i" or just "i", depending on HasBaseReg.
15729 break;
15730 case 1:
15731 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
15732 return false;
15733 // Otherwise we have r+r or r+i.
15734 break;
15735 case 2:
15736 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
15737 return false;
15738 // Allow 2*r as r+r.
15739 break;
15740 default:
15741 // No other scales are supported.
15742 return false;
15743 }
15744
15745 return true;
15746}
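// Worked examples for the rules above (illustrative): an AddrMode with
// HasBaseReg, BaseOffs = 8 and Scale = 0 is accepted (r+i); HasBaseReg with
// Scale = 1 and no offset is accepted (r+r); HasBaseReg with Scale = 1 and
// BaseOffs = 8 is rejected (r+r+i); and any mode with a BaseGV, or a scale
// other than 0, 1 or 2, is rejected.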
15747
15748SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
15749 SelectionDAG &DAG) const {
15750 MachineFunction &MF = DAG.getMachineFunction();
15751 MachineFrameInfo &MFI = MF.getFrameInfo();
15752 MFI.setReturnAddressIsTaken(true);
15753
15754 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
15755 return SDValue();
15756
15757 SDLoc dl(Op);
15758 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
15759
15760 // Make sure the function does not optimize away the store of the RA to
15761 // the stack.
15762 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
15763 FuncInfo->setLRStoreRequired();
15764 bool isPPC64 = Subtarget.isPPC64();
15765 auto PtrVT = getPointerTy(MF.getDataLayout());
15766
15767 if (Depth > 0) {
15768 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
15769 SDValue Offset =
15770 DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
15771 isPPC64 ? MVT::i64 : MVT::i32);
15772 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
15773 DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
15774 MachinePointerInfo());
15775 }
15776
15777 // Just load the return address off the stack.
15778 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
15779 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
15780 MachinePointerInfo());
15781}
15782
15783SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
15784 SelectionDAG &DAG) const {
15785 SDLoc dl(Op);
15786 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
15787
15788 MachineFunction &MF = DAG.getMachineFunction();
15789 MachineFrameInfo &MFI = MF.getFrameInfo();
15790 MFI.setFrameAddressIsTaken(true);
15791
15792 EVT PtrVT = getPointerTy(MF.getDataLayout());
15793 bool isPPC64 = PtrVT == MVT::i64;
15794
15795 // Naked functions never have a frame pointer, and so we use r1. For all
15796 // other functions, this decision must be delayed until PEI.
15797 unsigned FrameReg;
15798 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
15799 FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
15800 else
15801 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
15802
15803 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
15804 PtrVT);
15805 while (Depth--)
15806 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
15807 FrameAddr, MachinePointerInfo());
15808 return FrameAddr;
15809}
15810
15811// FIXME? Maybe this could be a TableGen attribute on some registers and
15812// this table could be generated automatically from RegInfo.
15813Register PPCTargetLowering::getRegisterByName(const char* RegName, LLT VT,
15814 const MachineFunction &MF) const {
15815 bool isPPC64 = Subtarget.isPPC64();
15816
15817 bool is64Bit = isPPC64 && VT == LLT::scalar(64);
15818 if (!is64Bit && VT != LLT::scalar(32))
15819 report_fatal_error("Invalid register global variable type");
15820
15821 Register Reg = StringSwitch<Register>(RegName)
15822 .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
15823 .Case("r2", isPPC64 ? Register() : PPC::R2)
15824 .Case("r13", (is64Bit ? PPC::X13 : PPC::R13))
15825 .Default(Register());
15826
15827 if (Reg)
15828 return Reg;
15829 report_fatal_error("Invalid register name global variable");
15830}
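// For illustration (hypothetical user code): a global register variable such
// as
//   register long thread_ptr asm("r13");
// resolves to PPC::X13 on a 64-bit subtarget and PPC::R13 on a 32-bit one,
// while asking for "r2" on a 64-bit subtarget yields an invalid Register and
// reports the fatal error above.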
15831
15832bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
15833 // The 32-bit SVR4 ABI accesses everything as got-indirect.
15834 if (Subtarget.is32BitELFABI())
15835 return true;
15836
15837 // AIX accesses everything indirectly through the TOC, which is similar to
15838 // the GOT.
15839 if (Subtarget.isAIXABI())
15840 return true;
15841
15842 CodeModel::Model CModel = getTargetMachine().getCodeModel();
15843 // If it is small or large code model, module locals are accessed
15844 // indirectly by loading their address from .toc/.got.
15845 if (CModel == CodeModel::Small || CModel == CodeModel::Large)
15846 return true;
15847
15848 // JumpTable and BlockAddress are accessed as got-indirect.
15849 if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
15850 return true;
15851
15852 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
15853 return Subtarget.isGVIndirectSymbol(G->getGlobal());
15854
15855 return false;
15856}
15857
15858bool
15859PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
15860 // The PowerPC target isn't yet aware of offsets.
15861 return false;
15862}
15863
15864bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
15865 const CallInst &I,
15866 MachineFunction &MF,
15867 unsigned Intrinsic) const {
15868 switch (Intrinsic) {
15869 case Intrinsic::ppc_altivec_lvx:
15870 case Intrinsic::ppc_altivec_lvxl:
15871 case Intrinsic::ppc_altivec_lvebx:
15872 case Intrinsic::ppc_altivec_lvehx:
15873 case Intrinsic::ppc_altivec_lvewx:
15874 case Intrinsic::ppc_vsx_lxvd2x:
15875 case Intrinsic::ppc_vsx_lxvw4x:
15876 case Intrinsic::ppc_vsx_lxvd2x_be:
15877 case Intrinsic::ppc_vsx_lxvw4x_be:
15878 case Intrinsic::ppc_vsx_lxvl:
15879 case Intrinsic::ppc_vsx_lxvll: {
15880 EVT VT;
15881 switch (Intrinsic) {
15882 case Intrinsic::ppc_altivec_lvebx:
15883 VT = MVT::i8;
15884 break;
15885 case Intrinsic::ppc_altivec_lvehx:
15886 VT = MVT::i16;
15887 break;
15888 case Intrinsic::ppc_altivec_lvewx:
15889 VT = MVT::i32;
15890 break;
15891 case Intrinsic::ppc_vsx_lxvd2x:
15892 case Intrinsic::ppc_vsx_lxvd2x_be:
15893 VT = MVT::v2f64;
15894 break;
15895 default:
15896 VT = MVT::v4i32;
15897 break;
15898 }
15899
15900 Info.opc = ISD::INTRINSIC_W_CHAIN;
15901 Info.memVT = VT;
15902 Info.ptrVal = I.getArgOperand(0);
15903 Info.offset = -VT.getStoreSize()+1;
15904 Info.size = 2*VT.getStoreSize()-1;
15905 Info.align = Align(1);
15906 Info.flags = MachineMemOperand::MOLoad;
15907 return true;
15908 }
15909 case Intrinsic::ppc_altivec_stvx:
15910 case Intrinsic::ppc_altivec_stvxl:
15911 case Intrinsic::ppc_altivec_stvebx:
15912 case Intrinsic::ppc_altivec_stvehx:
15913 case Intrinsic::ppc_altivec_stvewx:
15914 case Intrinsic::ppc_vsx_stxvd2x:
15915 case Intrinsic::ppc_vsx_stxvw4x:
15916 case Intrinsic::ppc_vsx_stxvd2x_be:
15917 case Intrinsic::ppc_vsx_stxvw4x_be:
15918 case Intrinsic::ppc_vsx_stxvl:
15919 case Intrinsic::ppc_vsx_stxvll: {
15920 EVT VT;
15921 switch (Intrinsic) {
15922 case Intrinsic::ppc_altivec_stvebx:
15923 VT = MVT::i8;
15924 break;
15925 case Intrinsic::ppc_altivec_stvehx:
15926 VT = MVT::i16;
15927 break;
15928 case Intrinsic::ppc_altivec_stvewx:
15929 VT = MVT::i32;
15930 break;
15931 case Intrinsic::ppc_vsx_stxvd2x:
15932 case Intrinsic::ppc_vsx_stxvd2x_be:
15933 VT = MVT::v2f64;
15934 break;
15935 default:
15936 VT = MVT::v4i32;
15937 break;
15938 }
15939
15940 Info.opc = ISD::INTRINSIC_VOID;
15941 Info.memVT = VT;
15942 Info.ptrVal = I.getArgOperand(1);
15943 Info.offset = -VT.getStoreSize()+1;
15944 Info.size = 2*VT.getStoreSize()-1;
15945 Info.align = Align(1);
15946 Info.flags = MachineMemOperand::MOStore;
15947 return true;
15948 }
15949 default:
15950 break;
15951 }
15952
15953 return false;
15954}
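// Worked example for the intrinsic info above (illustrative): for
// Intrinsic::ppc_altivec_lvx, memVT is v4i32 (16 bytes), so offset = -15 and
// size = 31. The conservative window reflects that lvx ignores the low four
// bits of the effective address, so the bytes actually touched may start up
// to 15 bytes before the given pointer.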
15955
15956/// It returns EVT::Other if the type should be determined using generic
15957/// target-independent logic.
15958EVT PPCTargetLowering::getOptimalMemOpType(
15959 const MemOp &Op, const AttributeList &FuncAttributes) const {
15960 if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
15961 // We should use Altivec/VSX loads and stores when available. For unaligned
15962 // addresses, unaligned VSX loads are only fast starting with the P8.
15963 if (Subtarget.hasAltivec() && Op.size() >= 16 &&
15964 (Op.isAligned(Align(16)) ||
15965 ((Op.isMemset() && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
15966 return MVT::v4i32;
15967 }
15968
15969 if (Subtarget.isPPC64()) {
15970 return MVT::i64;
15971 }
15972
15973 return MVT::i32;
15974}
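// For illustration (derived from the checks above): when not compiling at
// -O0, a 32-byte memcpy with 16-byte-aligned operands on an Altivec-capable
// subtarget returns MVT::v4i32, so the copy is expanded with 16-byte vector
// loads and stores; without Altivec the result is MVT::i64 on 64-bit
// subtargets and MVT::i32 otherwise.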
15975
15976/// Returns true if it is beneficial to convert a load of a constant
15977/// to just the constant itself.
15978bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
15979 Type *Ty) const {
15980 assert(Ty->isIntegerTy());
15981
15982 unsigned BitSize = Ty->getPrimitiveSizeInBits();
15983 return !(BitSize == 0 || BitSize > 64);
15984}
15985
15986bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
15987 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
15988 return false;
15989 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
15990 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
15991 return NumBits1 == 64 && NumBits2 == 32;
15992}
15993
15994bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
15995 if (!VT1.isInteger() || !VT2.isInteger())
15996 return false;
15997 unsigned NumBits1 = VT1.getSizeInBits();
15998 unsigned NumBits2 = VT2.getSizeInBits();
15999 return NumBits1 == 64 && NumBits2 == 32;
16000}
16001
16002bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
16003 // Generally speaking, zexts are not free, but they are free when they can be
16004 // folded with other operations.
16005 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
16006 EVT MemVT = LD->getMemoryVT();
16007 if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
16008 (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
16009 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
16010 LD->getExtensionType() == ISD::ZEXTLOAD))
16011 return true;
16012 }
16013
16014 // FIXME: Add other cases...
16015 // - 32-bit shifts with a zext to i64
16016 // - zext after ctlz, bswap, etc.
16017 // - zext after and by a constant mask
16018
16019 return TargetLowering::isZExtFree(Val, VT2);
16020}
16021
16022bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
16023 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
16024 "invalid fpext types");
16025 // Extending to float128 is not free.
16026 if (DestVT == MVT::f128)
16027 return false;
16028 return true;
16029}
16030
16031bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
16032 return isInt<16>(Imm) || isUInt<16>(Imm);
16033}
16034
16035bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
16036 return isInt<16>(Imm) || isUInt<16>(Imm);
16037}
16038
16039bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
16040 unsigned,
16041 unsigned,
16042 MachineMemOperand::Flags,
16043 bool *Fast) const {
16044 if (DisablePPCUnaligned)
16045 return false;
16046
16047 // PowerPC supports unaligned memory access for simple non-vector types.
16048 // Although accessing unaligned addresses is not as efficient as accessing
16049 // aligned addresses, it is generally more efficient than manual expansion,
16050 // and generally only traps for software emulation when crossing page
16051 // boundaries.
16052
16053 if (!VT.isSimple())
16054 return false;
16055
16056 if (VT.isFloatingPoint() && !VT.isVector() &&
16057 !Subtarget.allowsUnalignedFPAccess())
16058 return false;
16059
16060 if (VT.getSimpleVT().isVector()) {
16061 if (Subtarget.hasVSX()) {
16062 if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
16063 VT != MVT::v4f32 && VT != MVT::v4i32)
16064 return false;
16065 } else {
16066 return false;
16067 }
16068 }
16069
16070 if (VT == MVT::ppcf128)
16071 return false;
16072
16073 if (Fast)
16074 *Fast = true;
16075
16076 return true;
16077}
16078
16079bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
16080 SDValue C) const {
16081 // Check integral scalar types.
16082 if (!VT.isScalarInteger())
16083 return false;
16084 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
16085 if (!ConstNode->getAPIntValue().isSignedIntN(64))
16086 return false;
16087 // This transformation will generate >= 2 operations. But the following
16088 // cases will generate <= 2 instructions during ISel, so exclude them:
16089 // 1. If the constant multiplier fits in 16 bits, it can be handled by one
16090 // HW instruction, i.e. MULLI.
16091 // 2. If the multiplier fits in 16 bits after shifting out trailing zeros, one
16092 // extra shift instruction is needed compared to case 1, i.e. MULLI and RLDICR.
16093 int64_t Imm = ConstNode->getSExtValue();
16094 unsigned Shift = countTrailingZeros<uint64_t>(Imm);
16095 Imm >>= Shift;
16096 if (isInt<16>(Imm))
16097 return false;
16098 uint64_t UImm = static_cast<uint64_t>(Imm);
16099 if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
16100 isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
16101 return true;
16102 }
16103 return false;
16104}
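// Worked examples (illustrative): for a multiplier of 6 the trailing zero is
// shifted out, leaving 3, which fits in 16 bits, so we return false and let
// MULLI (plus a shift, case 2 above) handle it; for 1048577 (2^20 + 1) the
// shifted value does not fit in 16 bits and UImm - 1 is a power of two, so we
// return true and the multiply is decomposed into a shift and an add.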
16105
16106bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
16107 EVT VT) const {
16108 return isFMAFasterThanFMulAndFAdd(
16109 MF.getFunction(), VT.getTypeForEVT(MF.getFunction().getContext()));
16110}
16111
16112bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
16113 Type *Ty) const {
16114 switch (Ty->getScalarType()->getTypeID()) {
16115 case Type::FloatTyID:
16116 case Type::DoubleTyID:
16117 return true;
16118 case Type::FP128TyID:
16119 return Subtarget.hasP9Vector();
16120 default:
16121 return false;
16122 }
16123}
16124
16125// FIXME: add more patterns which are not profitable to hoist.
16126bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
16127 if (!I->hasOneUse())
16128 return true;
16129
16130 Instruction *User = I->user_back();
16131 assert(User && "A single use instruction with no uses.");
16132
16133 switch (I->getOpcode()) {
16134 case Instruction::FMul: {
16135 // Don't break FMA, PowerPC prefers FMA.
16136 if (User->getOpcode() != Instruction::FSub &&
16137 User->getOpcode() != Instruction::FAdd)
16138 return true;
16139
16140 const TargetOptions &Options = getTargetMachine().Options;
16141 const Function *F = I->getFunction();
16142 const DataLayout &DL = F->getParent()->getDataLayout();
16143 Type *Ty = User->getOperand(0)->getType();
16144
16145 return !(
16146 isFMAFasterThanFMulAndFAdd(*F, Ty) &&
16147 isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
16148 (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
16149 }
16150 case Instruction::Load: {
16151 // Don't break the "store (load float*)" pattern; it will be combined
16152 // to "store (load int32)" by a later InstCombine pass. See the function
16153 // combineLoadToOperationType. On PowerPC, loading a floating-point value takes
16154 // more cycles than loading a 32-bit integer.
16155 LoadInst *LI = cast<LoadInst>(I);
16156 // For loads that combineLoadToOperationType leaves untouched, such as
16157 // ordered loads, it should be profitable to hoist them.
16158 // A swifterror load can only be used with a pointer-to-pointer type, so
16159 // the later type check gets rid of that case.
16160 if (!LI->isUnordered())
16161 return true;
16162
16163 if (User->getOpcode() != Instruction::Store)
16164 return true;
16165
16166 if (I->getType()->getTypeID() != Type::FloatTyID)
16167 return true;
16168
16169 return false;
16170 }
16171 default:
16172 return true;
16173 }
16174 return true;
16175}
16176
16177const MCPhysReg *
16178PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
16179 // LR is a callee-save register, but we must treat it as clobbered by any call
16180 // site. Hence we include LR in the scratch registers, which are in turn added
16181 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
16182 // to CTR, which is used by any indirect call.
16183 static const MCPhysReg ScratchRegs[] = {
16184 PPC::X12, PPC::LR8, PPC::CTR8, 0
16185 };
16186
16187 return ScratchRegs;
16188}
16189
16190Register PPCTargetLowering::getExceptionPointerRegister(
16191 const Constant *PersonalityFn) const {
16192 return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
16193}
16194
16195Register PPCTargetLowering::getExceptionSelectorRegister(
16196 const Constant *PersonalityFn) const {
16197 return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
16198}
16199
16200bool
16201PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
16202 EVT VT , unsigned DefinedValues) const {
16203 if (VT == MVT::v2i64)
16204 return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
16205
16206 if (Subtarget.hasVSX())
16207 return true;
16208
16209 return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
16210}
16211
16212Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
16213 if (DisableILPPref || Subtarget.enableMachineScheduler())
16214 return TargetLowering::getSchedulingPreference(N);
16215
16216 return Sched::ILP;
16217}
16218
16219// Create a fast isel object.
16220FastISel *
16221PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
16222 const TargetLibraryInfo *LibInfo) const {
16223 return PPC::createFastISel(FuncInfo, LibInfo);
16224}
16225
16226// 'Inverted' means the FMA opcode after negating one multiplicand.
16227// For example, (fma -a b c) = (fnmsub a b c)
16228static unsigned invertFMAOpcode(unsigned Opc) {
16229 switch (Opc) {
16230 default:
16231 llvm_unreachable("Invalid FMA opcode for PowerPC!")::llvm::llvm_unreachable_internal("Invalid FMA opcode for PowerPC!"
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 16231)
;
16232 case ISD::FMA:
16233 return PPCISD::FNMSUB;
16234 case PPCISD::FNMSUB:
16235 return ISD::FMA;
16236 }
16237}
16238
16239SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
16240 bool LegalOps, bool OptForSize,
16241 NegatibleCost &Cost,
16242 unsigned Depth) const {
16243 if (Depth > SelectionDAG::MaxRecursionDepth)
16244 return SDValue();
16245
16246 unsigned Opc = Op.getOpcode();
16247 EVT VT = Op.getValueType();
16248 SDNodeFlags Flags = Op.getNode()->getFlags();
16249
16250 switch (Opc) {
16251 case PPCISD::FNMSUB:
16252 if (!Op.hasOneUse() || !isTypeLegal(VT))
16253 break;
16254
16255 const TargetOptions &Options = getTargetMachine().Options;
16256 SDValue N0 = Op.getOperand(0);
16257 SDValue N1 = Op.getOperand(1);
16258 SDValue N2 = Op.getOperand(2);
16259 SDLoc Loc(Op);
16260
16261 NegatibleCost N2Cost = NegatibleCost::Expensive;
16262 SDValue NegN2 =
16263 getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
16264
16265 if (!NegN2)
16266 return SDValue();
16267
16268 // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
16269 // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
16270 // These transformations may change sign of zeroes. For example,
16271 // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
16272 if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
16273 // Try and choose the cheaper one to negate.
16274 NegatibleCost N0Cost = NegatibleCost::Expensive;
16275 SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
16276 N0Cost, Depth + 1);
16277
16278 NegatibleCost N1Cost = NegatibleCost::Expensive;
16279 SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
16280 N1Cost, Depth + 1);
16281
16282 if (NegN0 && N0Cost <= N1Cost) {
16283 Cost = std::min(N0Cost, N2Cost);
16284 return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
16285 } else if (NegN1) {
16286 Cost = std::min(N1Cost, N2Cost);
16287 return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
16288 }
16289 }
16290
16291 // (fneg (fnmsub a b c)) => (fma a b (fneg c))
16292 if (isOperationLegal(ISD::FMA, VT)) {
16293 Cost = N2Cost;
16294 return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
16295 }
16296
16297 break;
16298 }
16299
16300 return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
16301 Cost, Depth);
16302}
16303
16304// Override to enable LOAD_STACK_GUARD lowering on Linux.
16305bool PPCTargetLowering::useLoadStackGuardNode() const {
16306 if (!Subtarget.isTargetLinux())
16307 return TargetLowering::useLoadStackGuardNode();
16308 return true;
16309}
16310
16311// Override to disable global variable loading on Linux.
16312void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
16313 if (!Subtarget.isTargetLinux())
16314 return TargetLowering::insertSSPDeclarations(M);
16315}
16316
16317bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
16318 bool ForCodeSize) const {
16319 if (!VT.isSimple() || !Subtarget.hasVSX())
16320 return false;
16321
16322 switch(VT.getSimpleVT().SimpleTy) {
16323 default:
16324 // For FP types that are currently not supported by PPC backend, return
16325 // false. Examples: f16, f80.
16326 return false;
16327 case MVT::f32:
16328 case MVT::f64:
16329 if (Subtarget.hasPrefixInstrs()) {
16330 // With prefixed instructions, we can materialize anything that can be
16331 // represented with a 32-bit immediate, not just positive zero.
16332 APFloat APFloatOfImm = Imm;
16333 return convertToNonDenormSingle(APFloatOfImm);
16334 }
16335 LLVM_FALLTHROUGH;
16336 case MVT::ppcf128:
16337 return Imm.isPosZero();
16338 }
16339}
16340
16341// For vector shift operation op, fold
16342// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
16343static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
16344 SelectionDAG &DAG) {
16345 SDValue N0 = N->getOperand(0);
16346 SDValue N1 = N->getOperand(1);
16347 EVT VT = N0.getValueType();
16348 unsigned OpSizeInBits = VT.getScalarSizeInBits();
16349 unsigned Opcode = N->getOpcode();
16350 unsigned TargetOpcode;
16351
16352 switch (Opcode) {
16353 default:
16354 llvm_unreachable("Unexpected shift operation")::llvm::llvm_unreachable_internal("Unexpected shift operation"
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 16354)
;
16355 case ISD::SHL:
16356 TargetOpcode = PPCISD::SHL;
16357 break;
16358 case ISD::SRL:
16359 TargetOpcode = PPCISD::SRL;
16360 break;
16361 case ISD::SRA:
16362 TargetOpcode = PPCISD::SRA;
16363 break;
16364 }
16365
16366 if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
16367 N1->getOpcode() == ISD::AND)
16368 if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
16369 if (Mask->getZExtValue() == OpSizeInBits - 1)
16370 return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
16371
16372 return SDValue();
16373}
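// Example of the fold above (illustrative): for a v4i32 shift of the form
// (shl x, (and y, 31)), OpSizeInBits is 32 and the splat mask equals 31, so
// the AND is stripped and the node becomes (PPCISD::SHL x, y), relying on the
// hardware's modulo behaviour for vector shift amounts.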
16374
16375SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
16376 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
16377 return Value;
16378
16379 SDValue N0 = N->getOperand(0);
16380 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
16381 if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
16382 N0.getOpcode() != ISD::SIGN_EXTEND ||
16383 N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
16384 N->getValueType(0) != MVT::i64)
16385 return SDValue();
16386
16387 // We can't save an operation here if the value is already extended, and
16388 // the existing shift is easier to combine.
16389 SDValue ExtsSrc = N0.getOperand(0);
16390 if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
16391 ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
16392 return SDValue();
16393
16394 SDLoc DL(N0);
16395 SDValue ShiftBy = SDValue(CN1, 0);
16396 // We want the shift amount to be i32 on the extswsli, but the incoming
16397 // shift amount could be an i64.
16398 if (ShiftBy.getValueType() == MVT::i64)
16399 ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
16400
16401 return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
16402 ShiftBy);
16403}
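// Example of the combine above (illustrative): on a 64-bit ISA 3.0 subtarget,
// (shl (sign_extend i32 %x to i64), 3) becomes (PPCISD::EXTSWSLI %x, 3), so a
// single extswsli replaces the separate sign-extend and shift.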
16404
16405SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
16406 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
16407 return Value;
16408
16409 return SDValue();
16410}
16411
16412SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
16413 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
16414 return Value;
16415
16416 return SDValue();
16417}
16418
16419// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
16420// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
16421// When C is zero, the equation (addi Z, -C) can be simplified to Z
16422// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
16423static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
16424 const PPCSubtarget &Subtarget) {
16425 if (!Subtarget.isPPC64())
16426 return SDValue();
16427
16428 SDValue LHS = N->getOperand(0);
16429 SDValue RHS = N->getOperand(1);
16430
16431 auto isZextOfCompareWithConstant = [](SDValue Op) {
16432 if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
16433 Op.getValueType() != MVT::i64)
16434 return false;
16435
16436 SDValue Cmp = Op.getOperand(0);
16437 if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
16438 Cmp.getOperand(0).getValueType() != MVT::i64)
16439 return false;
16440
16441 if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
16442 int64_t NegConstant = 0 - Constant->getSExtValue();
16443 // Due to the limitations of the addi instruction,
16444 // -C is required to be [-32768, 32767].
16445 return isInt<16>(NegConstant);
16446 }
16447
16448 return false;
16449 };
16450
16451 bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
16452 bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
16453
16454 // If there is a pattern, canonicalize a zext operand to the RHS.
16455 if (LHSHasPattern && !RHSHasPattern)
16456 std::swap(LHS, RHS);
16457 else if (!LHSHasPattern && !RHSHasPattern)
16458 return SDValue();
16459
16460 SDLoc DL(N);
16461 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
16462 SDValue Cmp = RHS.getOperand(0);
16463 SDValue Z = Cmp.getOperand(0);
16464 auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1));
16465
16466 assert(Constant && "Constant Should not be a null pointer.");
16467 int64_t NegConstant = 0 - Constant->getSExtValue();
16468
16469 switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
16470 default: break;
16471 case ISD::SETNE: {
16472 // when C == 0
16473 // --> addze X, (addic Z, -1).carry
16474 // /
16475 // add X, (zext(setne Z, C))--
16476 // \ when -32768 <= -C <= 32767 && C != 0
16477 // --> addze X, (addic (addi Z, -C), -1).carry
16478 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
16479 DAG.getConstant(NegConstant, DL, MVT::i64));
16480 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
16481 SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
16482 AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
16483 return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
16484 SDValue(Addc.getNode(), 1));
16485 }
16486 case ISD::SETEQ: {
16487 // when C == 0
16488 // --> addze X, (subfic Z, 0).carry
16489 // /
16490 // add X, (zext(sete Z, C))--
16491 // \ when -32768 <= -C <= 32767 && C != 0
16492 // --> addze X, (subfic (addi Z, -C), 0).carry
16493 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
16494 DAG.getConstant(NegConstant, DL, MVT::i64));
16495 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
16496 SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
16497 DAG.getConstant(0, DL, MVT::i64), AddOrZ);
16498 return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
16499 SDValue(Subc.getNode(), 1));
16500 }
16501 }
16502
16503 return SDValue();
16504}
16505
16506// Transform
16507// (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
16508// (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
16509// In this case both C1 and C2 must be known constants.
16510// C1+C2 must fit into a 34 bit signed integer.
16511static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
16512 const PPCSubtarget &Subtarget) {
16513 if (!Subtarget.isUsingPCRelativeCalls())
16514 return SDValue();
16515
16516 // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
16517 // If we find that node try to cast the Global Address and the Constant.
16518 SDValue LHS = N->getOperand(0);
16519 SDValue RHS = N->getOperand(1);
16520
16521 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
16522 std::swap(LHS, RHS);
16523
16524 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
16525 return SDValue();
16526
16527 // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
16528 GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
16529 ConstantSDNode* ConstNode = dyn_cast<ConstantSDNode>(RHS);
16530
16531 // Check that both casts succeeded.
16532 if (!GSDN || !ConstNode)
16533 return SDValue();
16534
16535 int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
16536 SDLoc DL(GSDN);
16537
16538 // The signed int offset needs to fit in 34 bits.
16539 if (!isInt<34>(NewOffset))
16540 return SDValue();
16541
16542 // The new global address is a copy of the old global address except
16543 // that it has the updated Offset.
16544 SDValue GA =
16545 DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
16546 NewOffset, GSDN->getTargetFlags());
16547 SDValue MatPCRel =
16548 DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
16549 return MatPCRel;
16550}
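// Example of the fold above (illustrative):
// (add (MAT_PCREL_ADDR GlobalAddr + 8), 16) becomes
// (MAT_PCREL_ADDR GlobalAddr + 24), provided the combined offset (24 here)
// fits in the signed 34-bit displacement used by prefixed PC-relative
// addressing.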
16551
16552SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
16553 if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
16554 return Value;
16555
16556 if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
16557 return Value;
16558
16559 return SDValue();
16560}
16561
16562// Detect TRUNCATE operations on bitcasts of float128 values.
16563 // What we are looking for here is the situation where we extract a subset
16564 // of bits from a 128-bit float.
16565// This can be of two forms:
16566// 1) BITCAST of f128 feeding TRUNCATE
16567// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
16568// The reason this is required is because we do not have a legal i128 type
16569// and so we want to prevent having to store the f128 and then reload part
16570// of it.
16571SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
16572 DAGCombinerInfo &DCI) const {
16573 // If we are using CRBits then try that first.
16574 if (Subtarget.useCRBits()) {
16575 // Check if CRBits did anything and return that if it did.
16576 if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
16577 return CRTruncValue;
16578 }
16579
16580 SDLoc dl(N);
16581 SDValue Op0 = N->getOperand(0);
16582
16583 // fold (truncate (abs (sub (zext a), (zext b)))) -> (vabsd a, b)
16584 if (Subtarget.hasP9Altivec() && Op0.getOpcode() == ISD::ABS) {
16585 EVT VT = N->getValueType(0);
16586 if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
16587 return SDValue();
16588 SDValue Sub = Op0.getOperand(0);
16589 if (Sub.getOpcode() == ISD::SUB) {
16590 SDValue SubOp0 = Sub.getOperand(0);
16591 SDValue SubOp1 = Sub.getOperand(1);
16592 if ((SubOp0.getOpcode() == ISD::ZERO_EXTEND) &&
16593 (SubOp1.getOpcode() == ISD::ZERO_EXTEND)) {
16594 return DCI.DAG.getNode(PPCISD::VABSD, dl, VT, SubOp0.getOperand(0),
16595 SubOp1.getOperand(0),
16596 DCI.DAG.getTargetConstant(0, dl, MVT::i32));
16597 }
16598 }
16599 }
16600
16601 // Looking for a truncate of i128 to i64.
16602 if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
16603 return SDValue();
16604
16605 int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
16606
16607 // SRL feeding TRUNCATE.
16608 if (Op0.getOpcode() == ISD::SRL) {
16609 ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
16610 // The right shift has to be by 64 bits.
16611 if (!ConstNode || ConstNode->getZExtValue() != 64)
16612 return SDValue();
16613
16614 // Switch the element number to extract.
16615 EltToExtract = EltToExtract ? 0 : 1;
16616 // Update Op0 past the SRL.
16617 Op0 = Op0.getOperand(0);
16618 }
16619
16620 // BITCAST feeding a TRUNCATE possibly via SRL.
16621 if (Op0.getOpcode() == ISD::BITCAST &&
16622 Op0.getValueType() == MVT::i128 &&
16623 Op0.getOperand(0).getValueType() == MVT::f128) {
16624 SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
16625 return DCI.DAG.getNode(
16626 ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
16627 DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
16628 }
16629 return SDValue();
16630}
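// Example of the combine above (illustrative): for
// (truncate i64 (srl (bitcast f128 %v to i128), 64)), the shift by 64 flips
// EltToExtract and the node becomes an EXTRACT_VECTOR_ELT of
// (bitcast %v to v2i64), so the upper half of the f128 is read directly
// instead of being stored and reloaded.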
16631
16632SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
16633 SelectionDAG &DAG = DCI.DAG;
16634
16635 ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
16636 if (!ConstOpOrElement)
16637 return SDValue();
16638
16639 // An imul is usually smaller than the alternative sequence for legal type.
16640 if (DAG.getMachineFunction().getFunction().hasMinSize() &&
16641 isOperationLegal(ISD::MUL, N->getValueType(0)))
16642 return SDValue();
16643
16644 auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
16645 switch (this->Subtarget.getCPUDirective()) {
16646 default:
16647 // TODO: enhance the condition for subtarget before pwr8
16648 return false;
16649 case PPC::DIR_PWR8:
16650 // type mul add shl
16651 // scalar 4 1 1
16652 // vector 7 2 2
16653 return true;
16654 case PPC::DIR_PWR9:
16655 case PPC::DIR_PWR10:
16656 case PPC::DIR_PWR_FUTURE:
16657 // type mul add shl
16658 // scalar 5 2 2
16659 // vector 7 2 2
16660
16661 // The cycle counts of the related operations are shown in the table above.
16662 // Because mul is 5 (scalar) / 7 (vector) cycles and add/sub/shl are all 2 for
16663 // both scalar and vector types, two-instruction patterns (add/sub + shl,
16664 // 4 cycles) are always profitable; but three-instruction patterns such as
16665 // (mul x, -(2^N + 1)) => -(add (shl x, N), x) take 6 cycles (sub + add + shl),
16666 // so we should only do that for vector types.
16667 return IsAddOne && IsNeg ? VT.isVector() : true;
16668 }
16669 };
16670
16671 EVT VT = N->getValueType(0);
16672 SDLoc DL(N);
16673
16674 const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
16675 bool IsNeg = MulAmt.isNegative();
16676 APInt MulAmtAbs = MulAmt.abs();
16677
16678 if ((MulAmtAbs - 1).isPowerOf2()) {
16679 // (mul x, 2^N + 1) => (add (shl x, N), x)
16680 // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
16681
16682 if (!IsProfitable(IsNeg, true, VT))
16683 return SDValue();
16684
16685 SDValue Op0 = N->getOperand(0);
16686 SDValue Op1 =
16687 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16688 DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
16689 SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
16690
16691 if (!IsNeg)
16692 return Res;
16693
16694 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
16695 } else if ((MulAmtAbs + 1).isPowerOf2()) {
16696 // (mul x, 2^N - 1) => (sub (shl x, N), x)
16697 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
16698
16699 if (!IsProfitable(IsNeg, false, VT))
16700 return SDValue();
16701
16702 SDValue Op0 = N->getOperand(0);
16703 SDValue Op1 =
16704 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16705 DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
16706
16707 if (!IsNeg)
16708 return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
16709 else
16710 return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
16711
16712 } else {
16713 return SDValue();
16714 }
16715}
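// Worked examples (illustrative, subject to the IsProfitable check above):
// (mul x, 5) has |C| - 1 = 4, a power of two, so it becomes
// (add (shl x, 2), x); (mul x, 7) has |C| + 1 = 8, so it becomes
// (sub (shl x, 3), x); (mul x, -3) becomes the negated add form,
// (sub 0, (add (shl x, 1), x)).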
16716
16717// Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
16718// in combiner since we need to check SD flags and other subtarget features.
16719SDValue PPCTargetLowering::combineFMALike(SDNode *N,
16720 DAGCombinerInfo &DCI) const {
16721 SDValue N0 = N->getOperand(0);
16722 SDValue N1 = N->getOperand(1);
16723 SDValue N2 = N->getOperand(2);
16724 SDNodeFlags Flags = N->getFlags();
16725 EVT VT = N->getValueType(0);
16726 SelectionDAG &DAG = DCI.DAG;
16727 const TargetOptions &Options = getTargetMachine().Options;
16728 unsigned Opc = N->getOpcode();
16729 bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
16730 bool LegalOps = !DCI.isBeforeLegalizeOps();
16731 SDLoc Loc(N);
16732
16733 if (!isOperationLegal(ISD::FMA, VT))
16734 return SDValue();
16735
16736 // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
16737 // since (fnmsub a b c)=-0 while c-ab=+0.
16738 if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
16739 return SDValue();
16740
16741 // (fma (fneg a) b c) => (fnmsub a b c)
16742 // (fnmsub (fneg a) b c) => (fma a b c)
16743 if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
16744 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
16745
16746 // (fma a (fneg b) c) => (fnmsub a b c)
16747 // (fnmsub a (fneg b) c) => (fma a b c)
16748 if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
16749 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
16750
16751 return SDValue();
16752}
16753
16754bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
16755 // Only duplicate to increase tail-calls for the 64-bit SysV ABIs.
16756 if (!Subtarget.is64BitELFABI())
16757 return false;
16758
16759 // If not a tail call then no need to proceed.
16760 if (!CI->isTailCall())
16761 return false;
16762
16763 // If sibling calls have been disabled and tail-calls aren't guaranteed,
16764 // there is no reason to duplicate.
16765 auto &TM = getTargetMachine();
16766 if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
16767 return false;
16768
16769 // Can't tail call a function called indirectly, or if it has variadic args.
16770 const Function *Callee = CI->getCalledFunction();
16771 if (!Callee || Callee->isVarArg())
16772 return false;
16773
16774 // Make sure the callee and caller calling conventions are eligible for tco.
16775 const Function *Caller = CI->getParent()->getParent();
16776 if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
16777 CI->getCallingConv()))
16778 return false;
16779
16780 // If the function is local then we have a good chance at tail-calling it
16781 return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee);
16782}
16783
16784bool PPCTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
16785 if (!Subtarget.hasVSX())
16786 return false;
16787 if (Subtarget.hasP9Vector() && VT == MVT::f128)
16788 return true;
16789 return VT == MVT::f32 || VT == MVT::f64 ||
16790 VT == MVT::v4f32 || VT == MVT::v2f64;
16791}
16792
16793bool PPCTargetLowering::
16794isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
16795 const Value *Mask = AndI.getOperand(1);
16796 // If the mask is suitable for andi. or andis. we should sink the and.
16797 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
16798 // Can't handle constants wider than 64-bits.
16799 if (CI->getBitWidth() > 64)
16800 return false;
16801 int64_t ConstVal = CI->getZExtValue();
16802 return isUInt<16>(ConstVal) ||
16803 (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
16804 }
16805
16806 // For non-constant masks, we can always use the record-form and.
16807 return true;
16808}
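// Worked examples (illustrative): a mask of 0xFFFF is accepted (it fits
// andi.), 0xFFFF0000 is accepted (it fits andis.), but 0x10001 is rejected
// because it has bits set in both halves and cannot be matched by a single
// record-form and-immediate.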
16809
16810// Transform (abs (sub (zext a), (zext b))) to (vabsd a b 0)
16811// Transform (abs (sub (zext a), (zext_invec b))) to (vabsd a b 0)
16812// Transform (abs (sub (zext_invec a), (zext_invec b))) to (vabsd a b 0)
16813// Transform (abs (sub (zext_invec a), (zext b))) to (vabsd a b 0)
16814// Transform (abs (sub a, b) to (vabsd a b 1)) if a & b of type v4i32
16815SDValue PPCTargetLowering::combineABS(SDNode *N, DAGCombinerInfo &DCI) const {
16816 assert((N->getOpcode() == ISD::ABS) && "Need ABS node here");
16817 assert(Subtarget.hasP9Altivec() &&
16818 "Only combine this when P9 altivec supported!");
16819 EVT VT = N->getValueType(0);
16820 if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
16821 return SDValue();
16822
16823 SelectionDAG &DAG = DCI.DAG;
16824 SDLoc dl(N);
16825 if (N->getOperand(0).getOpcode() == ISD::SUB) {
16826 // This applies even for signed integers, since the result is known to be
16827 // positive (as a signed integer) due to the zero-extended inputs.
16828 unsigned SubOpcd0 = N->getOperand(0)->getOperand(0).getOpcode();
16829 unsigned SubOpcd1 = N->getOperand(0)->getOperand(1).getOpcode();
16830 if ((SubOpcd0 == ISD::ZERO_EXTEND ||
16831 SubOpcd0 == ISD::ZERO_EXTEND_VECTOR_INREG) &&
16832 (SubOpcd1 == ISD::ZERO_EXTEND ||
16833 SubOpcd1 == ISD::ZERO_EXTEND_VECTOR_INREG)) {
16834 return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
16835 N->getOperand(0)->getOperand(0),
16836 N->getOperand(0)->getOperand(1),
16837 DAG.getTargetConstant(0, dl, MVT::i32));
16838 }
16839
16840 // For type v4i32, it can be optimized with xvnegsp + vabsduw
16841 if (N->getOperand(0).getValueType() == MVT::v4i32 &&
16842 N->getOperand(0).hasOneUse()) {
16843 return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
16844 N->getOperand(0)->getOperand(0),
16845 N->getOperand(0)->getOperand(1),
16846 DAG.getTargetConstant(1, dl, MVT::i32));
16847 }
16848 }
16849
16850 return SDValue();
16851}
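// Example of the combines above (illustrative): when both operands of the SUB
// are zero-extended, (abs (sub a, b)) becomes (VABSD a, b, 0); otherwise, for
// a single-use v4i32 SUB, it becomes (VABSD a, b, 1), where the flag requests
// the xvnegsp + vabsduw expansion mentioned in the comment above.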
16852
16853 // For type v4i32/v8i16/v16i8, transform
16854// from (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) to (vabsd a, b)
16855// from (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) to (vabsd a, b)
16856// from (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) to (vabsd a, b)
16857// from (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) to (vabsd a, b)
16858SDValue PPCTargetLowering::combineVSelect(SDNode *N,
16859 DAGCombinerInfo &DCI) const {
16860 assert((N->getOpcode() == ISD::VSELECT) && "Need VSELECT node here");
16861 assert(Subtarget.hasP9Altivec() &&
16862 "Only combine this when P9 altivec supported!");
16863
16864 SelectionDAG &DAG = DCI.DAG;
16865 SDLoc dl(N);
16866 SDValue Cond = N->getOperand(0);
16867 SDValue TrueOpnd = N->getOperand(1);
16868 SDValue FalseOpnd = N->getOperand(2);
16869 EVT VT = N->getOperand(1).getValueType();
16870
16871 if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB ||
16872 FalseOpnd.getOpcode() != ISD::SUB)
16873 return SDValue();
16874
16875 // ABSD only available for type v4i32/v8i16/v16i8
16876 if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
16877 return SDValue();
16878
16879 // Only worthwhile if at least one operand has a single use; this saves at least one dependent computation.
16880 if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse()))
16881 return SDValue();
16882
16883 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
16884
16885 // Can only handle unsigned comparison here
16886 switch (CC) {
16887 default:
16888 return SDValue();
16889 case ISD::SETUGT:
16890 case ISD::SETUGE:
16891 break;
16892 case ISD::SETULT:
16893 case ISD::SETULE:
16894 std::swap(TrueOpnd, FalseOpnd);
16895 break;
16896 }
16897
16898 SDValue CmpOpnd1 = Cond.getOperand(0);
16899 SDValue CmpOpnd2 = Cond.getOperand(1);
16900
16901 // SETCC CmpOpnd1 CmpOpnd2 cond
16902 // TrueOpnd = CmpOpnd1 - CmpOpnd2
16903 // FalseOpnd = CmpOpnd2 - CmpOpnd1
16904 if (TrueOpnd.getOperand(0) == CmpOpnd1 &&
16905 TrueOpnd.getOperand(1) == CmpOpnd2 &&
16906 FalseOpnd.getOperand(0) == CmpOpnd2 &&
16907 FalseOpnd.getOperand(1) == CmpOpnd1) {
16908 return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(1).getValueType(),
16909 CmpOpnd1, CmpOpnd2,
16910 DAG.getTargetConstant(0, dl, MVT::i32));
16911 }
16912
16913 return SDValue();
16914}
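// Example of the combine above (illustrative): for v4i32 values,
// (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) becomes
// (VABSD a, b, 0), replacing two subtracts and a select with a single
// unsigned vector absolute-difference operation.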

/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/include/llvm/CodeGen/SelectionDAGNodes.h

1//===- llvm/CodeGen/SelectionDAGNodes.h - SelectionDAG Nodes ----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file declares the SDNode class and derived classes, which are used to
10// represent the nodes and operations present in a SelectionDAG. These nodes
11// and operations are machine code level operations, with some similarities to
12// the GCC RTL representation.
13//
14// Clients should include the SelectionDAG.h file instead of this file directly.
15//
16//===----------------------------------------------------------------------===//
17
18#ifndef LLVM_CODEGEN_SELECTIONDAGNODES_H
19#define LLVM_CODEGEN_SELECTIONDAGNODES_H
20
21#include "llvm/ADT/APFloat.h"
22#include "llvm/ADT/ArrayRef.h"
23#include "llvm/ADT/BitVector.h"
24#include "llvm/ADT/FoldingSet.h"
25#include "llvm/ADT/GraphTraits.h"
26#include "llvm/ADT/SmallPtrSet.h"
27#include "llvm/ADT/SmallVector.h"
28#include "llvm/ADT/ilist_node.h"
29#include "llvm/ADT/iterator.h"
30#include "llvm/ADT/iterator_range.h"
31#include "llvm/CodeGen/ISDOpcodes.h"
32#include "llvm/CodeGen/MachineMemOperand.h"
33#include "llvm/CodeGen/Register.h"
34#include "llvm/CodeGen/ValueTypes.h"
35#include "llvm/IR/Constants.h"
36#include "llvm/IR/DebugLoc.h"
37#include "llvm/IR/Instruction.h"
38#include "llvm/IR/Instructions.h"
39#include "llvm/IR/Metadata.h"
40#include "llvm/IR/Operator.h"
41#include "llvm/Support/AlignOf.h"
42#include "llvm/Support/AtomicOrdering.h"
43#include "llvm/Support/Casting.h"
44#include "llvm/Support/ErrorHandling.h"
45#include "llvm/Support/MachineValueType.h"
46#include "llvm/Support/TypeSize.h"
47#include <algorithm>
48#include <cassert>
49#include <climits>
50#include <cstddef>
51#include <cstdint>
52#include <cstring>
53#include <iterator>
54#include <string>
55#include <tuple>
56
57namespace llvm {
58
59class APInt;
60class Constant;
61template <typename T> struct DenseMapInfo;
62class GlobalValue;
63class MachineBasicBlock;
64class MachineConstantPoolValue;
65class MCSymbol;
66class raw_ostream;
67class SDNode;
68class SelectionDAG;
69class Type;
70class Value;
71
72void checkForCycles(const SDNode *N, const SelectionDAG *DAG = nullptr,
73 bool force = false);
74
75/// This represents a list of ValueType's that has been intern'd by
76/// a SelectionDAG. Instances of this simple value class are returned by
77/// SelectionDAG::getVTList(...).
78///
79struct SDVTList {
80 const EVT *VTs;
81 unsigned int NumVTs;
82};
83
84namespace ISD {
85
86 /// Node predicates
87
88 /// If N is a BUILD_VECTOR node whose elements are all the same constant or
89 /// undefined, return true and return the constant value in \p SplatValue.
90 bool isConstantSplatVector(const SDNode *N, APInt &SplatValue);
91
92 /// Return true if the specified node is a BUILD_VECTOR where all of the
93 /// elements are ~0 or undef.
94 bool isBuildVectorAllOnes(const SDNode *N);
95
96 /// Return true if the specified node is a BUILD_VECTOR where all of the
97 /// elements are 0 or undef.
98 bool isBuildVectorAllZeros(const SDNode *N);
99
100 /// Return true if the specified node is a BUILD_VECTOR node of all
101 /// ConstantSDNode or undef.
102 bool isBuildVectorOfConstantSDNodes(const SDNode *N);
103
104 /// Return true if the specified node is a BUILD_VECTOR node of all
105 /// ConstantFPSDNode or undef.
106 bool isBuildVectorOfConstantFPSDNodes(const SDNode *N);
107
108 /// Return true if the node has at least one operand and all operands of the
109 /// specified node are ISD::UNDEF.
110 bool allOperandsUndef(const SDNode *N);
111
112} // end namespace ISD
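The predicates declared above test structural properties of BUILD_VECTOR nodes (all ones, all zeros, constant splat, all undef). As a rough, standalone sketch of the splat idea only, using a plain container rather than SDNodes and with simplified undef handling:

#include <optional>
#include <vector>

// Treats empty slots like undef elements: a splat needs every defined
// element to be the same constant, and at least one defined element.
static bool isConstantSplat(const std::vector<std::optional<int>> &Elts,
                            int &SplatValue) {
  bool Seen = false;
  for (const auto &E : Elts) {
    if (!E)
      continue;
    if (!Seen) {
      SplatValue = *E;
      Seen = true;
    } else if (*E != SplatValue) {
      return false;
    }
  }
  return Seen;
}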
113
114//===----------------------------------------------------------------------===//
115/// Unlike LLVM values, Selection DAG nodes may return multiple
116/// values as the result of a computation. Many nodes return multiple values,
117/// from loads (which define a token and a return value) to ADDC (which returns
118/// a result and a carry value), to calls (which may return an arbitrary number
119/// of values).
120///
121/// As such, each use of a SelectionDAG computation must indicate the node that
122/// computes it as well as which return value to use from that node. This pair
123/// of information is represented with the SDValue value type.
124///
125class SDValue {
126 friend struct DenseMapInfo<SDValue>;
127
128 SDNode *Node = nullptr; // The node defining the value we are using.
129 unsigned ResNo = 0; // Which return value of the node we are using.
130
131public:
132 SDValue() = default;
133 SDValue(SDNode *node, unsigned resno);
134
135 /// get the index which selects a specific result in the SDNode
136 unsigned getResNo() const { return ResNo; }
137
138 /// get the SDNode which holds the desired result
139 SDNode *getNode() const { return Node; }
140
141 /// set the SDNode
142 void setNode(SDNode *N) { Node = N; }
143
144 inline SDNode *operator->() const { return Node; }
145
146 bool operator==(const SDValue &O) const {
147 return Node == O.Node && ResNo == O.ResNo;
148 }
149 bool operator!=(const SDValue &O) const {
150 return !operator==(O);
151 }
152 bool operator<(const SDValue &O) const {
153 return std::tie(Node, ResNo) < std::tie(O.Node, O.ResNo);
154 }
155 explicit operator bool() const {
156 return Node != nullptr;
2. Assuming the condition is false
3. Returning zero, which participates in a condition later
157 }
158
159 SDValue getValue(unsigned R) const {
160 return SDValue(Node, R);
161 }
162
163 /// Return true if this node is an operand of N.
164 bool isOperandOf(const SDNode *N) const;
165
166 /// Return the ValueType of the referenced return value.
167 inline EVT getValueType() const;
168
169 /// Return the simple ValueType of the referenced return value.
170 MVT getSimpleValueType() const {
171 return getValueType().getSimpleVT();
172 }
173
174 /// Returns the size of the value in bits.
175 ///
176 /// If the value type is a scalable vector type, the scalable property will
177 /// be set and the runtime size will be a positive integer multiple of the
178 /// base size.
179 TypeSize getValueSizeInBits() const {
180 return getValueType().getSizeInBits();
181 }
182
183 uint64_t getScalarValueSizeInBits() const {
184 return getValueType().getScalarType().getFixedSizeInBits();
185 }
186
187 // Forwarding methods - These forward to the corresponding methods in SDNode.
188 inline unsigned getOpcode() const;
189 inline unsigned getNumOperands() const;
190 inline const SDValue &getOperand(unsigned i) const;
191 inline uint64_t getConstantOperandVal(unsigned i) const;
192 inline const APInt &getConstantOperandAPInt(unsigned i) const;
193 inline bool isTargetMemoryOpcode() const;
194 inline bool isTargetOpcode() const;
195 inline bool isMachineOpcode() const;
196 inline bool isUndef() const;
197 inline unsigned getMachineOpcode() const;
198 inline const DebugLoc &getDebugLoc() const;
199 inline void dump() const;
200 inline void dump(const SelectionDAG *G) const;
201 inline void dumpr() const;
202 inline void dumpr(const SelectionDAG *G) const;
203
204 /// Return true if this operand (which must be a chain) reaches the
205 /// specified operand without crossing any side-effecting instructions.
206 /// In practice, this looks through token factors and non-volatile loads.
207 /// In order to remain efficient, this only
208 /// looks a couple of nodes in; it does not do an exhaustive search.
209 bool reachesChainWithoutSideEffects(SDValue Dest,
210 unsigned Depth = 2) const;
211
212 /// Return true if there are no nodes using value ResNo of Node.
213 inline bool use_empty() const;
214
215 /// Return true if there is exactly one node using value ResNo of Node.
216 inline bool hasOneUse() const;
217};
218
219template<> struct DenseMapInfo<SDValue> {
220 static inline SDValue getEmptyKey() {
221 SDValue V;
222 V.ResNo = -1U;
223 return V;
224 }
225
226 static inline SDValue getTombstoneKey() {
227 SDValue V;
228 V.ResNo = -2U;
229 return V;
230 }
231
232 static unsigned getHashValue(const SDValue &Val) {
233 return ((unsigned)((uintptr_t)Val.getNode() >> 4) ^
234 (unsigned)((uintptr_t)Val.getNode() >> 9)) + Val.getResNo();
235 }
236
237 static bool isEqual(const SDValue &LHS, const SDValue &RHS) {
238 return LHS == RHS;
239 }
240};
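This specialization lets SDValue be used as a DenseMap key by reserving ResNo values -1U and -2U as the empty and tombstone sentinels; the SDValue constructor later asserts (line 1093) that real result numbers stay below -2U. Illustrative sketch only, showing why an open-addressed table needs two reserved keys:

#include <cstdint>

// One reserved value marks slots that were never used ("empty"); a second
// marks slots whose entry was erased ("tombstone") so probe chains stay
// intact. Real keys must therefore avoid both values.
enum ReservedResNo : uint32_t {
  EmptyKey = ~0u,        // -1U
  TombstoneKey = ~0u - 1 // -2U
};

static bool isReservedResNo(uint32_t ResNo) {
  return ResNo == EmptyKey || ResNo == TombstoneKey;
}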
241
242/// Allow casting operators to work directly on
243/// SDValues as if they were SDNode*'s.
244template<> struct simplify_type<SDValue> {
245 using SimpleType = SDNode *;
246
247 static SimpleType getSimplifiedValue(SDValue &Val) {
248 return Val.getNode();
249 }
250};
251template<> struct simplify_type<const SDValue> {
252 using SimpleType = /*const*/ SDNode *;
253
254 static SimpleType getSimplifiedValue(const SDValue &Val) {
255 return Val.getNode();
256 }
257};
258
259/// Represents a use of a SDNode. This class holds an SDValue,
260/// which records the SDNode being used and the result number, a
261/// pointer to the SDNode using the value, and Next and Prev pointers,
262/// which link together all the uses of an SDNode.
263///
264class SDUse {
265 /// Val - The value being used.
266 SDValue Val;
267 /// User - The user of this value.
268 SDNode *User = nullptr;
269 /// Prev, Next - Pointers to the uses list of the SDNode referred by
270 /// this operand.
271 SDUse **Prev = nullptr;
272 SDUse *Next = nullptr;
273
274public:
275 SDUse() = default;
276 SDUse(const SDUse &U) = delete;
277 SDUse &operator=(const SDUse &) = delete;
278
279 /// Normally SDUse will just implicitly convert to an SDValue that it holds.
280 operator const SDValue&() const { return Val; }
281
282 /// If implicit conversion to SDValue doesn't work, the get() method returns
283 /// the SDValue.
284 const SDValue &get() const { return Val; }
285
286 /// This returns the SDNode that contains this Use.
287 SDNode *getUser() { return User; }
288
289 /// Get the next SDUse in the use list.
290 SDUse *getNext() const { return Next; }
291
292 /// Convenience function for get().getNode().
293 SDNode *getNode() const { return Val.getNode(); }
294 /// Convenience function for get().getResNo().
295 unsigned getResNo() const { return Val.getResNo(); }
296 /// Convenience function for get().getValueType().
297 EVT getValueType() const { return Val.getValueType(); }
298
299 /// Convenience function for get().operator==
300 bool operator==(const SDValue &V) const {
301 return Val == V;
302 }
303
304 /// Convenience function for get().operator!=
305 bool operator!=(const SDValue &V) const {
306 return Val != V;
307 }
308
309 /// Convenience function for get().operator<
310 bool operator<(const SDValue &V) const {
311 return Val < V;
312 }
313
314private:
315 friend class SelectionDAG;
316 friend class SDNode;
317 // TODO: unfriend HandleSDNode once we fix its operand handling.
318 friend class HandleSDNode;
319
320 void setUser(SDNode *p) { User = p; }
321
322 /// Remove this use from its existing use list, assign it the
323 /// given value, and add it to the new value's node's use list.
324 inline void set(const SDValue &V);
325 /// Like set, but only supports initializing a newly-allocated
326 /// SDUse with a non-null value.
327 inline void setInitial(const SDValue &V);
328 /// Like set, but only sets the Node portion of the value,
329 /// leaving the ResNo portion unmodified.
330 inline void setNode(SDNode *N);
331
332 void addToList(SDUse **List) {
333 Next = *List;
334 if (Next) Next->Prev = &Next;
335 Prev = List;
336 *List = this;
337 }
338
339 void removeFromList() {
340 *Prev = Next;
341 if (Next) Next->Prev = Prev;
342 }
343};
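SDUse stores Prev as a pointer-to-pointer, so a use can unlink itself in constant time without knowing whether it is the head of its list. A minimal standalone sketch of that intrusive-list technique (mirroring addToList/removeFromList above, outside of LLVM):

// Each node records the address of the pointer that points at it, so
// removal needs no special case for the list head and no backward walk.
struct Use {
  Use **Prev = nullptr;
  Use *Next = nullptr;

  void addToList(Use **List) {
    Next = *List;
    if (Next)
      Next->Prev = &Next;
    Prev = List;
    *List = this;
  }

  void removeFromList() {
    *Prev = Next; // works whether this node is the head or interior
    if (Next)
      Next->Prev = Prev;
  }
};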
344
345/// simplify_type specializations - Allow casting operators to work directly on
346/// SDValues as if they were SDNode*'s.
347template<> struct simplify_type<SDUse> {
348 using SimpleType = SDNode *;
349
350 static SimpleType getSimplifiedValue(SDUse &Val) {
351 return Val.getNode();
352 }
353};
354
355/// These are IR-level optimization flags that may be propagated to SDNodes.
356/// TODO: This data structure should be shared by the IR optimizer and
357/// the backend.
358struct SDNodeFlags {
359private:
360 bool NoUnsignedWrap : 1;
361 bool NoSignedWrap : 1;
362 bool Exact : 1;
363 bool NoNaNs : 1;
364 bool NoInfs : 1;
365 bool NoSignedZeros : 1;
366 bool AllowReciprocal : 1;
367 bool AllowContract : 1;
368 bool ApproximateFuncs : 1;
369 bool AllowReassociation : 1;
370
371 // We assume instructions do not raise floating-point exceptions by default,
372 // and only those marked explicitly may do so. We could choose to represent
373 // this via a positive "FPExcept" flags like on the MI level, but having a
374 // negative "NoFPExcept" flag here (that defaults to true) makes the flag
375 // intersection logic more straightforward.
376 bool NoFPExcept : 1;
377
378public:
379 /// Default constructor turns off all optimization flags.
380 SDNodeFlags()
381 : NoUnsignedWrap(false), NoSignedWrap(false), Exact(false), NoNaNs(false),
382 NoInfs(false), NoSignedZeros(false), AllowReciprocal(false),
383 AllowContract(false), ApproximateFuncs(false),
384 AllowReassociation(false), NoFPExcept(false) {}
385
386 /// Propagate the fast-math-flags from an IR FPMathOperator.
387 void copyFMF(const FPMathOperator &FPMO) {
388 setNoNaNs(FPMO.hasNoNaNs());
389 setNoInfs(FPMO.hasNoInfs());
390 setNoSignedZeros(FPMO.hasNoSignedZeros());
391 setAllowReciprocal(FPMO.hasAllowReciprocal());
392 setAllowContract(FPMO.hasAllowContract());
393 setApproximateFuncs(FPMO.hasApproxFunc());
394 setAllowReassociation(FPMO.hasAllowReassoc());
395 }
396
397 // These are mutators for each flag.
398 void setNoUnsignedWrap(bool b) { NoUnsignedWrap = b; }
399 void setNoSignedWrap(bool b) { NoSignedWrap = b; }
400 void setExact(bool b) { Exact = b; }
401 void setNoNaNs(bool b) { NoNaNs = b; }
402 void setNoInfs(bool b) { NoInfs = b; }
403 void setNoSignedZeros(bool b) { NoSignedZeros = b; }
404 void setAllowReciprocal(bool b) { AllowReciprocal = b; }
405 void setAllowContract(bool b) { AllowContract = b; }
406 void setApproximateFuncs(bool b) { ApproximateFuncs = b; }
407 void setAllowReassociation(bool b) { AllowReassociation = b; }
408 void setNoFPExcept(bool b) { NoFPExcept = b; }
409
410 // These are accessors for each flag.
411 bool hasNoUnsignedWrap() const { return NoUnsignedWrap; }
412 bool hasNoSignedWrap() const { return NoSignedWrap; }
413 bool hasExact() const { return Exact; }
414 bool hasNoNaNs() const { return NoNaNs; }
415 bool hasNoInfs() const { return NoInfs; }
416 bool hasNoSignedZeros() const { return NoSignedZeros; }
417 bool hasAllowReciprocal() const { return AllowReciprocal; }
418 bool hasAllowContract() const { return AllowContract; }
419 bool hasApproximateFuncs() const { return ApproximateFuncs; }
420 bool hasAllowReassociation() const { return AllowReassociation; }
421 bool hasNoFPExcept() const { return NoFPExcept; }
422
423 /// Clear any flags in this flag set that aren't also set in Flags. All
424 /// flags will be cleared if Flags are undefined.
425 void intersectWith(const SDNodeFlags Flags) {
426 NoUnsignedWrap &= Flags.NoUnsignedWrap;
427 NoSignedWrap &= Flags.NoSignedWrap;
428 Exact &= Flags.Exact;
429 NoNaNs &= Flags.NoNaNs;
430 NoInfs &= Flags.NoInfs;
431 NoSignedZeros &= Flags.NoSignedZeros;
432 AllowReciprocal &= Flags.AllowReciprocal;
433 AllowContract &= Flags.AllowContract;
434 ApproximateFuncs &= Flags.ApproximateFuncs;
435 AllowReassociation &= Flags.AllowReassociation;
436 NoFPExcept &= Flags.NoFPExcept;
437 }
438};
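intersectWith keeps a flag only when both operands have it, which is why every flag defaults to false and why the FP-exception property is stored negatively as NoFPExcept. A minimal sketch of the intersection semantics, using two placeholder flags rather than the full set:

struct ExampleFlags {
  bool NoNaNs = false;
  bool NoInfs = false;

  // A relaxation may survive on the merged node only if both inputs
  // allowed it, so each flag is AND-ed with its counterpart.
  void intersectWith(const ExampleFlags &Other) {
    NoNaNs &= Other.NoNaNs;
    NoInfs &= Other.NoInfs;
  }
};
// Example: {NoNaNs=true, NoInfs=true} intersected with {NoNaNs=true,
// NoInfs=false} leaves only NoNaNs set.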
439
440/// Represents one node in the SelectionDAG.
441///
442class SDNode : public FoldingSetNode, public ilist_node<SDNode> {
443private:
444 /// The operation that this node performs.
445 int16_t NodeType;
446
447protected:
448 // We define a set of mini-helper classes to help us interpret the bits in our
449 // SubclassData. These are designed to fit within a uint16_t so they pack
450 // with NodeType.
451
452#if defined(_AIX) && (!defined(__GNUC__) || defined(__ibmxl__))
453// Except for GCC; by default, AIX compilers store bit-fields in 4-byte words
454// and give the `pack` pragma push semantics.
455#define BEGIN_TWO_BYTE_PACK() _Pragma("pack(2)")
456#define END_TWO_BYTE_PACK() _Pragma("pack(pop)")
457#else
458#define BEGIN_TWO_BYTE_PACK()
459#define END_TWO_BYTE_PACK()
460#endif
461
462BEGIN_TWO_BYTE_PACK()
463 class SDNodeBitfields {
464 friend class SDNode;
465 friend class MemIntrinsicSDNode;
466 friend class MemSDNode;
467 friend class SelectionDAG;
468
469 uint16_t HasDebugValue : 1;
470 uint16_t IsMemIntrinsic : 1;
471 uint16_t IsDivergent : 1;
472 };
473 enum { NumSDNodeBits = 3 };
474
475 class ConstantSDNodeBitfields {
476 friend class ConstantSDNode;
477
478 uint16_t : NumSDNodeBits;
479
480 uint16_t IsOpaque : 1;
481 };
482
483 class MemSDNodeBitfields {
484 friend class MemSDNode;
485 friend class MemIntrinsicSDNode;
486 friend class AtomicSDNode;
487
488 uint16_t : NumSDNodeBits;
489
490 uint16_t IsVolatile : 1;
491 uint16_t IsNonTemporal : 1;
492 uint16_t IsDereferenceable : 1;
493 uint16_t IsInvariant : 1;
494 };
495 enum { NumMemSDNodeBits = NumSDNodeBits + 4 };
496
497 class LSBaseSDNodeBitfields {
498 friend class LSBaseSDNode;
499 friend class MaskedLoadStoreSDNode;
500 friend class MaskedGatherScatterSDNode;
501
502 uint16_t : NumMemSDNodeBits;
503
504 // This storage is shared between disparate class hierarchies to hold an
505 // enumeration specific to the class hierarchy in use.
506 // LSBaseSDNode => enum ISD::MemIndexedMode
507 // MaskedLoadStoreBaseSDNode => enum ISD::MemIndexedMode
508 // MaskedGatherScatterSDNode => enum ISD::MemIndexType
509 uint16_t AddressingMode : 3;
510 };
511 enum { NumLSBaseSDNodeBits = NumMemSDNodeBits + 3 };
512
513 class LoadSDNodeBitfields {
514 friend class LoadSDNode;
515 friend class MaskedLoadSDNode;
516
517 uint16_t : NumLSBaseSDNodeBits;
518
519 uint16_t ExtTy : 2; // enum ISD::LoadExtType
520 uint16_t IsExpanding : 1;
521 };
522
523 class StoreSDNodeBitfields {
524 friend class StoreSDNode;
525 friend class MaskedStoreSDNode;
526
527 uint16_t : NumLSBaseSDNodeBits;
528
529 uint16_t IsTruncating : 1;
530 uint16_t IsCompressing : 1;
531 };
532
533 union {
534 char RawSDNodeBits[sizeof(uint16_t)];
535 SDNodeBitfields SDNodeBits;
536 ConstantSDNodeBitfields ConstantSDNodeBits;
537 MemSDNodeBitfields MemSDNodeBits;
538 LSBaseSDNodeBitfields LSBaseSDNodeBits;
539 LoadSDNodeBitfields LoadSDNodeBits;
540 StoreSDNodeBitfields StoreSDNodeBits;
541 };
542END_TWO_BYTE_PACK()
543#undef BEGIN_TWO_BYTE_PACK
544#undef END_TWO_BYTE_PACK
545
546 // RawSDNodeBits must cover the entirety of the union. This means that all of
547 // the union's members must have size <= RawSDNodeBits. We write the RHS as
548 // "2" instead of sizeof(RawSDNodeBits) because MSVC can't handle the latter.
549 static_assert(sizeof(SDNodeBitfields) <= 2, "field too wide");
550 static_assert(sizeof(ConstantSDNodeBitfields) <= 2, "field too wide");
551 static_assert(sizeof(MemSDNodeBitfields) <= 2, "field too wide");
552 static_assert(sizeof(LSBaseSDNodeBitfields) <= 2, "field too wide");
553 static_assert(sizeof(LoadSDNodeBitfields) <= 2, "field too wide");
554 static_assert(sizeof(StoreSDNodeBitfields) <= 2, "field too wide");
555
556private:
557 friend class SelectionDAG;
558 // TODO: unfriend HandleSDNode once we fix its operand handling.
559 friend class HandleSDNode;
560
561 /// Unique id per SDNode in the DAG.
562 int NodeId = -1;
563
564 /// The values that are used by this operation.
565 SDUse *OperandList = nullptr;
566
567 /// The types of the values this node defines. SDNode's may
568 /// define multiple values simultaneously.
569 const EVT *ValueList;
570
571 /// List of uses for this SDNode.
572 SDUse *UseList = nullptr;
573
574 /// The number of entries in the Operand/Value list.
575 unsigned short NumOperands = 0;
576 unsigned short NumValues;
577
578 // The ordering of the SDNodes. It roughly corresponds to the ordering of the
579 // original LLVM instructions.
580 // This is used for turning off scheduling, because we'll forgo
581 // the normal scheduling algorithms and output the instructions according to
582 // this ordering.
583 unsigned IROrder;
584
585 /// Source line information.
586 DebugLoc debugLoc;
587
588 /// Return a pointer to the specified value type.
589 static const EVT *getValueTypeList(EVT VT);
590
591 SDNodeFlags Flags;
592
593public:
594 /// Unique and persistent id per SDNode in the DAG.
595 /// Used for debug printing.
596 uint16_t PersistentId;
597
598 //===--------------------------------------------------------------------===//
599 // Accessors
600 //
601
602 /// Return the SelectionDAG opcode value for this node. For
603 /// pre-isel nodes (those for which isMachineOpcode returns false), these
604 /// are the opcode values in the ISD and <target>ISD namespaces. For
605 /// post-isel opcodes, see getMachineOpcode.
606 unsigned getOpcode() const { return (unsigned short)NodeType; }
607
608 /// Test if this node has a target-specific opcode (in the
609 /// \<target\>ISD namespace).
610 bool isTargetOpcode() const { return NodeType >= ISD::BUILTIN_OP_END; }
611
612 /// Test if this node has a target-specific opcode that may raise
613 /// FP exceptions (in the \<target\>ISD namespace and greater than
614 /// FIRST_TARGET_STRICTFP_OPCODE). Note that all target memory
615 /// opcode are currently automatically considered to possibly raise
616 /// FP exceptions as well.
617 bool isTargetStrictFPOpcode() const {
618 return NodeType >= ISD::FIRST_TARGET_STRICTFP_OPCODE;
619 }
620
621 /// Test if this node has a target-specific
622 /// memory-referencing opcode (in the \<target\>ISD namespace and
623 /// greater than FIRST_TARGET_MEMORY_OPCODE).
624 bool isTargetMemoryOpcode() const {
625 return NodeType >= ISD::FIRST_TARGET_MEMORY_OPCODE;
626 }
627
628 /// Return true if this node is an ISD::UNDEF node.
629 bool isUndef() const { return NodeType == ISD::UNDEF; }
30. Assuming field 'NodeType' is not equal to UNDEF
31. Returning zero, which participates in a condition later
630
631 /// Test if this node is a memory intrinsic (with valid pointer information).
632 /// INTRINSIC_W_CHAIN and INTRINSIC_VOID nodes are sometimes created for
633 /// non-memory intrinsics (with chains) that are not really instances of
634 /// MemSDNode. For such nodes, we need some extra state to determine the
635 /// proper classof relationship.
636 bool isMemIntrinsic() const {
637 return (NodeType == ISD::INTRINSIC_W_CHAIN ||
638 NodeType == ISD::INTRINSIC_VOID) &&
639 SDNodeBits.IsMemIntrinsic;
640 }
641
642 /// Test if this node is a strict floating point pseudo-op.
643 bool isStrictFPOpcode() {
644 switch (NodeType) {
645 default:
646 return false;
647 case ISD::STRICT_FP16_TO_FP:
648 case ISD::STRICT_FP_TO_FP16:
649#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
650 case ISD::STRICT_##DAGN:
651#include "llvm/IR/ConstrainedOps.def"
652 return true;
653 }
654 }
655
656 /// Test if this node has a post-isel opcode, directly
657 /// corresponding to a MachineInstr opcode.
658 bool isMachineOpcode() const { return NodeType < 0; }
659
660 /// This may only be called if isMachineOpcode returns
661 /// true. It returns the MachineInstr opcode value that the node's opcode
662 /// corresponds to.
663 unsigned getMachineOpcode() const {
664 assert(isMachineOpcode() && "Not a MachineInstr opcode!");
665 return ~NodeType;
666 }
667
668 bool getHasDebugValue() const { return SDNodeBits.HasDebugValue; }
669 void setHasDebugValue(bool b) { SDNodeBits.HasDebugValue = b; }
670
671 bool isDivergent() const { return SDNodeBits.IsDivergent; }
672
673 /// Return true if there are no uses of this node.
674 bool use_empty() const { return UseList == nullptr; }
675
676 /// Return true if there is exactly one use of this node.
677 bool hasOneUse() const {
678 return !use_empty() && std::next(use_begin()) == use_end();
679 }
680
681 /// Return the number of uses of this node. This method takes
682 /// time proportional to the number of uses.
683 size_t use_size() const { return std::distance(use_begin(), use_end()); }
684
685 /// Return the unique node id.
686 int getNodeId() const { return NodeId; }
687
688 /// Set unique node id.
689 void setNodeId(int Id) { NodeId = Id; }
690
691 /// Return the node ordering.
692 unsigned getIROrder() const { return IROrder; }
693
694 /// Set the node ordering.
695 void setIROrder(unsigned Order) { IROrder = Order; }
696
697 /// Return the source location info.
698 const DebugLoc &getDebugLoc() const { return debugLoc; }
699
700 /// Set source location info. Try to avoid this; putting
701 /// it in the constructor is preferable.
702 void setDebugLoc(DebugLoc dl) { debugLoc = std::move(dl); }
703
704 /// This class provides iterator support for SDUse
705 /// operands that use a specific SDNode.
706 class use_iterator
707 : public std::iterator<std::forward_iterator_tag, SDUse, ptrdiff_t> {
708 friend class SDNode;
709
710 SDUse *Op = nullptr;
711
712 explicit use_iterator(SDUse *op) : Op(op) {}
713
714 public:
715 using reference = std::iterator<std::forward_iterator_tag,
716 SDUse, ptrdiff_t>::reference;
717 using pointer = std::iterator<std::forward_iterator_tag,
718 SDUse, ptrdiff_t>::pointer;
719
720 use_iterator() = default;
721 use_iterator(const use_iterator &I) : Op(I.Op) {}
722
723 bool operator==(const use_iterator &x) const {
724 return Op == x.Op;
725 }
726 bool operator!=(const use_iterator &x) const {
727 return !operator==(x);
728 }
729
730 /// Return true if this iterator is at the end of uses list.
731 bool atEnd() const { return Op == nullptr; }
732
733 // Iterator traversal: forward iteration only.
734 use_iterator &operator++() { // Preincrement
735 assert(Op && "Cannot increment end iterator!");
736 Op = Op->getNext();
737 return *this;
738 }
739
740 use_iterator operator++(int) { // Postincrement
741 use_iterator tmp = *this; ++*this; return tmp;
742 }
743
744 /// Retrieve a pointer to the current user node.
745 SDNode *operator*() const {
746 assert(Op && "Cannot dereference end iterator!");
747 return Op->getUser();
748 }
749
750 SDNode *operator->() const { return operator*(); }
751
752 SDUse &getUse() const { return *Op; }
753
754 /// Retrieve the operand # of this use in its user.
755 unsigned getOperandNo() const {
756 assert(Op && "Cannot dereference end iterator!");
757 return (unsigned)(Op - Op->getUser()->OperandList);
758 }
759 };
760
761 /// Provide iteration support to walk over all uses of an SDNode.
762 use_iterator use_begin() const {
763 return use_iterator(UseList);
764 }
765
766 static use_iterator use_end() { return use_iterator(nullptr); }
767
768 inline iterator_range<use_iterator> uses() {
769 return make_range(use_begin(), use_end());
770 }
771 inline iterator_range<use_iterator> uses() const {
772 return make_range(use_begin(), use_end());
773 }
774
775 /// Return true if there are exactly NUSES uses of the indicated value.
776 /// This method ignores uses of other values defined by this operation.
777 bool hasNUsesOfValue(unsigned NUses, unsigned Value) const;
778
779 /// Return true if there are any uses of the indicated value.
780 /// This method ignores uses of other values defined by this operation.
781 bool hasAnyUseOfValue(unsigned Value) const;
782
783 /// Return true if this node is the only use of N.
784 bool isOnlyUserOf(const SDNode *N) const;
785
786 /// Return true if this node is an operand of N.
787 bool isOperandOf(const SDNode *N) const;
788
789 /// Return true if this node is a predecessor of N.
790 /// NOTE: Implemented on top of hasPredecessor and every bit as
791 /// expensive. Use carefully.
792 bool isPredecessorOf(const SDNode *N) const {
793 return N->hasPredecessor(this);
794 }
795
796 /// Return true if N is a predecessor of this node.
797 /// N is either an operand of this node, or can be reached by recursively
798 /// traversing up the operands.
799 /// NOTE: This is an expensive method. Use it carefully.
800 bool hasPredecessor(const SDNode *N) const;
801
802 /// Returns true if N is a predecessor of any node in Worklist. This
803 /// helper keeps the Visited and Worklist sets externally to allow unions of
804 /// searches to be performed in parallel, caching of results across
805 /// queries and incremental addition to Worklist. Stops early if N is
806 /// found but will resume. Remember to clear Visited and Worklists
807 /// if DAG changes. MaxSteps gives a maximum number of nodes to visit before
808 /// giving up. The TopologicalPrune flag signals that positive NodeIds are
809 /// topologically ordered (Operands have strictly smaller node id) and search
810 /// can be pruned leveraging this.
811 static bool hasPredecessorHelper(const SDNode *N,
812 SmallPtrSetImpl<const SDNode *> &Visited,
813 SmallVectorImpl<const SDNode *> &Worklist,
814 unsigned int MaxSteps = 0,
815 bool TopologicalPrune = false) {
816 SmallVector<const SDNode *, 8> DeferredNodes;
817 if (Visited.count(N))
818 return true;
819
820 // Node Id's are assigned in three places: As a topological
821 // ordering (> 0), during legalization (results in values set to
822 // 0), new nodes (set to -1). If N has a topological id then we
823 // know that all nodes with ids smaller than it cannot be
824 // successors and we need not check them. Filter out all nodes
825 // that can't be matches. We add them to the worklist before exit
826 // in case of multiple calls. Note that during selection the topological id
827 // may be violated if a node's predecessor is selected before it. We mark
828 // this at selection by negating the id of unselected successors and
829 // restricting topological pruning to positive ids.
830
831 int NId = N->getNodeId();
832 // If we Invalidated the Id, reconstruct original NId.
833 if (NId < -1)
834 NId = -(NId + 1);
835
836 bool Found = false;
837 while (!Worklist.empty()) {
838 const SDNode *M = Worklist.pop_back_val();
839 int MId = M->getNodeId();
840 if (TopologicalPrune && M->getOpcode() != ISD::TokenFactor && (NId > 0) &&
841 (MId > 0) && (MId < NId)) {
842 DeferredNodes.push_back(M);
843 continue;
844 }
845 for (const SDValue &OpV : M->op_values()) {
846 SDNode *Op = OpV.getNode();
847 if (Visited.insert(Op).second)
848 Worklist.push_back(Op);
849 if (Op == N)
850 Found = true;
851 }
852 if (Found)
853 break;
854 if (MaxSteps != 0 && Visited.size() >= MaxSteps)
855 break;
856 }
857 // Push deferred nodes back on worklist.
858 Worklist.append(DeferredNodes.begin(), DeferredNodes.end());
859 // If we bailed early, conservatively return found.
860 if (MaxSteps != 0 && Visited.size() >= MaxSteps)
861 return true;
862 return Found;
863 }
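hasPredecessorHelper is a bounded worklist search over operand edges, with an optional topological prune that defers nodes whose ids are already smaller than N's. A simplified standalone sketch of the core search on a plain adjacency list (a hypothetical helper, without the deferred-node and pruning logic):

#include <cstddef>
#include <unordered_set>
#include <vector>

// Returns true if Target is reachable by walking operand edges from the
// initial worklist; bails out conservatively (reporting "found") once
// MaxSteps nodes have been visited.
static bool reachesOperand(const std::vector<std::vector<int>> &Operands,
                           std::vector<int> Worklist, int Target,
                           std::size_t MaxSteps) {
  std::unordered_set<int> Visited(Worklist.begin(), Worklist.end());
  while (!Worklist.empty()) {
    int M = Worklist.back();
    Worklist.pop_back();
    for (int Op : Operands[M]) {
      if (Visited.insert(Op).second)
        Worklist.push_back(Op);
      if (Op == Target)
        return true;
    }
    if (MaxSteps != 0 && Visited.size() >= MaxSteps)
      return true;
  }
  return false;
}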
864
865 /// Return true if all the users of N are contained in Nodes.
866 /// NOTE: Requires at least one match, but doesn't require them all.
867 static bool areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N);
868
869 /// Return the number of values used by this operation.
870 unsigned getNumOperands() const { return NumOperands; }
871
872 /// Return the maximum number of operands that a SDNode can hold.
873 static constexpr size_t getMaxNumOperands() {
874 return std::numeric_limits<decltype(SDNode::NumOperands)>::max();
875 }
876
877 /// Helper method returns the integer value of a ConstantSDNode operand.
878 inline uint64_t getConstantOperandVal(unsigned Num) const;
879
880 /// Helper method returns the APInt of a ConstantSDNode operand.
881 inline const APInt &getConstantOperandAPInt(unsigned Num) const;
882
883 const SDValue &getOperand(unsigned Num) const {
884 assert(Num < NumOperands && "Invalid child # of SDNode!");
885 return OperandList[Num];
886 }
887
888 using op_iterator = SDUse *;
889
890 op_iterator op_begin() const { return OperandList; }
891 op_iterator op_end() const { return OperandList+NumOperands; }
892 ArrayRef<SDUse> ops() const { return makeArrayRef(op_begin(), op_end()); }
893
894 /// Iterator for directly iterating over the operand SDValue's.
895 struct value_op_iterator
896 : iterator_adaptor_base<value_op_iterator, op_iterator,
897 std::random_access_iterator_tag, SDValue,
898 ptrdiff_t, value_op_iterator *,
899 value_op_iterator *> {
900 explicit value_op_iterator(SDUse *U = nullptr)
901 : iterator_adaptor_base(U) {}
902
903 const SDValue &operator*() const { return I->get(); }
904 };
905
906 iterator_range<value_op_iterator> op_values() const {
907 return make_range(value_op_iterator(op_begin()),
908 value_op_iterator(op_end()));
909 }
910
911 SDVTList getVTList() const {
912 SDVTList X = { ValueList, NumValues };
913 return X;
914 }
915
916 /// If this node has a glue operand, return the node
917 /// to which the glue operand points. Otherwise return NULL.
918 SDNode *getGluedNode() const {
919 if (getNumOperands() != 0 &&
920 getOperand(getNumOperands()-1).getValueType() == MVT::Glue)
921 return getOperand(getNumOperands()-1).getNode();
922 return nullptr;
923 }
924
925 /// If this node has a glue value with a user, return
926 /// the user (there is at most one). Otherwise return NULL.
927 SDNode *getGluedUser() const {
928 for (use_iterator UI = use_begin(), UE = use_end(); UI != UE; ++UI)
929 if (UI.getUse().get().getValueType() == MVT::Glue)
930 return *UI;
931 return nullptr;
932 }
933
934 const SDNodeFlags getFlags() const { return Flags; }
935 void setFlags(SDNodeFlags NewFlags) { Flags = NewFlags; }
936
937 /// Clear any flags in this node that aren't also set in Flags.
938 /// If Flags is not in a defined state then this has no effect.
939 void intersectFlagsWith(const SDNodeFlags Flags);
940
941 /// Return the number of values defined/returned by this operator.
942 unsigned getNumValues() const { return NumValues; }
943
944 /// Return the type of a specified result.
945 EVT getValueType(unsigned ResNo) const {
946 assert(ResNo < NumValues && "Illegal result number!");
947 return ValueList[ResNo];
948 }
949
950 /// Return the type of a specified result as a simple type.
951 MVT getSimpleValueType(unsigned ResNo) const {
952 return getValueType(ResNo).getSimpleVT();
953 }
954
955 /// Returns MVT::getSizeInBits(getValueType(ResNo)).
956 ///
957 /// If the value type is a scalable vector type, the scalable property will
958 /// be set and the runtime size will be a positive integer multiple of the
959 /// base size.
960 TypeSize getValueSizeInBits(unsigned ResNo) const {
961 return getValueType(ResNo).getSizeInBits();
962 }
963
964 using value_iterator = const EVT *;
965
966 value_iterator value_begin() const { return ValueList; }
967 value_iterator value_end() const { return ValueList+NumValues; }
968 iterator_range<value_iterator> values() const {
969 return llvm::make_range(value_begin(), value_end());
970 }
971
972 /// Return the opcode of this operation for printing.
973 std::string getOperationName(const SelectionDAG *G = nullptr) const;
974 static const char* getIndexedModeName(ISD::MemIndexedMode AM);
975 void print_types(raw_ostream &OS, const SelectionDAG *G) const;
976 void print_details(raw_ostream &OS, const SelectionDAG *G) const;
977 void print(raw_ostream &OS, const SelectionDAG *G = nullptr) const;
978 void printr(raw_ostream &OS, const SelectionDAG *G = nullptr) const;
979
980 /// Print a SelectionDAG node and all children down to
981 /// the leaves. The given SelectionDAG allows target-specific nodes
982 /// to be printed in human-readable form. Unlike printr, this will
983 /// print the whole DAG, including children that appear multiple
984 /// times.
985 ///
986 void printrFull(raw_ostream &O, const SelectionDAG *G = nullptr) const;
987
988 /// Print a SelectionDAG node and children up to
989 /// depth "depth." The given SelectionDAG allows target-specific
990 /// nodes to be printed in human-readable form. Unlike printr, this
991 /// will print children that appear multiple times wherever they are
992 /// used.
993 ///
994 void printrWithDepth(raw_ostream &O, const SelectionDAG *G = nullptr,
995 unsigned depth = 100) const;
996
997 /// Dump this node, for debugging.
998 void dump() const;
999
1000 /// Dump (recursively) this node and its use-def subgraph.
1001 void dumpr() const;
1002
1003 /// Dump this node, for debugging.
1004 /// The given SelectionDAG allows target-specific nodes to be printed
1005 /// in human-readable form.
1006 void dump(const SelectionDAG *G) const;
1007
1008 /// Dump (recursively) this node and its use-def subgraph.
1009 /// The given SelectionDAG allows target-specific nodes to be printed
1010 /// in human-readable form.
1011 void dumpr(const SelectionDAG *G) const;
1012
1013 /// printrFull to dbgs(). The given SelectionDAG allows
1014 /// target-specific nodes to be printed in human-readable form.
1015 /// Unlike dumpr, this will print the whole DAG, including children
1016 /// that appear multiple times.
1017 void dumprFull(const SelectionDAG *G = nullptr) const;
1018
1019 /// printrWithDepth to dbgs(). The given
1020 /// SelectionDAG allows target-specific nodes to be printed in
1021 /// human-readable form. Unlike dumpr, this will print children
1022 /// that appear multiple times wherever they are used.
1023 ///
1024 void dumprWithDepth(const SelectionDAG *G = nullptr,
1025 unsigned depth = 100) const;
1026
1027 /// Gather unique data for the node.
1028 void Profile(FoldingSetNodeID &ID) const;
1029
1030 /// This method should only be used by the SDUse class.
1031 void addUse(SDUse &U) { U.addToList(&UseList); }
1032
1033protected:
1034 static SDVTList getSDVTList(EVT VT) {
1035 SDVTList Ret = { getValueTypeList(VT), 1 };
1036 return Ret;
1037 }
1038
1039 /// Create an SDNode.
1040 ///
1041 /// SDNodes are created without any operands, and never own the operand
1042 /// storage. To add operands, see SelectionDAG::createOperands.
1043 SDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs)
1044 : NodeType(Opc), ValueList(VTs.VTs), NumValues(VTs.NumVTs),
1045 IROrder(Order), debugLoc(std::move(dl)) {
1046 memset(&RawSDNodeBits, 0, sizeof(RawSDNodeBits));
1047 assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor");
1048 assert(NumValues == VTs.NumVTs &&
1049        "NumValues wasn't wide enough for its operands!");
1050 }
1051
1052 /// Release the operands and set this node to have zero operands.
1053 void DropOperands();
1054};
1055
1056/// Wrapper class for IR location info (IR ordering and DebugLoc) to be passed
1057/// into SDNode creation functions.
1058/// When an SDNode is created from the DAGBuilder, the DebugLoc is extracted
1059/// from the original Instruction, and IROrder is the ordinal position of
1060/// the instruction.
1061/// When an SDNode is created after the DAG is being built, both DebugLoc and
1062/// the IROrder are propagated from the original SDNode.
1063/// So SDLoc class provides two constructors besides the default one, one to
1064/// be used by the DAGBuilder, the other to be used by others.
1065class SDLoc {
1066private:
1067 DebugLoc DL;
1068 int IROrder = 0;
1069
1070public:
1071 SDLoc() = default;
1072 SDLoc(const SDNode *N) : DL(N->getDebugLoc()), IROrder(N->getIROrder()) {}
1073 SDLoc(const SDValue V) : SDLoc(V.getNode()) {}
1074 SDLoc(const Instruction *I, int Order) : IROrder(Order) {
1075 assert(Order >= 0 && "bad IROrder");
1076 if (I)
1077 DL = I->getDebugLoc();
1078 }
1079
1080 unsigned getIROrder() const { return IROrder; }
1081 const DebugLoc &getDebugLoc() const { return DL; }
1082};
1083
1084// Define inline functions from the SDValue class.
1085
1086inline SDValue::SDValue(SDNode *node, unsigned resno)
1087 : Node(node), ResNo(resno) {
1088 // Explicitly check for !ResNo to avoid use-after-free, because there are
1089 // callers that use SDValue(N, 0) with a deleted N to indicate successful
1090 // combines.
1091 assert((!Node || !ResNo || ResNo < Node->getNumValues()) &&
1092        "Invalid result number for the given node!");
1093 assert(ResNo < -2U && "Cannot use result numbers reserved for DenseMaps.");
1094}
1095
1096inline unsigned SDValue::getOpcode() const {
1097 return Node->getOpcode();
1098}
1099
1100inline EVT SDValue::getValueType() const {
1101 return Node->getValueType(ResNo);
1102}
1103
1104inline unsigned SDValue::getNumOperands() const {
1105 return Node->getNumOperands();
1106}
1107
1108inline const SDValue &SDValue::getOperand(unsigned i) const {
1109 return Node->getOperand(i);
1110}
1111
1112inline uint64_t SDValue::getConstantOperandVal(unsigned i) const {
1113 return Node->getConstantOperandVal(i);
1114}
1115
1116inline const APInt &SDValue::getConstantOperandAPInt(unsigned i) const {
1117 return Node->getConstantOperandAPInt(i);
1118}
1119
1120inline bool SDValue::isTargetOpcode() const {
1121 return Node->isTargetOpcode();
1122}
1123
1124inline bool SDValue::isTargetMemoryOpcode() const {
1125 return Node->isTargetMemoryOpcode();
1126}
1127
1128inline bool SDValue::isMachineOpcode() const {
1129 return Node->isMachineOpcode();
1130}
1131
1132inline unsigned SDValue::getMachineOpcode() const {
1133 return Node->getMachineOpcode();
1134}
1135
1136inline bool SDValue::isUndef() const {
1137 return Node->isUndef();
29. Calling 'SDNode::isUndef'
32. Returning from 'SDNode::isUndef'
33. Returning zero, which participates in a condition later
1138}
1139
1140inline bool SDValue::use_empty() const {
1141 return !Node->hasAnyUseOfValue(ResNo);
1142}
1143
1144inline bool SDValue::hasOneUse() const {
1145 return Node->hasNUsesOfValue(1, ResNo);
1146}
1147
1148inline const DebugLoc &SDValue::getDebugLoc() const {
1149 return Node->getDebugLoc();
1150}
1151
1152inline void SDValue::dump() const {
1153 return Node->dump();
1154}
1155
1156inline void SDValue::dump(const SelectionDAG *G) const {
1157 return Node->dump(G);
1158}
1159
1160inline void SDValue::dumpr() const {
1161 return Node->dumpr();
1162}
1163
1164inline void SDValue::dumpr(const SelectionDAG *G) const {
1165 return Node->dumpr(G);
1166}
1167
1168// Define inline functions from the SDUse class.
1169
1170inline void SDUse::set(const SDValue &V) {
1171 if (Val.getNode()) removeFromList();
1172 Val = V;
1173 if (V.getNode()) V.getNode()->addUse(*this);
1174}
1175
1176inline void SDUse::setInitial(const SDValue &V) {
1177 Val = V;
1178 V.getNode()->addUse(*this);
1179}
1180
1181inline void SDUse::setNode(SDNode *N) {
1182 if (Val.getNode()) removeFromList();
1183 Val.setNode(N);
1184 if (N) N->addUse(*this);
1185}
1186
1187/// This class is used to form a handle around another node that
1188/// is persistent and is updated across invocations of replaceAllUsesWith on its
1189/// operand. This node should be directly created by end-users and not added to
1190/// the AllNodes list.
1191class HandleSDNode : public SDNode {
1192 SDUse Op;
1193
1194public:
1195 explicit HandleSDNode(SDValue X)
1196 : SDNode(ISD::HANDLENODE, 0, DebugLoc(), getSDVTList(MVT::Other)) {
1197 // HandleSDNodes are never inserted into the DAG, so they won't be
1198 // auto-numbered. Use ID 65535 as a sentinel.
1199 PersistentId = 0xffff;
1200
1201 // Manually set up the operand list. This node type is special in that it's
1202 // always stack allocated and SelectionDAG does not manage its operands.
1203 // TODO: This should either (a) not be in the SDNode hierarchy, or (b) not
1204 // be so special.
1205 Op.setUser(this);
1206 Op.setInitial(X);
1207 NumOperands = 1;
1208 OperandList = &Op;
1209 }
1210 ~HandleSDNode();
1211
1212 const SDValue &getValue() const { return Op; }
1213};
1214
1215class AddrSpaceCastSDNode : public SDNode {
1216private:
1217 unsigned SrcAddrSpace;
1218 unsigned DestAddrSpace;
1219
1220public:
1221 AddrSpaceCastSDNode(unsigned Order, const DebugLoc &dl, EVT VT,
1222 unsigned SrcAS, unsigned DestAS);
1223
1224 unsigned getSrcAddressSpace() const { return SrcAddrSpace; }
1225 unsigned getDestAddressSpace() const { return DestAddrSpace; }
1226
1227 static bool classof(const SDNode *N) {
1228 return N->getOpcode() == ISD::ADDRSPACECAST;
1229 }
1230};
1231
1232/// This is an abstract virtual class for memory operations.
1233class MemSDNode : public SDNode {
1234private:
1235 // VT of in-memory value.
1236 EVT MemoryVT;
1237
1238protected:
1239 /// Memory reference information.
1240 MachineMemOperand *MMO;
1241
1242public:
1243 MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, SDVTList VTs,
1244 EVT memvt, MachineMemOperand *MMO);
1245
1246 bool readMem() const { return MMO->isLoad(); }
1247 bool writeMem() const { return MMO->isStore(); }
1248
1249 /// Returns alignment and volatility of the memory access
1250 Align getOriginalAlign() const { return MMO->getBaseAlign(); }
1251 Align getAlign() const { return MMO->getAlign(); }
1252 LLVM_ATTRIBUTE_DEPRECATED(unsigned getOriginalAlignment() const,
1253                           "Use getOriginalAlign() instead") {
1254 return MMO->getBaseAlign().value();
1255 }
1256 // FIXME: Remove once transition to getAlign is over.
1257 unsigned getAlignment() const { return MMO->getAlign().value(); }
1258
1259 /// Return the SubclassData value, without HasDebugValue. This contains an
1260 /// encoding of the volatile flag, as well as bits used by subclasses. This
1261 /// function should only be used to compute a FoldingSetNodeID value.
1262 /// The HasDebugValue bit is masked out because the CSE map needs to match
1263 /// nodes with debug info with nodes without debug info. The same applies to
1264 /// the isDivergent bit.
1265 unsigned getRawSubclassData() const {
1266 uint16_t Data;
1267 union {
1268 char RawSDNodeBits[sizeof(uint16_t)];
1269 SDNodeBitfields SDNodeBits;
1270 };
1271 memcpy(&RawSDNodeBits, &this->RawSDNodeBits, sizeof(this->RawSDNodeBits));
1272 SDNodeBits.HasDebugValue = 0;
1273 SDNodeBits.IsDivergent = false;
1274 memcpy(&Data, &RawSDNodeBits, sizeof(RawSDNodeBits));
1275 return Data;
1276 }
1277
1278 bool isVolatile() const { return MemSDNodeBits.IsVolatile; }
1279 bool isNonTemporal() const { return MemSDNodeBits.IsNonTemporal; }
1280 bool isDereferenceable() const { return MemSDNodeBits.IsDereferenceable; }
1281 bool isInvariant() const { return MemSDNodeBits.IsInvariant; }
1282
1283 // Returns the offset from the location of the access.
1284 int64_t getSrcValueOffset() const { return MMO->getOffset(); }
1285
1286 /// Returns the AA info that describes the dereference.
1287 AAMDNodes getAAInfo() const { return MMO->getAAInfo(); }
1288
1289 /// Returns the Ranges that describes the dereference.
1290 const MDNode *getRanges() const { return MMO->getRanges(); }
1291
1292 /// Returns the synchronization scope ID for this memory operation.
1293 SyncScope::ID getSyncScopeID() const { return MMO->getSyncScopeID(); }
1294
1295 /// Return the atomic ordering requirements for this memory operation. For
1296 /// cmpxchg atomic operations, return the atomic ordering requirements when
1297 /// store occurs.
1298 AtomicOrdering getOrdering() const { return MMO->getOrdering(); }
1299
1300 /// Return true if the memory operation ordering is Unordered or higher.
1301 bool isAtomic() const { return MMO->isAtomic(); }
1302
1303 /// Returns true if the memory operation doesn't imply any ordering
1304 /// constraints on surrounding memory operations beyond the normal memory
1305 /// aliasing rules.
1306 bool isUnordered() const { return MMO->isUnordered(); }
1307
1308 /// Returns true if the memory operation is neither atomic nor volatile.
1309 bool isSimple() const { return !isAtomic() && !isVolatile(); }
1310
1311 /// Return the type of the in-memory value.
1312 EVT getMemoryVT() const { return MemoryVT; }
1313
1314 /// Return a MachineMemOperand object describing the memory
1315 /// reference performed by operation.
1316 MachineMemOperand *getMemOperand() const { return MMO; }
1317
1318 const MachinePointerInfo &getPointerInfo() const {
1319 return MMO->getPointerInfo();
1320 }
1321
1322 /// Return the address space for the associated pointer
1323 unsigned getAddressSpace() const {
1324 return getPointerInfo().getAddrSpace();
1325 }
1326
1327 /// Update this MemSDNode's MachineMemOperand information
1328 /// to reflect the alignment of NewMMO, if it has a greater alignment.
1329 /// This must only be used when the new alignment applies to all users of
1330 /// this MachineMemOperand.
1331 void refineAlignment(const MachineMemOperand *NewMMO) {
1332 MMO->refineAlignment(NewMMO);
1333 }
1334
1335 const SDValue &getChain() const { return getOperand(0); }
1336
1337 const SDValue &getBasePtr() const {
1338 switch (getOpcode()) {
1339 case ISD::STORE:
1340 case ISD::MSTORE:
1341 return getOperand(2);
1342 case ISD::MGATHER:
1343 case ISD::MSCATTER:
1344 return getOperand(3);
1345 default:
1346 return getOperand(1);
1347 }
1348 }
1349
1350 // Methods to support isa and dyn_cast
1351 static bool classof(const SDNode *N) {
1352 // For some targets, we lower some target intrinsics to a MemIntrinsicNode
1353 // with either an intrinsic or a target opcode.
1354 return N->getOpcode() == ISD::LOAD ||
1355 N->getOpcode() == ISD::STORE ||
1356 N->getOpcode() == ISD::PREFETCH ||
1357 N->getOpcode() == ISD::ATOMIC_CMP_SWAP ||
1358 N->getOpcode() == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS ||
1359 N->getOpcode() == ISD::ATOMIC_SWAP ||
1360 N->getOpcode() == ISD::ATOMIC_LOAD_ADD ||
1361 N->getOpcode() == ISD::ATOMIC_LOAD_SUB ||
1362 N->getOpcode() == ISD::ATOMIC_LOAD_AND ||
1363 N->getOpcode() == ISD::ATOMIC_LOAD_CLR ||
1364 N->getOpcode() == ISD::ATOMIC_LOAD_OR ||
1365 N->getOpcode() == ISD::ATOMIC_LOAD_XOR ||
1366 N->getOpcode() == ISD::ATOMIC_LOAD_NAND ||
1367 N->getOpcode() == ISD::ATOMIC_LOAD_MIN ||
1368 N->getOpcode() == ISD::ATOMIC_LOAD_MAX ||
1369 N->getOpcode() == ISD::ATOMIC_LOAD_UMIN ||
1370 N->getOpcode() == ISD::ATOMIC_LOAD_UMAX ||
1371 N->getOpcode() == ISD::ATOMIC_LOAD_FADD ||
1372 N->getOpcode() == ISD::ATOMIC_LOAD_FSUB ||
1373 N->getOpcode() == ISD::ATOMIC_LOAD ||
1374 N->getOpcode() == ISD::ATOMIC_STORE ||
1375 N->getOpcode() == ISD::MLOAD ||
1376 N->getOpcode() == ISD::MSTORE ||
1377 N->getOpcode() == ISD::MGATHER ||
1378 N->getOpcode() == ISD::MSCATTER ||
1379 N->isMemIntrinsic() ||
1380 N->isTargetMemoryOpcode();
1381 }
1382};
1383
1384/// This is an SDNode representing atomic operations.
1385class AtomicSDNode : public MemSDNode {
1386public:
1387 AtomicSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, SDVTList VTL,
1388 EVT MemVT, MachineMemOperand *MMO)
1389 : MemSDNode(Opc, Order, dl, VTL, MemVT, MMO) {
1390 assert(((Opc != ISD::ATOMIC_LOAD && Opc != ISD::ATOMIC_STORE) ||
1391        MMO->isAtomic()) && "then why are we using an AtomicSDNode?");
1393
1394 const SDValue &getBasePtr() const { return getOperand(1); }
1395 const SDValue &getVal() const { return getOperand(2); }
1396
1397 /// Returns true if this SDNode represents cmpxchg atomic operation, false
1398 /// otherwise.
1399 bool isCompareAndSwap() const {
1400 unsigned Op = getOpcode();
1401 return Op == ISD::ATOMIC_CMP_SWAP ||
1402 Op == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS;
1403 }
1404
1405 /// For cmpxchg atomic operations, return the atomic ordering requirements
1406 /// when store does not occur.
1407 AtomicOrdering getFailureOrdering() const {
1408 assert(isCompareAndSwap() && "Must be cmpxchg operation");
1409 return MMO->getFailureOrdering();
1410 }
1411
1412 // Methods to support isa and dyn_cast
1413 static bool classof(const SDNode *N) {
1414 return N->getOpcode() == ISD::ATOMIC_CMP_SWAP ||
1415 N->getOpcode() == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS ||
1416 N->getOpcode() == ISD::ATOMIC_SWAP ||
1417 N->getOpcode() == ISD::ATOMIC_LOAD_ADD ||
1418 N->getOpcode() == ISD::ATOMIC_LOAD_SUB ||
1419 N->getOpcode() == ISD::ATOMIC_LOAD_AND ||
1420 N->getOpcode() == ISD::ATOMIC_LOAD_CLR ||
1421 N->getOpcode() == ISD::ATOMIC_LOAD_OR ||
1422 N->getOpcode() == ISD::ATOMIC_LOAD_XOR ||
1423 N->getOpcode() == ISD::ATOMIC_LOAD_NAND ||
1424 N->getOpcode() == ISD::ATOMIC_LOAD_MIN ||
1425 N->getOpcode() == ISD::ATOMIC_LOAD_MAX ||
1426 N->getOpcode() == ISD::ATOMIC_LOAD_UMIN ||
1427 N->getOpcode() == ISD::ATOMIC_LOAD_UMAX ||
1428 N->getOpcode() == ISD::ATOMIC_LOAD_FADD ||
1429 N->getOpcode() == ISD::ATOMIC_LOAD_FSUB ||
1430 N->getOpcode() == ISD::ATOMIC_LOAD ||
1431 N->getOpcode() == ISD::ATOMIC_STORE;
1432 }
1433};
1434
1435/// This SDNode is used for target intrinsics that touch
1436/// memory and need an associated MachineMemOperand. Its opcode may be
1437/// INTRINSIC_VOID, INTRINSIC_W_CHAIN, PREFETCH, or a target-specific opcode
1438/// with a value not less than FIRST_TARGET_MEMORY_OPCODE.
1439class MemIntrinsicSDNode : public MemSDNode {
1440public:
1441 MemIntrinsicSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl,
1442 SDVTList VTs, EVT MemoryVT, MachineMemOperand *MMO)
1443 : MemSDNode(Opc, Order, dl, VTs, MemoryVT, MMO) {
1444 SDNodeBits.IsMemIntrinsic = true;
1445 }
1446
1447 // Methods to support isa and dyn_cast
1448 static bool classof(const SDNode *N) {
1449 // We lower some target intrinsics to their target opcode
1450 // early, so a node with a target opcode can be of this class.
1451 return N->isMemIntrinsic() ||
1452 N->getOpcode() == ISD::PREFETCH ||
1453 N->isTargetMemoryOpcode();
1454 }
1455};
1456
1457/// This SDNode is used to implement the code generator
1458/// support for the llvm IR shufflevector instruction. It combines elements
1459/// from two input vectors into a new input vector, with the selection and
1460/// ordering of elements determined by an array of integers, referred to as
1461/// the shuffle mask. For input vectors of width N, mask indices of 0..N-1
1462/// refer to elements from the LHS input, and indices from N to 2N-1 the RHS.
1463/// An index of -1 is treated as undef, such that the code generator may put
1464/// any value in the corresponding element of the result.
1465class ShuffleVectorSDNode : public SDNode {
1466 // The memory for Mask is owned by the SelectionDAG's OperandAllocator, and
1467 // is freed when the SelectionDAG object is destroyed.
1468 const int *Mask;
1469
1470protected:
1471 friend class SelectionDAG;
1472
1473 ShuffleVectorSDNode(EVT VT, unsigned Order, const DebugLoc &dl, const int *M)
1474 : SDNode(ISD::VECTOR_SHUFFLE, Order, dl, getSDVTList(VT)), Mask(M) {}
1475
1476public:
1477 ArrayRef<int> getMask() const {
1478 EVT VT = getValueType(0);
1479 return makeArrayRef(Mask, VT.getVectorNumElements());
1480 }
1481
1482 int getMaskElt(unsigned Idx) const {
1483 assert(Idx < getValueType(0).getVectorNumElements() && "Idx out of range!");
1484 return Mask[Idx];
1485 }
1486
1487 bool isSplat() const { return isSplatMask(Mask, getValueType(0)); }
1488
1489 int getSplatIndex() const {
1490 assert(isSplat() && "Cannot get splat index for non-splat!");
1491 EVT VT = getValueType(0);
1492 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
1493 if (Mask[i] >= 0)
1494 return Mask[i];
1495
1496 // We can choose any index value here and be correct because all elements
1497 // are undefined. Return 0 for better potential for callers to simplify.
1498 return 0;
1499 }
1500
1501 static bool isSplatMask(const int *Mask, EVT VT);
1502
1503 /// Change values in a shuffle permute mask assuming
1504 /// the two vector operands have swapped position.
1505 static void commuteMask(MutableArrayRef<int> Mask) {
1506 unsigned NumElems = Mask.size();
1507 for (unsigned i = 0; i != NumElems; ++i) {
1508 int idx = Mask[i];
1509 if (idx < 0)
1510 continue;
1511 else if (idx < (int)NumElems)
1512 Mask[i] = idx + NumElems;
1513 else
1514 Mask[i] = idx - NumElems;
1515 }
1516 }
1517
1518 static bool classof(const SDNode *N) {
1519 return N->getOpcode() == ISD::VECTOR_SHUFFLE;
1520 }
1521};
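// --- Illustrative sketch (not part of SelectionDAGNodes.h) -----------------
// A minimal, standalone example of the mask-commuting rule documented for
// ShuffleVectorSDNode::commuteMask above: when the two vector operands swap
// places, LHS indices (0..N-1) move into the RHS range (N..2N-1) and vice
// versa, while -1 (undef) entries are left untouched. The helper name and the
// use of std::vector are assumptions made only to keep the demo self-contained.
#include <cassert>
#include <vector>

static void commuteMaskSketch(std::vector<int> &Mask) {
  const int NumElems = static_cast<int>(Mask.size());
  for (int &Idx : Mask) {
    if (Idx < 0)
      continue;                                     // undef stays undef
    Idx = Idx < NumElems ? Idx + NumElems : Idx - NumElems;
  }
}

int main() {
  // With 4-element operands, {0, 5, -1, 3} becomes {4, 1, -1, 7}.
  std::vector<int> Mask = {0, 5, -1, 3};
  commuteMaskSketch(Mask);
  assert(Mask == (std::vector<int>{4, 1, -1, 7}));
  return 0;
}
// ----------------------------------------------------------------------------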
1522
1523class ConstantSDNode : public SDNode {
1524 friend class SelectionDAG;
1525
1526 const ConstantInt *Value;
1527
1528 ConstantSDNode(bool isTarget, bool isOpaque, const ConstantInt *val, EVT VT)
1529 : SDNode(isTarget ? ISD::TargetConstant : ISD::Constant, 0, DebugLoc(),
1530 getSDVTList(VT)),
1531 Value(val) {
1532 ConstantSDNodeBits.IsOpaque = isOpaque;
1533 }
1534
1535public:
1536 const ConstantInt *getConstantIntValue() const { return Value; }
1537 const APInt &getAPIntValue() const { return Value->getValue(); }
1538 uint64_t getZExtValue() const { return Value->getZExtValue(); }
1539 int64_t getSExtValue() const { return Value->getSExtValue(); }
1540 uint64_t getLimitedValue(uint64_t Limit = UINT64_MAX) {
1541 return Value->getLimitedValue(Limit);
1542 }
1543 MaybeAlign getMaybeAlignValue() const { return Value->getMaybeAlignValue(); }
1544 Align getAlignValue() const { return Value->getAlignValue(); }
1545
1546 bool isOne() const { return Value->isOne(); }
1547 bool isNullValue() const { return Value->isZero(); }
1548 bool isAllOnesValue() const { return Value->isMinusOne(); }
1549
1550 bool isOpaque() const { return ConstantSDNodeBits.IsOpaque; }
1551
1552 static bool classof(const SDNode *N) {
1553 return N->getOpcode() == ISD::Constant ||
1554 N->getOpcode() == ISD::TargetConstant;
1555 }
1556};
1557
1558uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
1559 return cast<ConstantSDNode>(getOperand(Num))->getZExtValue();
1560}
1561
1562const APInt &SDNode::getConstantOperandAPInt(unsigned Num) const {
1563 return cast<ConstantSDNode>(getOperand(Num))->getAPIntValue();
1564}
1565
1566class ConstantFPSDNode : public SDNode {
1567 friend class SelectionDAG;
1568
1569 const ConstantFP *Value;
1570
1571 ConstantFPSDNode(bool isTarget, const ConstantFP *val, EVT VT)
1572 : SDNode(isTarget ? ISD::TargetConstantFP : ISD::ConstantFP, 0,
1573 DebugLoc(), getSDVTList(VT)),
1574 Value(val) {}
1575
1576public:
1577 const APFloat& getValueAPF() const { return Value->getValueAPF(); }
1578 const ConstantFP *getConstantFPValue() const { return Value; }
1579
1580 /// Return true if the value is positive or negative zero.
1581 bool isZero() const { return Value->isZero(); }
1582
1583 /// Return true if the value is a NaN.
1584 bool isNaN() const { return Value->isNaN(); }
1585
1586 /// Return true if the value is an infinity
1587 bool isInfinity() const { return Value->isInfinity(); }
1588
1589 /// Return true if the value is negative.
1590 bool isNegative() const { return Value->isNegative(); }
1591
1592 /// We don't rely on operator== working on double values, as
1593 /// it returns true for things that are clearly not equal, like -0.0 and 0.0.
1594 /// As such, this method can be used to do an exact bit-for-bit comparison of
1595 /// two floating point values.
1596
1597 /// We leave the version with the double argument here because it's just so
1598 /// convenient to write "2.0" and the like. Without this function we'd
1599 /// have to duplicate its logic everywhere it's called.
1600 bool isExactlyValue(double V) const {
1601 return Value->getValueAPF().isExactlyValue(V);
1602 }
1603 bool isExactlyValue(const APFloat& V) const;
1604
1605 static bool isValueValidForType(EVT VT, const APFloat& Val);
1606
1607 static bool classof(const SDNode *N) {
1608 return N->getOpcode() == ISD::ConstantFP ||
1609 N->getOpcode() == ISD::TargetConstantFP;
1610 }
1611};
1612
1613/// Returns true if \p V is a constant integer zero.
1614bool isNullConstant(SDValue V);
1615
1616/// Returns true if \p V is an FP constant with a value of positive zero.
1617bool isNullFPConstant(SDValue V);
1618
1619/// Returns true if \p V is an integer constant with all bits set.
1620bool isAllOnesConstant(SDValue V);
1621
1622/// Returns true if \p V is a constant integer one.
1623bool isOneConstant(SDValue V);
1624
1625/// Return the non-bitcasted source operand of \p V if it exists.
1626/// If \p V is not a bitcasted value, it is returned as-is.
1627SDValue peekThroughBitcasts(SDValue V);
1628
1629/// Return the non-bitcasted and one-use source operand of \p V if it exists.
1630/// If \p V is not a bitcasted one-use value, it is returned as-is.
1631SDValue peekThroughOneUseBitcasts(SDValue V);
1632
1633/// Return the non-extracted vector source operand of \p V if it exists.
1634/// If \p V is not an extracted subvector, it is returned as-is.
1635SDValue peekThroughExtractSubvectors(SDValue V);
1636
1637/// Returns true if \p V is a bitwise not operation. Assumes that an all ones
1638/// constant is canonicalized to be operand 1.
1639bool isBitwiseNot(SDValue V, bool AllowUndefs = false);
1640
1641/// Returns the SDNode if it is a constant splat BuildVector or constant int.
1642ConstantSDNode *isConstOrConstSplat(SDValue N, bool AllowUndefs = false,
1643 bool AllowTruncation = false);
1644
1645/// Returns the SDNode if it is a demanded constant splat BuildVector or
1646/// constant int.
1647ConstantSDNode *isConstOrConstSplat(SDValue N, const APInt &DemandedElts,
1648 bool AllowUndefs = false,
1649 bool AllowTruncation = false);
1650
1651/// Returns the SDNode if it is a constant splat BuildVector or constant float.
1652ConstantFPSDNode *isConstOrConstSplatFP(SDValue N, bool AllowUndefs = false);
1653
1654/// Returns the SDNode if it is a demanded constant splat BuildVector or
1655/// constant float.
1656ConstantFPSDNode *isConstOrConstSplatFP(SDValue N, const APInt &DemandedElts,
1657 bool AllowUndefs = false);
1658
1659/// Return true if the value is a constant 0 integer or a splatted vector of
1660/// a constant 0 integer (with no undefs by default).
1661/// Build vector implicit truncation is not an issue for null values.
1662bool isNullOrNullSplat(SDValue V, bool AllowUndefs = false);
1663
1664/// Return true if the value is a constant 1 integer or a splatted vector of a
1665/// constant 1 integer (with no undefs).
1666/// Does not permit build vector implicit truncation.
1667bool isOneOrOneSplat(SDValue V);
1668
1669/// Return true if the value is a constant -1 integer or a splatted vector of a
1670/// constant -1 integer (with no undefs).
1671/// Does not permit build vector implicit truncation.
1672bool isAllOnesOrAllOnesSplat(SDValue V);
1673
1674class GlobalAddressSDNode : public SDNode {
1675 friend class SelectionDAG;
1676
1677 const GlobalValue *TheGlobal;
1678 int64_t Offset;
1679 unsigned TargetFlags;
1680
1681 GlobalAddressSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL,
1682 const GlobalValue *GA, EVT VT, int64_t o,
1683 unsigned TF);
1684
1685public:
1686 const GlobalValue *getGlobal() const { return TheGlobal; }
1687 int64_t getOffset() const { return Offset; }
1688 unsigned getTargetFlags() const { return TargetFlags; }
1689 // Return the address space this GlobalAddress belongs to.
1690 unsigned getAddressSpace() const;
1691
1692 static bool classof(const SDNode *N) {
1693 return N->getOpcode() == ISD::GlobalAddress ||
1694 N->getOpcode() == ISD::TargetGlobalAddress ||
1695 N->getOpcode() == ISD::GlobalTLSAddress ||
1696 N->getOpcode() == ISD::TargetGlobalTLSAddress;
1697 }
1698};
1699
1700class FrameIndexSDNode : public SDNode {
1701 friend class SelectionDAG;
1702
1703 int FI;
1704
1705 FrameIndexSDNode(int fi, EVT VT, bool isTarg)
1706 : SDNode(isTarg ? ISD::TargetFrameIndex : ISD::FrameIndex,
1707 0, DebugLoc(), getSDVTList(VT)), FI(fi) {
1708 }
1709
1710public:
1711 int getIndex() const { return FI; }
1712
1713 static bool classof(const SDNode *N) {
1714 return N->getOpcode() == ISD::FrameIndex ||
1715 N->getOpcode() == ISD::TargetFrameIndex;
1716 }
1717};
1718
1719/// This SDNode is used for LIFETIME_START/LIFETIME_END values, which indicate
1720/// the offset and size that are started/ended in the underlying FrameIndex.
1721class LifetimeSDNode : public SDNode {
1722 friend class SelectionDAG;
1723 int64_t Size;
1724 int64_t Offset; // -1 if offset is unknown.
1725
1726 LifetimeSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
1727 SDVTList VTs, int64_t Size, int64_t Offset)
1728 : SDNode(Opcode, Order, dl, VTs), Size(Size), Offset(Offset) {}
1729public:
1730 int64_t getFrameIndex() const {
1731 return cast<FrameIndexSDNode>(getOperand(1))->getIndex();
1732 }
1733
1734 bool hasOffset() const { return Offset >= 0; }
1735 int64_t getOffset() const {
1736 assert(hasOffset() && "offset is unknown");
1737 return Offset;
1738 }
1739 int64_t getSize() const {
1740 assert(hasOffset() && "offset is unknown");
1741 return Size;
1742 }
1743
1744 // Methods to support isa and dyn_cast
1745 static bool classof(const SDNode *N) {
1746 return N->getOpcode() == ISD::LIFETIME_START ||
1747 N->getOpcode() == ISD::LIFETIME_END;
1748 }
1749};
1750
1751class JumpTableSDNode : public SDNode {
1752 friend class SelectionDAG;
1753
1754 int JTI;
1755 unsigned TargetFlags;
1756
1757 JumpTableSDNode(int jti, EVT VT, bool isTarg, unsigned TF)
1758 : SDNode(isTarg ? ISD::TargetJumpTable : ISD::JumpTable,
1759 0, DebugLoc(), getSDVTList(VT)), JTI(jti), TargetFlags(TF) {
1760 }
1761
1762public:
1763 int getIndex() const { return JTI; }
1764 unsigned getTargetFlags() const { return TargetFlags; }
1765
1766 static bool classof(const SDNode *N) {
1767 return N->getOpcode() == ISD::JumpTable ||
1768 N->getOpcode() == ISD::TargetJumpTable;
1769 }
1770};
1771
1772class ConstantPoolSDNode : public SDNode {
1773 friend class SelectionDAG;
1774
1775 union {
1776 const Constant *ConstVal;
1777 MachineConstantPoolValue *MachineCPVal;
1778 } Val;
1779 int Offset; // It's a MachineConstantPoolValue if top bit is set.
1780 Align Alignment; // Minimum alignment requirement of CP.
1781 unsigned TargetFlags;
1782
1783 ConstantPoolSDNode(bool isTarget, const Constant *c, EVT VT, int o,
1784 Align Alignment, unsigned TF)
1785 : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, 0,
1786 DebugLoc(), getSDVTList(VT)),
1787 Offset(o), Alignment(Alignment), TargetFlags(TF) {
1788 assert(Offset >= 0 && "Offset is too large");
1789 Val.ConstVal = c;
1790 }
1791
1792 ConstantPoolSDNode(bool isTarget, MachineConstantPoolValue *v, EVT VT, int o,
1793 Align Alignment, unsigned TF)
1794 : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, 0,
1795 DebugLoc(), getSDVTList(VT)),
1796 Offset(o), Alignment(Alignment), TargetFlags(TF) {
1797 assert(Offset >= 0 && "Offset is too large");
1798 Val.MachineCPVal = v;
1799 Offset |= 1 << (sizeof(unsigned)*CHAR_BIT-1);
1800 }
1801
1802public:
1803 bool isMachineConstantPoolEntry() const {
1804 return Offset < 0;
1805 }
1806
1807 const Constant *getConstVal() const {
1808 assert(!isMachineConstantPoolEntry() && "Wrong constantpool type");
1809 return Val.ConstVal;
1810 }
1811
1812 MachineConstantPoolValue *getMachineCPVal() const {
1813 assert(isMachineConstantPoolEntry() && "Wrong constantpool type");
1814 return Val.MachineCPVal;
1815 }
1816
1817 int getOffset() const {
1818 return Offset & ~(1 << (sizeof(unsigned)*CHAR_BIT-1));
1819 }
1820
1821 // Return the alignment of this constant pool object, which is either 0 (for
1822 // default alignment) or the desired value.
1823 Align getAlign() const { return Alignment; }
1824 unsigned getTargetFlags() const { return TargetFlags; }
1825
1826 Type *getType() const;
1827
1828 static bool classof(const SDNode *N) {
1829 return N->getOpcode() == ISD::ConstantPool ||
1830 N->getOpcode() == ISD::TargetConstantPool;
1831 }
1832};
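// --- Illustrative sketch (not part of SelectionDAGNodes.h) -----------------
// A standalone sketch of the Offset encoding used by ConstantPoolSDNode above:
// the top bit of the int tags a MachineConstantPoolValue entry (so
// isMachineConstantPoolEntry() is simply "Offset < 0"), and getOffset() masks
// that bit back off. Variable names are assumptions; the bit manipulation
// mirrors the constructors and getOffset() shown above and, like the header
// itself, relies on the usual two's-complement int representation.
#include <cassert>
#include <climits>

int main() {
  const int TopBit = 1 << (sizeof(unsigned) * CHAR_BIT - 1);

  int Offset = 40;                   // a plain Constant entry: top bit clear
  assert(Offset >= 0);               // i.e. !isMachineConstantPoolEntry()

  Offset |= TopBit;                  // tag it as a MachineConstantPoolValue
  assert(Offset < 0);                // isMachineConstantPoolEntry()
  assert((Offset & ~TopBit) == 40);  // getOffset() recovers the raw offset
  return 0;
}
// ----------------------------------------------------------------------------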
1833
1834/// Completely target-dependent object reference.
1835class TargetIndexSDNode : public SDNode {
1836 friend class SelectionDAG;
1837
1838 unsigned TargetFlags;
1839 int Index;
1840 int64_t Offset;
1841
1842public:
1843 TargetIndexSDNode(int Idx, EVT VT, int64_t Ofs, unsigned TF)
1844 : SDNode(ISD::TargetIndex, 0, DebugLoc(), getSDVTList(VT)),
1845 TargetFlags(TF), Index(Idx), Offset(Ofs) {}
1846
1847 unsigned getTargetFlags() const { return TargetFlags; }
1848 int getIndex() const { return Index; }
1849 int64_t getOffset() const { return Offset; }
1850
1851 static bool classof(const SDNode *N) {
1852 return N->getOpcode() == ISD::TargetIndex;
1853 }
1854};
1855
1856class BasicBlockSDNode : public SDNode {
1857 friend class SelectionDAG;
1858
1859 MachineBasicBlock *MBB;
1860
1861 /// Debug info is meaningful and potentially useful here, but we create
1862 /// blocks out of order when they're jumped to, which makes it a bit
1863 /// harder. Let's see if we need it first.
1864 explicit BasicBlockSDNode(MachineBasicBlock *mbb)
1865 : SDNode(ISD::BasicBlock, 0, DebugLoc(), getSDVTList(MVT::Other)), MBB(mbb)
1866 {}
1867
1868public:
1869 MachineBasicBlock *getBasicBlock() const { return MBB; }
1870
1871 static bool classof(const SDNode *N) {
1872 return N->getOpcode() == ISD::BasicBlock;
1873 }
1874};
1875
1876/// A "pseudo-class" with methods for operating on BUILD_VECTORs.
1877class BuildVectorSDNode : public SDNode {
1878public:
1879 // These are constructed as SDNodes and then cast to BuildVectorSDNodes.
1880 explicit BuildVectorSDNode() = delete;
1881
1882 /// Check if this is a constant splat, and if so, find the
1883 /// smallest element size that splats the vector. If MinSplatBits is
1884 /// nonzero, the element size must be at least that large. Note that the
1885 /// splat element may be the entire vector (i.e., a one element vector).
1886 /// Returns the splat element value in SplatValue. Any undefined bits in
1887 /// that value are zero, and the corresponding bits in the SplatUndef mask
1888 /// are set. The SplatBitSize value is set to the splat element size in
1889 /// bits. HasAnyUndefs is set to true if any bits in the vector are
1890 /// undefined. isBigEndian describes the endianness of the target.
1891 bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef,
1892 unsigned &SplatBitSize, bool &HasAnyUndefs,
1893 unsigned MinSplatBits = 0,
1894 bool isBigEndian = false) const;
1895
1896 /// Returns the demanded splatted value or a null value if this is not a
1897 /// splat.
1898 ///
1899 /// The DemandedElts mask indicates the elements that must be in the splat.
1900 /// If passed a non-null UndefElements bitvector, it will resize it to match
1901 /// the vector width and set the bits where elements are undef.
1902 SDValue getSplatValue(const APInt &DemandedElts,
1903 BitVector *UndefElements = nullptr) const;
1904
1905 /// Returns the splatted value or a null value if this is not a splat.
1906 ///
1907 /// If passed a non-null UndefElements bitvector, it will resize it to match
1908 /// the vector width and set the bits where elements are undef.
1909 SDValue getSplatValue(BitVector *UndefElements = nullptr) const;
1910
1911 /// Find the shortest repeating sequence of values in the build vector.
1912 ///
1913 /// e.g. { u, X, u, X, u, u, X, u } -> { X }
1914 /// { X, Y, u, Y, u, u, X, u } -> { X, Y }
1915 ///
1916 /// Currently this must be a power-of-2 build vector.
1917 /// The DemandedElts mask indicates the elements that must be present,
1918 /// undemanded elements in Sequence may be null (SDValue()). If passed a
1919 /// non-null UndefElements bitvector, it will resize it to match the original
1920 /// vector width and set the bits where elements are undef. If result is
1921 /// false, Sequence will be empty.
1922 bool getRepeatedSequence(const APInt &DemandedElts,
1923 SmallVectorImpl<SDValue> &Sequence,
1924 BitVector *UndefElements = nullptr) const;
1925
1926 /// Find the shortest repeating sequence of values in the build vector.
1927 ///
1928 /// e.g. { u, X, u, X, u, u, X, u } -> { X }
1929 /// { X, Y, u, Y, u, u, X, u } -> { X, Y }
1930 ///
1931 /// Currently this must be a power-of-2 build vector.
1932 /// If passed a non-null UndefElements bitvector, it will resize it to match
1933 /// the original vector width and set the bits where elements are undef.
1934 /// If result is false, Sequence will be empty.
1935 bool getRepeatedSequence(SmallVectorImpl<SDValue> &Sequence,
1936 BitVector *UndefElements = nullptr) const;
1937
1938 /// Returns the demanded splatted constant or null if this is not a constant
1939 /// splat.
1940 ///
1941 /// The DemandedElts mask indicates the elements that must be in the splat.
1942 /// If passed a non-null UndefElements bitvector, it will resize it to match
1943 /// the vector width and set the bits where elements are undef.
1944 ConstantSDNode *
1945 getConstantSplatNode(const APInt &DemandedElts,
1946 BitVector *UndefElements = nullptr) const;
1947
1948 /// Returns the splatted constant or null if this is not a constant
1949 /// splat.
1950 ///
1951 /// If passed a non-null UndefElements bitvector, it will resize it to match
1952 /// the vector width and set the bits where elements are undef.
1953 ConstantSDNode *
1954 getConstantSplatNode(BitVector *UndefElements = nullptr) const;
1955
1956 /// Returns the demanded splatted constant FP or null if this is not a
1957 /// constant FP splat.
1958 ///
1959 /// The DemandedElts mask indicates the elements that must be in the splat.
1960 /// If passed a non-null UndefElements bitvector, it will resize it to match
1961 /// the vector width and set the bits where elements are undef.
1962 ConstantFPSDNode *
1963 getConstantFPSplatNode(const APInt &DemandedElts,
1964 BitVector *UndefElements = nullptr) const;
1965
1966 /// Returns the splatted constant FP or null if this is not a constant
1967 /// FP splat.
1968 ///
1969 /// If passed a non-null UndefElements bitvector, it will resize it to match
1970 /// the vector width and set the bits where elements are undef.
1971 ConstantFPSDNode *
1972 getConstantFPSplatNode(BitVector *UndefElements = nullptr) const;
1973
1974 /// If this is a constant FP splat and the splatted constant FP is an
1975 /// exact power or 2, return the log base 2 integer value. Otherwise,
1976 /// return -1.
1977 ///
1978 /// The BitWidth specifies the necessary bit precision.
1979 int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements,
1980 uint32_t BitWidth) const;
1981
1982 bool isConstant() const;
1983
1984 static bool classof(const SDNode *N) {
1985 return N->getOpcode() == ISD::BUILD_VECTOR;
1986 }
1987};
1988
1989/// An SDNode that holds an arbitrary LLVM IR Value. This is
1990/// used when the SelectionDAG needs to make a simple reference to something
1991/// in the LLVM IR representation.
1992///
1993class SrcValueSDNode : public SDNode {
1994 friend class SelectionDAG;
1995
1996 const Value *V;
1997
1998 /// Create a SrcValue for a general value.
1999 explicit SrcValueSDNode(const Value *v)
2000 : SDNode(ISD::SRCVALUE, 0, DebugLoc(), getSDVTList(MVT::Other)), V(v) {}
2001
2002public:
2003 /// Return the contained Value.
2004 const Value *getValue() const { return V; }
2005
2006 static bool classof(const SDNode *N) {
2007 return N->getOpcode() == ISD::SRCVALUE;
2008 }
2009};
2010
2011class MDNodeSDNode : public SDNode {
2012 friend class SelectionDAG;
2013
2014 const MDNode *MD;
2015
2016 explicit MDNodeSDNode(const MDNode *md)
2017 : SDNode(ISD::MDNODE_SDNODE, 0, DebugLoc(), getSDVTList(MVT::Other)), MD(md)
2018 {}
2019
2020public:
2021 const MDNode *getMD() const { return MD; }
2022
2023 static bool classof(const SDNode *N) {
2024 return N->getOpcode() == ISD::MDNODE_SDNODE;
2025 }
2026};
2027
2028class RegisterSDNode : public SDNode {
2029 friend class SelectionDAG;
2030
2031 Register Reg;
2032
2033 RegisterSDNode(Register reg, EVT VT)
2034 : SDNode(ISD::Register, 0, DebugLoc(), getSDVTList(VT)), Reg(reg) {}
2035
2036public:
2037 Register getReg() const { return Reg; }
2038
2039 static bool classof(const SDNode *N) {
2040 return N->getOpcode() == ISD::Register;
2041 }
2042};
2043
2044class RegisterMaskSDNode : public SDNode {
2045 friend class SelectionDAG;
2046
2047 // The memory for RegMask is not owned by the node.
2048 const uint32_t *RegMask;
2049
2050 RegisterMaskSDNode(const uint32_t *mask)
2051 : SDNode(ISD::RegisterMask, 0, DebugLoc(), getSDVTList(MVT::Untyped)),
2052 RegMask(mask) {}
2053
2054public:
2055 const uint32_t *getRegMask() const { return RegMask; }
2056
2057 static bool classof(const SDNode *N) {
2058 return N->getOpcode() == ISD::RegisterMask;
2059 }
2060};
2061
2062class BlockAddressSDNode : public SDNode {
2063 friend class SelectionDAG;
2064
2065 const BlockAddress *BA;
2066 int64_t Offset;
2067 unsigned TargetFlags;
2068
2069 BlockAddressSDNode(unsigned NodeTy, EVT VT, const BlockAddress *ba,
2070 int64_t o, unsigned Flags)
2071 : SDNode(NodeTy, 0, DebugLoc(), getSDVTList(VT)),
2072 BA(ba), Offset(o), TargetFlags(Flags) {}
2073
2074public:
2075 const BlockAddress *getBlockAddress() const { return BA; }
2076 int64_t getOffset() const { return Offset; }
2077 unsigned getTargetFlags() const { return TargetFlags; }
2078
2079 static bool classof(const SDNode *N) {
2080 return N->getOpcode() == ISD::BlockAddress ||
2081 N->getOpcode() == ISD::TargetBlockAddress;
2082 }
2083};
2084
2085class LabelSDNode : public SDNode {
2086 friend class SelectionDAG;
2087
2088 MCSymbol *Label;
2089
2090 LabelSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl, MCSymbol *L)
2091 : SDNode(Opcode, Order, dl, getSDVTList(MVT::Other)), Label(L) {
2092 assert(LabelSDNode::classof(this) && "not a label opcode");
2093 }
2094
2095public:
2096 MCSymbol *getLabel() const { return Label; }
2097
2098 static bool classof(const SDNode *N) {
2099 return N->getOpcode() == ISD::EH_LABEL ||
2100 N->getOpcode() == ISD::ANNOTATION_LABEL;
2101 }
2102};
2103
2104class ExternalSymbolSDNode : public SDNode {
2105 friend class SelectionDAG;
2106
2107 const char *Symbol;
2108 unsigned TargetFlags;
2109
2110 ExternalSymbolSDNode(bool isTarget, const char *Sym, unsigned TF, EVT VT)
2111 : SDNode(isTarget ? ISD::TargetExternalSymbol : ISD::ExternalSymbol, 0,
2112 DebugLoc(), getSDVTList(VT)),
2113 Symbol(Sym), TargetFlags(TF) {}
2114
2115public:
2116 const char *getSymbol() const { return Symbol; }
2117 unsigned getTargetFlags() const { return TargetFlags; }
2118
2119 static bool classof(const SDNode *N) {
2120 return N->getOpcode() == ISD::ExternalSymbol ||
2121 N->getOpcode() == ISD::TargetExternalSymbol;
2122 }
2123};
2124
2125class MCSymbolSDNode : public SDNode {
2126 friend class SelectionDAG;
2127
2128 MCSymbol *Symbol;
2129
2130 MCSymbolSDNode(MCSymbol *Symbol, EVT VT)
2131 : SDNode(ISD::MCSymbol, 0, DebugLoc(), getSDVTList(VT)), Symbol(Symbol) {}
2132
2133public:
2134 MCSymbol *getMCSymbol() const { return Symbol; }
2135
2136 static bool classof(const SDNode *N) {
2137 return N->getOpcode() == ISD::MCSymbol;
2138 }
2139};
2140
2141class CondCodeSDNode : public SDNode {
2142 friend class SelectionDAG;
2143
2144 ISD::CondCode Condition;
2145
2146 explicit CondCodeSDNode(ISD::CondCode Cond)
2147 : SDNode(ISD::CONDCODE, 0, DebugLoc(), getSDVTList(MVT::Other)),
2148 Condition(Cond) {}
2149
2150public:
2151 ISD::CondCode get() const { return Condition; }
2152
2153 static bool classof(const SDNode *N) {
2154 return N->getOpcode() == ISD::CONDCODE;
2155 }
2156};
2157
2158/// This class is used to represent EVT's, which are used
2159/// to parameterize some operations.
2160class VTSDNode : public SDNode {
2161 friend class SelectionDAG;
2162
2163 EVT ValueType;
2164
2165 explicit VTSDNode(EVT VT)
2166 : SDNode(ISD::VALUETYPE, 0, DebugLoc(), getSDVTList(MVT::Other)),
2167 ValueType(VT) {}
2168
2169public:
2170 EVT getVT() const { return ValueType; }
2171
2172 static bool classof(const SDNode *N) {
2173 return N->getOpcode() == ISD::VALUETYPE;
2174 }
2175};
2176
2177/// Base class for LoadSDNode and StoreSDNode
2178class LSBaseSDNode : public MemSDNode {
2179public:
2180 LSBaseSDNode(ISD::NodeType NodeTy, unsigned Order, const DebugLoc &dl,
2181 SDVTList VTs, ISD::MemIndexedMode AM, EVT MemVT,
2182 MachineMemOperand *MMO)
2183 : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
2184 LSBaseSDNodeBits.AddressingMode = AM;
2185 assert(getAddressingMode() == AM && "Value truncated");
2186 }
2187
2188 const SDValue &getOffset() const {
2189 return getOperand(getOpcode() == ISD::LOAD ? 2 : 3);
2190 }
2191
2192 /// Return the addressing mode for this load or store:
2193 /// unindexed, pre-inc, pre-dec, post-inc, or post-dec.
2194 ISD::MemIndexedMode getAddressingMode() const {
2195 return static_cast<ISD::MemIndexedMode>(LSBaseSDNodeBits.AddressingMode);
2196 }
2197
2198 /// Return true if this is a pre/post inc/dec load/store.
2199 bool isIndexed() const { return getAddressingMode() != ISD::UNINDEXED; }
2200
2201 /// Return true if this is NOT a pre/post inc/dec load/store.
2202 bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; }
2203
2204 static bool classof(const SDNode *N) {
2205 return N->getOpcode() == ISD::LOAD ||
2206 N->getOpcode() == ISD::STORE;
2207 }
2208};
2209
2210/// This class is used to represent ISD::LOAD nodes.
2211class LoadSDNode : public LSBaseSDNode {
2212 friend class SelectionDAG;
2213
2214 LoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2215 ISD::MemIndexedMode AM, ISD::LoadExtType ETy, EVT MemVT,
2216 MachineMemOperand *MMO)
2217 : LSBaseSDNode(ISD::LOAD, Order, dl, VTs, AM, MemVT, MMO) {
2218 LoadSDNodeBits.ExtTy = ETy;
2219 assert(readMem() && "Load MachineMemOperand is not a load!");
2220 assert(!writeMem() && "Load MachineMemOperand is a store!");
2221 }
2222
2223public:
2224 /// Return whether this is a plain node,
2225 /// or one of the varieties of value-extending loads.
2226 ISD::LoadExtType getExtensionType() const {
2227 return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy);
2228 }
2229
2230 const SDValue &getBasePtr() const { return getOperand(1); }
2231 const SDValue &getOffset() const { return getOperand(2); }
2232
2233 static bool classof(const SDNode *N) {
2234 return N->getOpcode() == ISD::LOAD;
2235 }
2236};
2237
2238/// This class is used to represent ISD::STORE nodes.
2239class StoreSDNode : public LSBaseSDNode {
2240 friend class SelectionDAG;
2241
2242 StoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2243 ISD::MemIndexedMode AM, bool isTrunc, EVT MemVT,
2244 MachineMemOperand *MMO)
2245 : LSBaseSDNode(ISD::STORE, Order, dl, VTs, AM, MemVT, MMO) {
2246 StoreSDNodeBits.IsTruncating = isTrunc;
2247 assert(!readMem() && "Store MachineMemOperand is a load!");
2248 assert(writeMem() && "Store MachineMemOperand is not a store!");
2249 }
2250
2251public:
2252 /// Return true if the op does a truncation before store.
2253 /// For integers this is the same as doing a TRUNCATE and storing the result.
2254 /// For floats, it is the same as doing an FP_ROUND and storing the result.
2255 bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
2256 void setTruncatingStore(bool Truncating) {
2257 StoreSDNodeBits.IsTruncating = Truncating;
2258 }
2259
2260 const SDValue &getValue() const { return getOperand(1); }
2261 const SDValue &getBasePtr() const { return getOperand(2); }
2262 const SDValue &getOffset() const { return getOperand(3); }
2263
2264 static bool classof(const SDNode *N) {
2265 return N->getOpcode() == ISD::STORE;
2266 }
2267};
2268
2269/// This base class is used to represent MLOAD and MSTORE nodes
2270class MaskedLoadStoreSDNode : public MemSDNode {
2271public:
2272 friend class SelectionDAG;
2273
2274 MaskedLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order,
2275 const DebugLoc &dl, SDVTList VTs,
2276 ISD::MemIndexedMode AM, EVT MemVT,
2277 MachineMemOperand *MMO)
2278 : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
2279 LSBaseSDNodeBits.AddressingMode = AM;
2280 assert(getAddressingMode() == AM && "Value truncated");
2281 }
2282
2283 // MaskedLoadSDNode (Chain, ptr, offset, mask, passthru)
2284 // MaskedStoreSDNode (Chain, data, ptr, offset, mask)
2285 // Mask is a vector of i1 elements
2286 const SDValue &getOffset() const {
2287 return getOperand(getOpcode() == ISD::MLOAD ? 2 : 3);
2288 }
2289 const SDValue &getMask() const {
2290 return getOperand(getOpcode() == ISD::MLOAD ? 3 : 4);
2291 }
2292
2293 /// Return the addressing mode for this load or store:
2294 /// unindexed, pre-inc, pre-dec, post-inc, or post-dec.
2295 ISD::MemIndexedMode getAddressingMode() const {
2296 return static_cast<ISD::MemIndexedMode>(LSBaseSDNodeBits.AddressingMode);
2297 }
2298
2299 /// Return true if this is a pre/post inc/dec load/store.
2300 bool isIndexed() const { return getAddressingMode() != ISD::UNINDEXED; }
2301
2302 /// Return true if this is NOT a pre/post inc/dec load/store.
2303 bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; }
2304
2305 static bool classof(const SDNode *N) {
2306 return N->getOpcode() == ISD::MLOAD ||
2307 N->getOpcode() == ISD::MSTORE;
2308 }
2309};
2310
2311/// This class is used to represent an MLOAD node
2312class MaskedLoadSDNode : public MaskedLoadStoreSDNode {
2313public:
2314 friend class SelectionDAG;
2315
2316 MaskedLoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2317 ISD::MemIndexedMode AM, ISD::LoadExtType ETy,
2318 bool IsExpanding, EVT MemVT, MachineMemOperand *MMO)
2319 : MaskedLoadStoreSDNode(ISD::MLOAD, Order, dl, VTs, AM, MemVT, MMO) {
2320 LoadSDNodeBits.ExtTy = ETy;
2321 LoadSDNodeBits.IsExpanding = IsExpanding;
2322 }
2323
2324 ISD::LoadExtType getExtensionType() const {
2325 return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy);
2326 }
2327
2328 const SDValue &getBasePtr() const { return getOperand(1); }
2329 const SDValue &getOffset() const { return getOperand(2); }
2330 const SDValue &getMask() const { return getOperand(3); }
2331 const SDValue &getPassThru() const { return getOperand(4); }
2332
2333 static bool classof(const SDNode *N) {
2334 return N->getOpcode() == ISD::MLOAD;
2335 }
2336
2337 bool isExpandingLoad() const { return LoadSDNodeBits.IsExpanding; }
2338};
2339
2340/// This class is used to represent an MSTORE node
2341class MaskedStoreSDNode : public MaskedLoadStoreSDNode {
2342public:
2343 friend class SelectionDAG;
2344
2345 MaskedStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2346 ISD::MemIndexedMode AM, bool isTrunc, bool isCompressing,
2347 EVT MemVT, MachineMemOperand *MMO)
2348 : MaskedLoadStoreSDNode(ISD::MSTORE, Order, dl, VTs, AM, MemVT, MMO) {
2349 StoreSDNodeBits.IsTruncating = isTrunc;
2350 StoreSDNodeBits.IsCompressing = isCompressing;
2351 }
2352
2353 /// Return true if the op does a truncation before store.
2354 /// For integers this is the same as doing a TRUNCATE and storing the result.
2355 /// For floats, it is the same as doing an FP_ROUND and storing the result.
2356 bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
2357
2358 /// Returns true if the op does a compression to the vector before storing.
2359 /// The node contiguously stores the active elements (integers or floats)
2360 /// in src (those with their respective bit set in writemask k) to unaligned
2361 /// memory at base_addr.
2362 bool isCompressingStore() const { return StoreSDNodeBits.IsCompressing; }
2363
2364 const SDValue &getValue() const { return getOperand(1); }
2365 const SDValue &getBasePtr() const { return getOperand(2); }
2366 const SDValue &getOffset() const { return getOperand(3); }
2367 const SDValue &getMask() const { return getOperand(4); }
2368
2369 static bool classof(const SDNode *N) {
2370 return N->getOpcode() == ISD::MSTORE;
2371 }
2372};
2373
2374/// This is a base class used to represent
2375/// MGATHER and MSCATTER nodes
2376///
2377class MaskedGatherScatterSDNode : public MemSDNode {
2378public:
2379 friend class SelectionDAG;
2380
2381 MaskedGatherScatterSDNode(ISD::NodeType NodeTy, unsigned Order,
2382 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
2383 MachineMemOperand *MMO, ISD::MemIndexType IndexType)
2384 : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
2385 LSBaseSDNodeBits.AddressingMode = IndexType;
2386 assert(getIndexType() == IndexType && "Value truncated");
2387 }
2388
2389 /// How is Index applied to BasePtr when computing addresses.
2390 ISD::MemIndexType getIndexType() const {
2391 return static_cast<ISD::MemIndexType>(LSBaseSDNodeBits.AddressingMode);
2392 }
2393 bool isIndexScaled() const {
2394 return (getIndexType() == ISD::SIGNED_SCALED) ||
2395 (getIndexType() == ISD::UNSIGNED_SCALED);
2396 }
2397 bool isIndexSigned() const {
2398 return (getIndexType() == ISD::SIGNED_SCALED) ||
2399 (getIndexType() == ISD::SIGNED_UNSCALED);
2400 }
2401
2402 // In the both nodes address is Op1, mask is Op2:
2403 // MaskedGatherSDNode (Chain, passthru, mask, base, index, scale)
2404 // MaskedScatterSDNode (Chain, value, mask, base, index, scale)
2405 // Mask is a vector of i1 elements
2406 const SDValue &getBasePtr() const { return getOperand(3); }
2407 const SDValue &getIndex() const { return getOperand(4); }
2408 const SDValue &getMask() const { return getOperand(2); }
2409 const SDValue &getScale() const { return getOperand(5); }
2410
2411 static bool classof(const SDNode *N) {
2412 return N->getOpcode() == ISD::MGATHER ||
2413 N->getOpcode() == ISD::MSCATTER;
2414 }
2415};
2416
2417/// This class is used to represent an MGATHER node
2418///
2419class MaskedGatherSDNode : public MaskedGatherScatterSDNode {
2420public:
2421 friend class SelectionDAG;
2422
2423 MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2424 EVT MemVT, MachineMemOperand *MMO,
2425 ISD::MemIndexType IndexType)
2426 : MaskedGatherScatterSDNode(ISD::MGATHER, Order, dl, VTs, MemVT, MMO,
2427 IndexType) {}
2428
2429 const SDValue &getPassThru() const { return getOperand(1); }
2430
2431 static bool classof(const SDNode *N) {
2432 return N->getOpcode() == ISD::MGATHER;
2433 }
2434};
2435
2436/// This class is used to represent an MSCATTER node
2437///
2438class MaskedScatterSDNode : public MaskedGatherScatterSDNode {
2439public:
2440 friend class SelectionDAG;
2441
2442 MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2443 EVT MemVT, MachineMemOperand *MMO,
2444 ISD::MemIndexType IndexType)
2445 : MaskedGatherScatterSDNode(ISD::MSCATTER, Order, dl, VTs, MemVT, MMO,
2446 IndexType) {}
2447
2448 const SDValue &getValue() const { return getOperand(1); }
2449
2450 static bool classof(const SDNode *N) {
2451 return N->getOpcode() == ISD::MSCATTER;
2452 }
2453};
2454
2455/// An SDNode that represents everything that will be needed
2456/// to construct a MachineInstr. These nodes are created during the
2457/// instruction selection proper phase.
2458///
2459/// Note that the only supported way to set the `memoperands` is by calling the
2460/// `SelectionDAG::setNodeMemRefs` function as the memory management happens
2461/// inside the DAG rather than in the node.
2462class MachineSDNode : public SDNode {
2463private:
2464 friend class SelectionDAG;
2465
2466 MachineSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL, SDVTList VTs)
2467 : SDNode(Opc, Order, DL, VTs) {}
2468
2469 // We use a pointer union between a single `MachineMemOperand` pointer and
2470 // a pointer to an array of `MachineMemOperand` pointers. This is null when
2471 // the number of these is zero, the single pointer variant used when the
2472 // number is one, and the array is used for larger numbers.
2473 //
2474 // The array is allocated via the `SelectionDAG`'s allocator and so will
2475 // always live until the DAG is cleaned up and doesn't require ownership here.
2476 //
2477 // We can't use something simpler like `TinyPtrVector` here because `SDNode`
2478 // subclasses aren't managed in a conforming C++ manner. See the comments on
2479 // `SelectionDAG::MorphNodeTo` which details what all goes on, but the
2480 // constraint here is that these don't manage memory with their constructor or
2481 // destructor and can be initialized to a good state even if they start off
2482 // uninitialized.
2483 PointerUnion<MachineMemOperand *, MachineMemOperand **> MemRefs = {};
2484
2485 // Note that this could be folded into the above `MemRefs` member if doing so
2486 // is advantageous at some point. We don't need to store this in most cases.
2487 // However, at the moment this doesn't appear to make the allocation any
2488 // smaller and makes the code somewhat simpler to read.
2489 int NumMemRefs = 0;
2490
2491public:
2492 using mmo_iterator = ArrayRef<MachineMemOperand *>::const_iterator;
2493
2494 ArrayRef<MachineMemOperand *> memoperands() const {
2495 // Special case the common cases.
2496 if (NumMemRefs == 0)
2497 return {};
2498 if (NumMemRefs == 1)
2499 return makeArrayRef(MemRefs.getAddrOfPtr1(), 1);
2500
2501 // Otherwise we have an actual array.
2502 return makeArrayRef(MemRefs.get<MachineMemOperand **>(), NumMemRefs);
2503 }
2504 mmo_iterator memoperands_begin() const { return memoperands().begin(); }
2505 mmo_iterator memoperands_end() const { return memoperands().end(); }
2506 bool memoperands_empty() const { return memoperands().empty(); }
2507
2508 /// Clear out the memory reference descriptor list.
2509 void clearMemRefs() {
2510 MemRefs = nullptr;
2511 NumMemRefs = 0;
2512 }
2513
2514 static bool classof(const SDNode *N) {
2515 return N->isMachineOpcode();
2516 }
2517};
2518
2519/// An SDNode that records if a register contains a value that is guaranteed to
2520/// be aligned accordingly.
2521class AssertAlignSDNode : public SDNode {
2522 Align Alignment;
2523
2524public:
2525 AssertAlignSDNode(unsigned Order, const DebugLoc &DL, EVT VT, Align A)
2526 : SDNode(ISD::AssertAlign, Order, DL, getSDVTList(VT)), Alignment(A) {}
2527
2528 Align getAlign() const { return Alignment; }
2529
2530 static bool classof(const SDNode *N) {
2531 return N->getOpcode() == ISD::AssertAlign;
2532 }
2533};
2534
2535class SDNodeIterator : public std::iterator<std::forward_iterator_tag,
2536 SDNode, ptrdiff_t> {
2537 const SDNode *Node;
2538 unsigned Operand;
2539
2540 SDNodeIterator(const SDNode *N, unsigned Op) : Node(N), Operand(Op) {}
2541
2542public:
2543 bool operator==(const SDNodeIterator& x) const {
2544 return Operand == x.Operand;
2545 }
2546 bool operator!=(const SDNodeIterator& x) const { return !operator==(x); }
2547
2548 pointer operator*() const {
2549 return Node->getOperand(Operand).getNode();
2550 }
2551 pointer operator->() const { return operator*(); }
2552
2553 SDNodeIterator& operator++() { // Preincrement
2554 ++Operand;
2555 return *this;
2556 }
2557 SDNodeIterator operator++(int) { // Postincrement
2558 SDNodeIterator tmp = *this; ++*this; return tmp;
2559 }
2560 size_t operator-(SDNodeIterator Other) const {
2561 assert(Node == Other.Node &&
2562 "Cannot compare iterators of two different nodes!");
2563 return Operand - Other.Operand;
2564 }
2565
2566 static SDNodeIterator begin(const SDNode *N) { return SDNodeIterator(N, 0); }
2567 static SDNodeIterator end (const SDNode *N) {
2568 return SDNodeIterator(N, N->getNumOperands());
2569 }
2570
2571 unsigned getOperand() const { return Operand; }
2572 const SDNode *getNode() const { return Node; }
2573};
2574
2575template <> struct GraphTraits<SDNode*> {
2576 using NodeRef = SDNode *;
2577 using ChildIteratorType = SDNodeIterator;
2578
2579 static NodeRef getEntryNode(SDNode *N) { return N; }
2580
2581 static ChildIteratorType child_begin(NodeRef N) {
2582 return SDNodeIterator::begin(N);
2583 }
2584
2585 static ChildIteratorType child_end(NodeRef N) {
2586 return SDNodeIterator::end(N);
2587 }
2588};
2589
2590/// A representation of the largest SDNode, for use in sizeof().
2591///
2592/// This needs to be a union because the largest node differs on 32 bit systems
2593/// with 4 and 8 byte pointer alignment, respectively.
2594using LargestSDNode = AlignedCharArrayUnion<AtomicSDNode, TargetIndexSDNode,
2595 BlockAddressSDNode,
2596 GlobalAddressSDNode>;
2597
2598/// The SDNode class with the greatest alignment requirement.
2599using MostAlignedSDNode = GlobalAddressSDNode;
2600
2601namespace ISD {
2602
2603 /// Returns true if the specified node is a non-extending and unindexed load.
2604 inline bool isNormalLoad(const SDNode *N) {
2605 const LoadSDNode *Ld = dyn_cast<LoadSDNode>(N);
2606 return Ld && Ld->getExtensionType() == ISD::NON_EXTLOAD &&
2607 Ld->getAddressingMode() == ISD::UNINDEXED;
2608 }
2609
2610 /// Returns true if the specified node is a non-extending load.
2611 inline bool isNON_EXTLoad(const SDNode *N) {
2612 return isa<LoadSDNode>(N) &&
2613 cast<LoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD;
2614 }
2615
2616 /// Returns true if the specified node is a EXTLOAD.
2617 inline bool isEXTLoad(const SDNode *N) {
2618 return isa<LoadSDNode>(N) &&
2619 cast<LoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD;
2620 }
2621
2622 /// Returns true if the specified node is a SEXTLOAD.
2623 inline bool isSEXTLoad(const SDNode *N) {
2624 return isa<LoadSDNode>(N) &&
2625 cast<LoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD;
2626 }
2627
2628 /// Returns true if the specified node is a ZEXTLOAD.
2629 inline bool isZEXTLoad(const SDNode *N) {
2630 return isa<LoadSDNode>(N) &&
2631 cast<LoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD;
2632 }
2633
2634 /// Returns true if the specified node is an unindexed load.
2635 inline bool isUNINDEXEDLoad(const SDNode *N) {
2636 return isa<LoadSDNode>(N) &&
2637 cast<LoadSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
2638 }
2639
2640 /// Returns true if the specified node is a non-truncating
2641 /// and unindexed store.
2642 inline bool isNormalStore(const SDNode *N) {
2643 const StoreSDNode *St = dyn_cast<StoreSDNode>(N);
2644 return St && !St->isTruncatingStore() &&
2645 St->getAddressingMode() == ISD::UNINDEXED;
2646 }
2647
2648 /// Returns true if the specified node is a non-truncating store.
2649 inline bool isNON_TRUNCStore(const SDNode *N) {
2650 return isa<StoreSDNode>(N) && !cast<StoreSDNode>(N)->isTruncatingStore();
2651 }
2652
2653 /// Returns true if the specified node is a truncating store.
2654 inline bool isTRUNCStore(const SDNode *N) {
2655 return isa<StoreSDNode>(N) && cast<StoreSDNode>(N)->isTruncatingStore();
2656 }
2657
2658 /// Returns true if the specified node is an unindexed store.
2659 inline bool isUNINDEXEDStore(const SDNode *N) {
2660 return isa<StoreSDNode>(N) &&
2661 cast<StoreSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
2662 }
2663
2664 /// Attempt to match a unary predicate against a scalar/splat constant or
2665 /// every element of a constant BUILD_VECTOR.
2666 /// If AllowUndef is true, then UNDEF elements will pass nullptr to Match.
2667 bool matchUnaryPredicate(SDValue Op,
2668 std::function<bool(ConstantSDNode *)> Match,
2669 bool AllowUndefs = false);
2670
2671 /// Attempt to match a binary predicate against a pair of scalar/splat
2672 /// constants or every element of a pair of constant BUILD_VECTORs.
2673 /// If AllowUndef is true, then UNDEF elements will pass nullptr to Match.
2674 /// If AllowTypeMismatch is true then RetType + ArgTypes don't need to match.
2675 bool matchBinaryPredicate(
2676 SDValue LHS, SDValue RHS,
2677 std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match,
2678 bool AllowUndefs = false, bool AllowTypeMismatch = false);
2679
2680 /// Returns true if the specified value is the overflow result from one
2681 /// of the overflow intrinsic nodes.
2682 inline bool isOverflowIntrOpRes(SDValue Op) {
2683 unsigned Opc = Op.getOpcode();
2684 return (Op.getResNo() == 1 &&
2685 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
2686 Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO));
2687 }
2688
2689} // end namespace ISD
2690
2691} // end namespace llvm
2692
2693#endif // LLVM_CODEGEN_SELECTIONDAGNODES_H