Bug Summary

File: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Warning: line 9248, column 36
Although the value stored to 'SplatBits' is used in the enclosing expression, the value is never actually read from 'SplatBits'
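The diagnostic comes from the analyzer's dead-store check (deadcode.DeadStores, enabled in the invocation below): an assignment whose result is consumed only by the enclosing expression while the assigned variable is never read again afterwards. The following is a minimal, hypothetical C++ sketch of the flagged pattern and a typical fix; the names and parameters (selectSplat, APSplatValue, UseDirect) are invented for illustration and are not the actual code at line 9248.

#include <cstdint>

// Sketch of the diagnosed pattern: the assignment to SplatBits feeds the
// surrounding comparison, but SplatBits itself is never read afterwards,
// so the store is dead.
uint64_t selectSplat(uint64_t APSplatValue, bool UseDirect) {
  uint64_t SplatBits = 0;
  if ((SplatBits = APSplatValue) != 0 && UseDirect)
    return APSplatValue; // reads the original value, not SplatBits
  return 0;
}

// Typical fix: drop the dead variable and test the value directly.
uint64_t selectSplatFixed(uint64_t APSplatValue, bool UseDirect) {
  if (APSplatValue != 0 && UseDirect)
    return APSplatValue;
  return 0;
}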

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name PPCISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -fno-split-dwarf-inlining -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-12/lib/clang/12.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-12~++20200926111128+c6c5629f2fb/build-llvm/lib/Target/PowerPC -I /build/llvm-toolchain-snapshot-12~++20200926111128+c6c5629f2fb/llvm/lib/Target/PowerPC -I /build/llvm-toolchain-snapshot-12~++20200926111128+c6c5629f2fb/build-llvm/include -I /build/llvm-toolchain-snapshot-12~++20200926111128+c6c5629f2fb/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-12/lib/clang/12.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-12~++20200926111128+c6c5629f2fb/build-llvm/lib/Target/PowerPC -fdebug-prefix-map=/build/llvm-toolchain-snapshot-12~++20200926111128+c6c5629f2fb=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2020-09-26-161721-17566-1 -x c++ /build/llvm-toolchain-snapshot-12~++20200926111128+c6c5629f2fb/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
1//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the PPCISelLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCISelLowering.h"
14#include "MCTargetDesc/PPCPredicates.h"
15#include "PPC.h"
16#include "PPCCCState.h"
17#include "PPCCallingConv.h"
18#include "PPCFrameLowering.h"
19#include "PPCInstrInfo.h"
20#include "PPCMachineFunctionInfo.h"
21#include "PPCPerfectShuffle.h"
22#include "PPCRegisterInfo.h"
23#include "PPCSubtarget.h"
24#include "PPCTargetMachine.h"
25#include "llvm/ADT/APFloat.h"
26#include "llvm/ADT/APInt.h"
27#include "llvm/ADT/ArrayRef.h"
28#include "llvm/ADT/DenseMap.h"
29#include "llvm/ADT/None.h"
30#include "llvm/ADT/STLExtras.h"
31#include "llvm/ADT/SmallPtrSet.h"
32#include "llvm/ADT/SmallSet.h"
33#include "llvm/ADT/SmallVector.h"
34#include "llvm/ADT/Statistic.h"
35#include "llvm/ADT/StringRef.h"
36#include "llvm/ADT/StringSwitch.h"
37#include "llvm/CodeGen/CallingConvLower.h"
38#include "llvm/CodeGen/ISDOpcodes.h"
39#include "llvm/CodeGen/MachineBasicBlock.h"
40#include "llvm/CodeGen/MachineFrameInfo.h"
41#include "llvm/CodeGen/MachineFunction.h"
42#include "llvm/CodeGen/MachineInstr.h"
43#include "llvm/CodeGen/MachineInstrBuilder.h"
44#include "llvm/CodeGen/MachineJumpTableInfo.h"
45#include "llvm/CodeGen/MachineLoopInfo.h"
46#include "llvm/CodeGen/MachineMemOperand.h"
47#include "llvm/CodeGen/MachineModuleInfo.h"
48#include "llvm/CodeGen/MachineOperand.h"
49#include "llvm/CodeGen/MachineRegisterInfo.h"
50#include "llvm/CodeGen/RuntimeLibcalls.h"
51#include "llvm/CodeGen/SelectionDAG.h"
52#include "llvm/CodeGen/SelectionDAGNodes.h"
53#include "llvm/CodeGen/TargetInstrInfo.h"
54#include "llvm/CodeGen/TargetLowering.h"
55#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
56#include "llvm/CodeGen/TargetRegisterInfo.h"
57#include "llvm/CodeGen/ValueTypes.h"
58#include "llvm/IR/CallingConv.h"
59#include "llvm/IR/Constant.h"
60#include "llvm/IR/Constants.h"
61#include "llvm/IR/DataLayout.h"
62#include "llvm/IR/DebugLoc.h"
63#include "llvm/IR/DerivedTypes.h"
64#include "llvm/IR/Function.h"
65#include "llvm/IR/GlobalValue.h"
66#include "llvm/IR/IRBuilder.h"
67#include "llvm/IR/Instructions.h"
68#include "llvm/IR/Intrinsics.h"
69#include "llvm/IR/IntrinsicsPowerPC.h"
70#include "llvm/IR/Module.h"
71#include "llvm/IR/Type.h"
72#include "llvm/IR/Use.h"
73#include "llvm/IR/Value.h"
74#include "llvm/MC/MCContext.h"
75#include "llvm/MC/MCExpr.h"
76#include "llvm/MC/MCRegisterInfo.h"
77#include "llvm/MC/MCSectionXCOFF.h"
78#include "llvm/MC/MCSymbolXCOFF.h"
79#include "llvm/Support/AtomicOrdering.h"
80#include "llvm/Support/BranchProbability.h"
81#include "llvm/Support/Casting.h"
82#include "llvm/Support/CodeGen.h"
83#include "llvm/Support/CommandLine.h"
84#include "llvm/Support/Compiler.h"
85#include "llvm/Support/Debug.h"
86#include "llvm/Support/ErrorHandling.h"
87#include "llvm/Support/Format.h"
88#include "llvm/Support/KnownBits.h"
89#include "llvm/Support/MachineValueType.h"
90#include "llvm/Support/MathExtras.h"
91#include "llvm/Support/raw_ostream.h"
92#include "llvm/Target/TargetMachine.h"
93#include "llvm/Target/TargetOptions.h"
94#include <algorithm>
95#include <cassert>
96#include <cstdint>
97#include <iterator>
98#include <list>
99#include <utility>
100#include <vector>
101
102using namespace llvm;
103
104#define DEBUG_TYPE "ppc-lowering"
105
106static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
107cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
108
109static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
110cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
111
112static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
113cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
114
115static cl::opt<bool> DisableSCO("disable-ppc-sco",
116cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
117
118static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
119cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
120
121static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
122cl::desc("use absolute jump tables on ppc"), cl::Hidden);
123
124static cl::opt<bool> EnablePPCPCRelTLS(
125 "enable-ppc-pcrel-tls",
126 cl::desc("enable the use of PC relative memops in TLS instructions on PPC"),
127 cl::Hidden);
128
129STATISTIC(NumTailCalls, "Number of tail calls");
130STATISTIC(NumSiblingCalls, "Number of sibling calls");
131STATISTIC(ShufflesHandledWithVPERM, "Number of shuffles lowered to a VPERM");
132STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
133
134static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
135
136static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
137
138// FIXME: Remove this once the bug has been fixed!
139extern cl::opt<bool> ANDIGlueBug;
140
141PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
142 const PPCSubtarget &STI)
143 : TargetLowering(TM), Subtarget(STI) {
144 // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
145 // arguments are at least 4/8 bytes aligned.
146 bool isPPC64 = Subtarget.isPPC64();
147 setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
148
149 // Set up the register classes.
150 addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
151 if (!useSoftFloat()) {
152 if (hasSPE()) {
153 addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
154 addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
155 } else {
156 addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
157 addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
158 }
159 }
160
161 // Match BITREVERSE to customized fast code sequence in the td file.
162 setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
163 setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
164
165 // Sub-word ATOMIC_CMP_SWAP needs to ensure that the input is zero-extended.
166 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
167
168 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
169 for (MVT VT : MVT::integer_valuetypes()) {
170 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
171 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
172 }
173
174 if (Subtarget.isISA3_0()) {
175 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
176 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
177 setTruncStoreAction(MVT::f64, MVT::f16, Legal);
178 setTruncStoreAction(MVT::f32, MVT::f16, Legal);
179 } else {
180 // No extending loads from f16 or HW conversions back and forth.
181 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
182 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
183 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
184 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
185 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
186 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
187 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
188 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
189 }
190
191 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
192
193 // PowerPC has pre-inc loads and stores.
194 setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
195 setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
196 setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
197 setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
198 setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
199 setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
200 setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
201 setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
202 setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
203 setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
204 if (!Subtarget.hasSPE()) {
205 setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
206 setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
207 setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
208 setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
209 }
210
211 // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
212 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
213 for (MVT VT : ScalarIntVTs) {
214 setOperationAction(ISD::ADDC, VT, Legal);
215 setOperationAction(ISD::ADDE, VT, Legal);
216 setOperationAction(ISD::SUBC, VT, Legal);
217 setOperationAction(ISD::SUBE, VT, Legal);
218 }
219
220 if (Subtarget.useCRBits()) {
221 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
222
223 if (isPPC64 || Subtarget.hasFPCVT()) {
224 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Promote);
225 AddPromotedToType(ISD::STRICT_SINT_TO_FP, MVT::i1,
226 isPPC64 ? MVT::i64 : MVT::i32);
227 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Promote);
228 AddPromotedToType(ISD::STRICT_UINT_TO_FP, MVT::i1,
229 isPPC64 ? MVT::i64 : MVT::i32);
230
231 setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
232 AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
233 isPPC64 ? MVT::i64 : MVT::i32);
234 setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
235 AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
236 isPPC64 ? MVT::i64 : MVT::i32);
237 } else {
238 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Custom);
239 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Custom);
240 setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
241 setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
242 }
243
244 // PowerPC does not support direct load/store of condition registers.
245 setOperationAction(ISD::LOAD, MVT::i1, Custom);
246 setOperationAction(ISD::STORE, MVT::i1, Custom);
247
248 // FIXME: Remove this once the ANDI glue bug is fixed:
249 if (ANDIGlueBug)
250 setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
251
252 for (MVT VT : MVT::integer_valuetypes()) {
253 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
254 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
255 setTruncStoreAction(VT, MVT::i1, Expand);
256 }
257
258 addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
259 }
260
261 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
262 // PPC (the libcall is not available).
263 setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom);
264 setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom);
265 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::ppcf128, Custom);
266 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::ppcf128, Custom);
267
268 // We do not currently implement these libm ops for PowerPC.
269 setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
270 setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
271 setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
272 setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
273 setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
274 setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
275
276 // PowerPC has no SREM/UREM instructions unless we are on P9
277 // On P9 we may use a hardware instruction to compute the remainder.
278 // When the result of both the remainder and the division is required it is
279 // more efficient to compute the remainder from the result of the division
280 // rather than use the remainder instruction. The instructions are legalized
281 // directly because the DivRemPairsPass performs the transformation at the IR
282 // level.
283 if (Subtarget.isISA3_0()) {
284 setOperationAction(ISD::SREM, MVT::i32, Legal);
285 setOperationAction(ISD::UREM, MVT::i32, Legal);
286 setOperationAction(ISD::SREM, MVT::i64, Legal);
287 setOperationAction(ISD::UREM, MVT::i64, Legal);
288 } else {
289 setOperationAction(ISD::SREM, MVT::i32, Expand);
290 setOperationAction(ISD::UREM, MVT::i32, Expand);
291 setOperationAction(ISD::SREM, MVT::i64, Expand);
292 setOperationAction(ISD::UREM, MVT::i64, Expand);
293 }
294
295 // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
296 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
297 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
298 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
299 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
300 setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
301 setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
302 setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
303 setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
304
305 // Handle constrained floating-point operations of scalar.
306 // TODO: Handle SPE specific operation.
307 setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
308 setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
309 setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
310 setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
311 setOperationAction(ISD::STRICT_FMA, MVT::f32, Legal);
312 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
313
314 setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
315 setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
316 setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
317 setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
318 setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal);
319 if (Subtarget.hasVSX()) {
320 setOperationAction(ISD::STRICT_FRINT, MVT::f32, Legal);
321 setOperationAction(ISD::STRICT_FRINT, MVT::f64, Legal);
322 }
323
324 if (Subtarget.hasFSQRT()) {
325 setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
326 setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
327 }
328
329 if (Subtarget.hasFPRND()) {
330 setOperationAction(ISD::STRICT_FFLOOR, MVT::f32, Legal);
331 setOperationAction(ISD::STRICT_FCEIL, MVT::f32, Legal);
332 setOperationAction(ISD::STRICT_FTRUNC, MVT::f32, Legal);
333 setOperationAction(ISD::STRICT_FROUND, MVT::f32, Legal);
334
335 setOperationAction(ISD::STRICT_FFLOOR, MVT::f64, Legal);
336 setOperationAction(ISD::STRICT_FCEIL, MVT::f64, Legal);
337 setOperationAction(ISD::STRICT_FTRUNC, MVT::f64, Legal);
338 setOperationAction(ISD::STRICT_FROUND, MVT::f64, Legal);
339 }
340
341 // We don't support sin/cos/sqrt/fmod/pow
342 setOperationAction(ISD::FSIN , MVT::f64, Expand);
343 setOperationAction(ISD::FCOS , MVT::f64, Expand);
344 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
345 setOperationAction(ISD::FREM , MVT::f64, Expand);
346 setOperationAction(ISD::FPOW , MVT::f64, Expand);
347 setOperationAction(ISD::FSIN , MVT::f32, Expand);
348 setOperationAction(ISD::FCOS , MVT::f32, Expand);
349 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
350 setOperationAction(ISD::FREM , MVT::f32, Expand);
351 setOperationAction(ISD::FPOW , MVT::f32, Expand);
352 if (Subtarget.hasSPE()) {
353 setOperationAction(ISD::FMA , MVT::f64, Expand);
354 setOperationAction(ISD::FMA , MVT::f32, Expand);
355 } else {
356 setOperationAction(ISD::FMA , MVT::f64, Legal);
357 setOperationAction(ISD::FMA , MVT::f32, Legal);
358 }
359
360 if (Subtarget.hasSPE())
361 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
362
363 setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
364
365 // If we're enabling GP optimizations, use hardware square root
366 if (!Subtarget.hasFSQRT() &&
367 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
368 Subtarget.hasFRE()))
369 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
370
371 if (!Subtarget.hasFSQRT() &&
372 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
373 Subtarget.hasFRES()))
374 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
375
376 if (Subtarget.hasFCPSGN()) {
377 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
378 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
379 } else {
380 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
381 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
382 }
383
384 if (Subtarget.hasFPRND()) {
385 setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
386 setOperationAction(ISD::FCEIL, MVT::f64, Legal);
387 setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
388 setOperationAction(ISD::FROUND, MVT::f64, Legal);
389
390 setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
391 setOperationAction(ISD::FCEIL, MVT::f32, Legal);
392 setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
393 setOperationAction(ISD::FROUND, MVT::f32, Legal);
394 }
395
396 // PowerPC does not have BSWAP, but we can use vector BSWAP instruction xxbrd
397 // to speed up scalar BSWAP64.
398 // CTPOP or CTTZ were introduced in P8/P9 respectively
399 setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
400 if (Subtarget.hasP9Vector())
401 setOperationAction(ISD::BSWAP, MVT::i64 , Custom);
402 else
403 setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
404 if (Subtarget.isISA3_0()) {
405 setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
406 setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
407 } else {
408 setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
409 setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
410 }
411
412 if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
413 setOperationAction(ISD::CTPOP, MVT::i32 , Legal);
414 setOperationAction(ISD::CTPOP, MVT::i64 , Legal);
415 } else {
416 setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
417 setOperationAction(ISD::CTPOP, MVT::i64 , Expand);
418 }
419
420 // PowerPC does not have ROTR
421 setOperationAction(ISD::ROTR, MVT::i32 , Expand);
422 setOperationAction(ISD::ROTR, MVT::i64 , Expand);
423
424 if (!Subtarget.useCRBits()) {
425 // PowerPC does not have Select
426 setOperationAction(ISD::SELECT, MVT::i32, Expand);
427 setOperationAction(ISD::SELECT, MVT::i64, Expand);
428 setOperationAction(ISD::SELECT, MVT::f32, Expand);
429 setOperationAction(ISD::SELECT, MVT::f64, Expand);
430 }
431
432 // PowerPC wants to turn select_cc of FP into fsel when possible.
433 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
434 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
435
436 // PowerPC wants to optimize integer setcc a bit
437 if (!Subtarget.useCRBits())
438 setOperationAction(ISD::SETCC, MVT::i32, Custom);
439
440 if (Subtarget.hasFPU()) {
441 setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
442 setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
443 setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Legal);
444
445 setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
446 setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
447 setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Legal);
448 }
449
450 // PowerPC does not have BRCOND which requires SetCC
451 if (!Subtarget.useCRBits())
452 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
453
454 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
455
456 if (Subtarget.hasSPE()) {
457 // SPE has built-in conversions
458 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Legal);
459 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Legal);
460 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Legal);
461 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
462 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
463 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
464 } else {
465 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
466 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
467 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
468
469 // PowerPC does not have [U|S]INT_TO_FP
470 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Expand);
471 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Expand);
472 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
473 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
474 }
475
476 if (Subtarget.hasDirectMove() && isPPC64) {
477 setOperationAction(ISD::BITCAST, MVT::f32, Legal);
478 setOperationAction(ISD::BITCAST, MVT::i32, Legal);
479 setOperationAction(ISD::BITCAST, MVT::i64, Legal);
480 setOperationAction(ISD::BITCAST, MVT::f64, Legal);
481 if (TM.Options.UnsafeFPMath) {
482 setOperationAction(ISD::LRINT, MVT::f64, Legal);
483 setOperationAction(ISD::LRINT, MVT::f32, Legal);
484 setOperationAction(ISD::LLRINT, MVT::f64, Legal);
485 setOperationAction(ISD::LLRINT, MVT::f32, Legal);
486 setOperationAction(ISD::LROUND, MVT::f64, Legal);
487 setOperationAction(ISD::LROUND, MVT::f32, Legal);
488 setOperationAction(ISD::LLROUND, MVT::f64, Legal);
489 setOperationAction(ISD::LLROUND, MVT::f32, Legal);
490 }
491 } else {
492 setOperationAction(ISD::BITCAST, MVT::f32, Expand);
493 setOperationAction(ISD::BITCAST, MVT::i32, Expand);
494 setOperationAction(ISD::BITCAST, MVT::i64, Expand);
495 setOperationAction(ISD::BITCAST, MVT::f64, Expand);
496 }
497
498 // We cannot sextinreg(i1). Expand to shifts.
499 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
500
501 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
502 // SjLj exception handling but a light-weight setjmp/longjmp replacement to
503 // support continuation, user-level threading, and etc.. As a result, no
504 // other SjLj exception interfaces are implemented and please don't build
505 // your own exception handling based on them.
506 // LLVM/Clang supports zero-cost DWARF exception handling.
507 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
508 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
509
510 // We want to legalize GlobalAddress and ConstantPool nodes into the
511 // appropriate instructions to materialize the address.
512 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
513 setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
514 setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
515 setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
516 setOperationAction(ISD::JumpTable, MVT::i32, Custom);
517 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
518 setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
519 setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
520 setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
521 setOperationAction(ISD::JumpTable, MVT::i64, Custom);
522
523 // TRAP is legal.
524 setOperationAction(ISD::TRAP, MVT::Other, Legal);
525
526 // TRAMPOLINE is custom lowered.
527 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
528 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
529
530 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
531 setOperationAction(ISD::VASTART , MVT::Other, Custom);
532
533 if (Subtarget.is64BitELFABI()) {
534 // VAARG always uses double-word chunks, so promote anything smaller.
535 setOperationAction(ISD::VAARG, MVT::i1, Promote);
536 AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
537 setOperationAction(ISD::VAARG, MVT::i8, Promote);
538 AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
539 setOperationAction(ISD::VAARG, MVT::i16, Promote);
540 AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
541 setOperationAction(ISD::VAARG, MVT::i32, Promote);
542 AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
543 setOperationAction(ISD::VAARG, MVT::Other, Expand);
544 } else if (Subtarget.is32BitELFABI()) {
545 // VAARG is custom lowered with the 32-bit SVR4 ABI.
546 setOperationAction(ISD::VAARG, MVT::Other, Custom);
547 setOperationAction(ISD::VAARG, MVT::i64, Custom);
548 } else
549 setOperationAction(ISD::VAARG, MVT::Other, Expand);
550
551 // VACOPY is custom lowered with the 32-bit SVR4 ABI.
552 if (Subtarget.is32BitELFABI())
553 setOperationAction(ISD::VACOPY , MVT::Other, Custom);
554 else
555 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
556
557 // Use the default implementation.
558 setOperationAction(ISD::VAEND , MVT::Other, Expand);
559 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
560 setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom);
561 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
562 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom);
563 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
564 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
565 setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
566 setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
567
568 // We want to custom lower some of our intrinsics.
569 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
570
571 // To handle counter-based loop conditions.
572 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
573
574 setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
575 setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
576 setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
577 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
578
579 // Comparisons that require checking two conditions.
580 if (Subtarget.hasSPE()) {
581 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
582 setCondCodeAction(ISD::SETO, MVT::f64, Expand);
583 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
584 setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
585 }
586 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
587 setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
588 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
589 setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
590 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
591 setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
592 setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
593 setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
594 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
595 setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
596 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
597 setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
598
599 if (Subtarget.has64BitSupport()) {
600 // They also have instructions for converting between i64 and fp.
601 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
602 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Expand);
603 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
604 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand);
605 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
606 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
607 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
608 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
609 // This is just the low 32 bits of a (signed) fp->i64 conversion.
610 // We cannot do this with Promote because i64 is not a legal type.
611 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
612 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
613
614 if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) {
615 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
616 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
617 }
618 } else {
619 // PowerPC does not have FP_TO_UINT on 32-bit implementations.
620 if (Subtarget.hasSPE()) {
621 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Legal);
622 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
623 } else {
624 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Expand);
625 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
626 }
627 }
628
629 // With the instructions enabled under FPCVT, we can do everything.
630 if (Subtarget.hasFPCVT()) {
631 if (Subtarget.has64BitSupport()) {
632 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
633 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
634 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
635 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
636 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
637 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
638 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
639 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
640 }
641
642 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
643 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
644 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
645 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
646 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
647 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
648 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
649 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
650 }
651
652 if (Subtarget.use64BitRegs()) {
653 // 64-bit PowerPC implementations can support i64 types directly
654 addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
655 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
656 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
657 // 64-bit PowerPC wants to expand i128 shifts itself.
658 setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
659 setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
660 setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
661 } else {
662 // 32-bit PowerPC wants to expand i64 shifts itself.
663 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
664 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
665 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
666 }
667
668 // PowerPC has better expansions for funnel shifts than the generic
669 // TargetLowering::expandFunnelShift.
670 if (Subtarget.has64BitSupport()) {
671 setOperationAction(ISD::FSHL, MVT::i64, Custom);
672 setOperationAction(ISD::FSHR, MVT::i64, Custom);
673 }
674 setOperationAction(ISD::FSHL, MVT::i32, Custom);
675 setOperationAction(ISD::FSHR, MVT::i32, Custom);
676
677 if (Subtarget.hasVSX()) {
678 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
679 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
680 setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
681 setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
682 }
683
684 if (Subtarget.hasAltivec()) {
685 for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
686 setOperationAction(ISD::SADDSAT, VT, Legal);
687 setOperationAction(ISD::SSUBSAT, VT, Legal);
688 setOperationAction(ISD::UADDSAT, VT, Legal);
689 setOperationAction(ISD::USUBSAT, VT, Legal);
690 }
691 // First set operation action for all vector types to expand. Then we
692 // will selectively turn on ones that can be effectively codegen'd.
693 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
694 // add/sub are legal for all supported vector VT's.
695 setOperationAction(ISD::ADD, VT, Legal);
696 setOperationAction(ISD::SUB, VT, Legal);
697
698 // For v2i64, these are only valid with P8Vector. This is corrected after
699 // the loop.
700 if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
701 setOperationAction(ISD::SMAX, VT, Legal);
702 setOperationAction(ISD::SMIN, VT, Legal);
703 setOperationAction(ISD::UMAX, VT, Legal);
704 setOperationAction(ISD::UMIN, VT, Legal);
705 }
706 else {
707 setOperationAction(ISD::SMAX, VT, Expand);
708 setOperationAction(ISD::SMIN, VT, Expand);
709 setOperationAction(ISD::UMAX, VT, Expand);
710 setOperationAction(ISD::UMIN, VT, Expand);
711 }
712
713 if (Subtarget.hasVSX()) {
714 setOperationAction(ISD::FMAXNUM, VT, Legal);
715 setOperationAction(ISD::FMINNUM, VT, Legal);
716 }
717
718 // Vector instructions introduced in P8
719 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
720 setOperationAction(ISD::CTPOP, VT, Legal);
721 setOperationAction(ISD::CTLZ, VT, Legal);
722 }
723 else {
724 setOperationAction(ISD::CTPOP, VT, Expand);
725 setOperationAction(ISD::CTLZ, VT, Expand);
726 }
727
728 // Vector instructions introduced in P9
729 if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
730 setOperationAction(ISD::CTTZ, VT, Legal);
731 else
732 setOperationAction(ISD::CTTZ, VT, Expand);
733
734 // We promote all shuffles to v16i8.
735 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
736 AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
737
738 // We promote all non-typed operations to v4i32.
739 setOperationAction(ISD::AND , VT, Promote);
740 AddPromotedToType (ISD::AND , VT, MVT::v4i32);
741 setOperationAction(ISD::OR , VT, Promote);
742 AddPromotedToType (ISD::OR , VT, MVT::v4i32);
743 setOperationAction(ISD::XOR , VT, Promote);
744 AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
745 setOperationAction(ISD::LOAD , VT, Promote);
746 AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
747 setOperationAction(ISD::SELECT, VT, Promote);
748 AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
749 setOperationAction(ISD::VSELECT, VT, Legal);
750 setOperationAction(ISD::SELECT_CC, VT, Promote);
751 AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
752 setOperationAction(ISD::STORE, VT, Promote);
753 AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
754
755 // No other operations are legal.
756 setOperationAction(ISD::MUL , VT, Expand);
757 setOperationAction(ISD::SDIV, VT, Expand);
758 setOperationAction(ISD::SREM, VT, Expand);
759 setOperationAction(ISD::UDIV, VT, Expand);
760 setOperationAction(ISD::UREM, VT, Expand);
761 setOperationAction(ISD::FDIV, VT, Expand);
762 setOperationAction(ISD::FREM, VT, Expand);
763 setOperationAction(ISD::FNEG, VT, Expand);
764 setOperationAction(ISD::FSQRT, VT, Expand);
765 setOperationAction(ISD::FLOG, VT, Expand);
766 setOperationAction(ISD::FLOG10, VT, Expand);
767 setOperationAction(ISD::FLOG2, VT, Expand);
768 setOperationAction(ISD::FEXP, VT, Expand);
769 setOperationAction(ISD::FEXP2, VT, Expand);
770 setOperationAction(ISD::FSIN, VT, Expand);
771 setOperationAction(ISD::FCOS, VT, Expand);
772 setOperationAction(ISD::FABS, VT, Expand);
773 setOperationAction(ISD::FFLOOR, VT, Expand);
774 setOperationAction(ISD::FCEIL, VT, Expand);
775 setOperationAction(ISD::FTRUNC, VT, Expand);
776 setOperationAction(ISD::FRINT, VT, Expand);
777 setOperationAction(ISD::FNEARBYINT, VT, Expand);
778 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
779 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
780 setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
781 setOperationAction(ISD::MULHU, VT, Expand);
782 setOperationAction(ISD::MULHS, VT, Expand);
783 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
784 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
785 setOperationAction(ISD::UDIVREM, VT, Expand);
786 setOperationAction(ISD::SDIVREM, VT, Expand);
787 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
788 setOperationAction(ISD::FPOW, VT, Expand);
789 setOperationAction(ISD::BSWAP, VT, Expand);
790 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
791 setOperationAction(ISD::ROTL, VT, Expand);
792 setOperationAction(ISD::ROTR, VT, Expand);
793
794 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
795 setTruncStoreAction(VT, InnerVT, Expand);
796 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
797 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
798 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
799 }
800 }
801 setOperationAction(ISD::SELECT_CC, MVT::v4i32, Expand);
802 if (!Subtarget.hasP8Vector()) {
803 setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
804 setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
805 setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
806 setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
807 }
808
809 for (auto VT : {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8})
810 setOperationAction(ISD::ABS, VT, Custom);
811
812 // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
813 // with merges, splats, etc.
814 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
815
816 // Vector truncates to sub-word integer that fit in an Altivec/VSX register
817 // are cheap, so handle them before they get expanded to scalar.
818 setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
819 setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
820 setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
821 setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
822 setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
823
824 setOperationAction(ISD::AND , MVT::v4i32, Legal);
825 setOperationAction(ISD::OR , MVT::v4i32, Legal);
826 setOperationAction(ISD::XOR , MVT::v4i32, Legal);
827 setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
828 setOperationAction(ISD::SELECT, MVT::v4i32,
829 Subtarget.useCRBits() ? Legal : Expand);
830 setOperationAction(ISD::STORE , MVT::v4i32, Legal);
831 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
832 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);
833 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
834 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal);
835 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
836 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
837 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
838 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
839 setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
840 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
841 setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
842 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
843
844 // Without hasP8Altivec set, v2i64 SMAX isn't available.
845 // But ABS custom lowering requires SMAX support.
846 if (!Subtarget.hasP8Altivec())
847 setOperationAction(ISD::ABS, MVT::v2i64, Expand);
848
849 // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
850 setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
851 // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
852 if (Subtarget.hasAltivec())
853 for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
854 setOperationAction(ISD::ROTL, VT, Legal);
855 // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
856 if (Subtarget.hasP8Altivec())
857 setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
858
859 addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
860 addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
861 addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
862 addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
863
864 setOperationAction(ISD::MUL, MVT::v4f32, Legal);
865 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
866
867 if (Subtarget.hasVSX()) {
868 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
869 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
870 }
871
872 if (Subtarget.hasP8Altivec())
873 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
874 else
875 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
876
877 if (Subtarget.isISA3_1()) {
878 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
879 setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
880 setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
881 setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
882 setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
883 setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
884 setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
885 setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
886 setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
887 setOperationAction(ISD::UREM, MVT::v2i64, Legal);
888 setOperationAction(ISD::SREM, MVT::v2i64, Legal);
889 setOperationAction(ISD::UREM, MVT::v4i32, Legal);
890 setOperationAction(ISD::SREM, MVT::v4i32, Legal);
891 setOperationAction(ISD::UREM, MVT::v1i128, Legal);
892 setOperationAction(ISD::SREM, MVT::v1i128, Legal);
893 setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
894 setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
895 }
896
897 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
898 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
899
900 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
901 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
902
903 setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
904 setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
905 setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
906 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
907
908 // Altivec does not contain unordered floating-point compare instructions
909 setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
910 setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
911 setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
912 setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
913
914 if (Subtarget.hasVSX()) {
915 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
916 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
917 if (Subtarget.hasP8Vector()) {
918 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
919 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
920 }
921 if (Subtarget.hasDirectMove() && isPPC64) {
922 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
923 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
924 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
925 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
926 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
927 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
928 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
929 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
930 }
931 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
932
933 // The nearbyint variants are not allowed to raise the inexact exception
934 // so we can only code-gen them with unsafe math.
935 if (TM.Options.UnsafeFPMath) {
936 setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
937 setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
938 }
939
940 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
941 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
942 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
943 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
944 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
945 setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
946 setOperationAction(ISD::FROUND, MVT::f64, Legal);
947 setOperationAction(ISD::FRINT, MVT::f64, Legal);
948
949 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
950 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
951 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
952 setOperationAction(ISD::FROUND, MVT::f32, Legal);
953 setOperationAction(ISD::FRINT, MVT::f32, Legal);
954
955 setOperationAction(ISD::MUL, MVT::v2f64, Legal);
956 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
957
958 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
959 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
960
961 // Share the Altivec comparison restrictions.
962 setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
963 setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
964 setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
965 setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
966
967 setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
968 setOperationAction(ISD::STORE, MVT::v2f64, Legal);
969
970 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);
971
972 if (Subtarget.hasP8Vector())
973 addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
974
975 addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
976
977 addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
978 addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
979 addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
980
981 if (Subtarget.hasP8Altivec()) {
982 setOperationAction(ISD::SHL, MVT::v2i64, Legal);
983 setOperationAction(ISD::SRA, MVT::v2i64, Legal);
984 setOperationAction(ISD::SRL, MVT::v2i64, Legal);
985
986 // 128 bit shifts can be accomplished via 3 instructions for SHL and
987 // SRL, but not for SRA because of the instructions available:
988 // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth
989 // doing
990 setOperationAction(ISD::SHL, MVT::v1i128, Expand);
991 setOperationAction(ISD::SRL, MVT::v1i128, Expand);
992 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
993
994 setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
995 }
996 else {
997 setOperationAction(ISD::SHL, MVT::v2i64, Expand);
998 setOperationAction(ISD::SRA, MVT::v2i64, Expand);
999 setOperationAction(ISD::SRL, MVT::v2i64, Expand);
1000
1001 setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
1002
1003 // VSX v2i64 only supports non-arithmetic operations.
1004 setOperationAction(ISD::ADD, MVT::v2i64, Expand);
1005 setOperationAction(ISD::SUB, MVT::v2i64, Expand);
1006 }
1007
1008 if (Subtarget.isISA3_1())
1009 setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
1010 else
1011 setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
1012
1013 setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
1014 AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
1015 setOperationAction(ISD::STORE, MVT::v2i64, Promote);
1016 AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
1017
1018 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);
1019
1020 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal);
1021 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal);
1022 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal);
1023 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal);
1024 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
1025 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
1026 setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
1027 setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
1028
1029 // Custom handling for partial vectors of integers converted to
1030 // floating point. We already have optimal handling for v2i32 through
1031 // the DAG combine, so those aren't necessary.
1032 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i8, Custom);
1033 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i8, Custom);
1034 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i16, Custom);
1035 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i16, Custom);
1036 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i8, Custom);
1037 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i8, Custom);
1038 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i16, Custom);
1039 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i16, Custom);
1040 setOperationAction(ISD::UINT_TO_FP, MVT::v2i8, Custom);
1041 setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
1042 setOperationAction(ISD::UINT_TO_FP, MVT::v2i16, Custom);
1043 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
1044 setOperationAction(ISD::SINT_TO_FP, MVT::v2i8, Custom);
1045 setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Custom);
1046 setOperationAction(ISD::SINT_TO_FP, MVT::v2i16, Custom);
1047 setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
1048
1049 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
1050 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
1051 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
1052 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
1053 setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
1054 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Legal);
1055
1056 if (Subtarget.hasDirectMove())
1057 setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
1058 setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
1059
1060 // Handle constrained floating-point operations of vector.
1061 // The predictor is `hasVSX` because altivec instruction has
1062 // no exception but VSX vector instruction has.
1063 setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
1064 setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
1065 setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
1066 setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
1067 setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
1068 setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
1069 setOperationAction(ISD::STRICT_FMAXNUM, MVT::v4f32, Legal);
1070 setOperationAction(ISD::STRICT_FMINNUM, MVT::v4f32, Legal);
1071 setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
1072 setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
1073 setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal);
1074 setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
1075 setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);
1076
1077 setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
1078 setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
1079 setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
1080 setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
1081 setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);
1082 setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
1083 setOperationAction(ISD::STRICT_FMAXNUM, MVT::v2f64, Legal);
1084 setOperationAction(ISD::STRICT_FMINNUM, MVT::v2f64, Legal);
1085 setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal);
1086 setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
1087 setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal);
1088 setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
1089 setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal);
1090
1091 addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1092 }
1093
1094 if (Subtarget.hasP8Altivec()) {
1095 addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1096 addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1097 }
1098
1099 if (Subtarget.hasP9Vector()) {
1100 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
1101 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
1102
1103 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1104 // SRL, but not for SRA because of the instructions available:
1105 // VS{RL} and VS{RL}O.
1106 setOperationAction(ISD::SHL, MVT::v1i128, Legal);
1107 setOperationAction(ISD::SRL, MVT::v1i128, Legal);
1108 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1109
1110 addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1111 setOperationAction(ISD::FADD, MVT::f128, Legal);
1112 setOperationAction(ISD::FSUB, MVT::f128, Legal);
1113 setOperationAction(ISD::FDIV, MVT::f128, Legal);
1114 setOperationAction(ISD::FMUL, MVT::f128, Legal);
1115 setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
1116 // No extending loads to f128 on PPC.
1117 for (MVT FPT : MVT::fp_valuetypes())
1118 setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
1119 setOperationAction(ISD::FMA, MVT::f128, Legal);
1120 setCondCodeAction(ISD::SETULT, MVT::f128, Expand);
1121 setCondCodeAction(ISD::SETUGT, MVT::f128, Expand);
1122 setCondCodeAction(ISD::SETUEQ, MVT::f128, Expand);
1123 setCondCodeAction(ISD::SETOGE, MVT::f128, Expand);
1124 setCondCodeAction(ISD::SETOLE, MVT::f128, Expand);
1125 setCondCodeAction(ISD::SETONE, MVT::f128, Expand);
1126
1127 setOperationAction(ISD::FTRUNC, MVT::f128, Legal);
1128 setOperationAction(ISD::FRINT, MVT::f128, Legal);
1129 setOperationAction(ISD::FFLOOR, MVT::f128, Legal);
1130 setOperationAction(ISD::FCEIL, MVT::f128, Legal);
1131 setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal);
1132 setOperationAction(ISD::FROUND, MVT::f128, Legal);
1133
1134 setOperationAction(ISD::SELECT, MVT::f128, Expand);
1135 setOperationAction(ISD::FP_ROUND, MVT::f64, Legal);
1136 setOperationAction(ISD::FP_ROUND, MVT::f32, Legal);
1137 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
1138 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
1139 setOperationAction(ISD::BITCAST, MVT::i128, Custom);
1140 // No implementation for these ops for PowerPC.
1141 setOperationAction(ISD::FSIN, MVT::f128, Expand);
1142 setOperationAction(ISD::FCOS, MVT::f128, Expand);
1143 setOperationAction(ISD::FPOW, MVT::f128, Expand);
1144 setOperationAction(ISD::FPOWI, MVT::f128, Expand);
1145 setOperationAction(ISD::FREM, MVT::f128, Expand);
1146
1147 // Handle constrained floating-point operations of fp128
1148 setOperationAction(ISD::STRICT_FADD, MVT::f128, Legal);
1149 setOperationAction(ISD::STRICT_FSUB, MVT::f128, Legal);
1150 setOperationAction(ISD::STRICT_FMUL, MVT::f128, Legal);
1151 setOperationAction(ISD::STRICT_FDIV, MVT::f128, Legal);
1152 setOperationAction(ISD::STRICT_FMA, MVT::f128, Legal);
1153 setOperationAction(ISD::STRICT_FSQRT, MVT::f128, Legal);
1154 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Legal);
1155 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);
1156 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
1157 setOperationAction(ISD::STRICT_FRINT, MVT::f128, Legal);
1158 setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f128, Legal);
1159 setOperationAction(ISD::STRICT_FFLOOR, MVT::f128, Legal);
1160 setOperationAction(ISD::STRICT_FCEIL, MVT::f128, Legal);
1161 setOperationAction(ISD::STRICT_FTRUNC, MVT::f128, Legal);
1162 setOperationAction(ISD::STRICT_FROUND, MVT::f128, Legal);
1163 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
1164 setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
1165 setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
1166 setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
1167 setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
1168 }
1169
1170 if (Subtarget.hasP9Altivec()) {
1171 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
1172 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
1173
1174 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Legal);
1175 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
1176 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
1177 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Legal);
1178 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Legal);
1179 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
1180 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
1181 }
1182 }
1183
1184 if (Subtarget.has64BitSupport())
1185 setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
1186
1187 if (Subtarget.isISA3_1())
1188 setOperationAction(ISD::SRA, MVT::v1i128, Legal);
1189
1190 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1191
1192 if (!isPPC64) {
1193 setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
1194 setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
1195 }
1196
1197 setBooleanContents(ZeroOrOneBooleanContent);
1198
1199 if (Subtarget.hasAltivec()) {
1200 // Altivec instructions set fields to all zeros or all ones.
1201 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
1202 }
1203
1204 if (!isPPC64) {
1205 // These libcalls are not available in 32-bit.
1206 setLibcallName(RTLIB::SHL_I128, nullptr);
1207 setLibcallName(RTLIB::SRL_I128, nullptr);
1208 setLibcallName(RTLIB::SRA_I128, nullptr);
1209 }
1210
1211 if (!isPPC64)
1212 setMaxAtomicSizeInBitsSupported(32);
1213
1214 setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1215
1216 // We have target-specific dag combine patterns for the following nodes:
1217 setTargetDAGCombine(ISD::ADD);
1218 setTargetDAGCombine(ISD::SHL);
1219 setTargetDAGCombine(ISD::SRA);
1220 setTargetDAGCombine(ISD::SRL);
1221 setTargetDAGCombine(ISD::MUL);
1222 setTargetDAGCombine(ISD::FMA);
1223 setTargetDAGCombine(ISD::SINT_TO_FP);
1224 setTargetDAGCombine(ISD::BUILD_VECTOR);
1225 if (Subtarget.hasFPCVT())
1226 setTargetDAGCombine(ISD::UINT_TO_FP);
1227 setTargetDAGCombine(ISD::LOAD);
1228 setTargetDAGCombine(ISD::STORE);
1229 setTargetDAGCombine(ISD::BR_CC);
1230 if (Subtarget.useCRBits())
1231 setTargetDAGCombine(ISD::BRCOND);
1232 setTargetDAGCombine(ISD::BSWAP);
1233 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
1234 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
1235 setTargetDAGCombine(ISD::INTRINSIC_VOID);
1236
1237 setTargetDAGCombine(ISD::SIGN_EXTEND);
1238 setTargetDAGCombine(ISD::ZERO_EXTEND);
1239 setTargetDAGCombine(ISD::ANY_EXTEND);
1240
1241 setTargetDAGCombine(ISD::TRUNCATE);
1242 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
1243
1244
1245 if (Subtarget.useCRBits()) {
1246 setTargetDAGCombine(ISD::TRUNCATE);
1247 setTargetDAGCombine(ISD::SETCC);
1248 setTargetDAGCombine(ISD::SELECT_CC);
1249 }
1250
1251 if (Subtarget.hasP9Altivec()) {
1252 setTargetDAGCombine(ISD::ABS);
1253 setTargetDAGCombine(ISD::VSELECT);
1254 }
1255
1256 setLibcallName(RTLIB::LOG_F128, "logf128");
1257 setLibcallName(RTLIB::LOG2_F128, "log2f128");
1258 setLibcallName(RTLIB::LOG10_F128, "log10f128");
1259 setLibcallName(RTLIB::EXP_F128, "expf128");
1260 setLibcallName(RTLIB::EXP2_F128, "exp2f128");
1261 setLibcallName(RTLIB::SIN_F128, "sinf128");
1262 setLibcallName(RTLIB::COS_F128, "cosf128");
1263 setLibcallName(RTLIB::POW_F128, "powf128");
1264 setLibcallName(RTLIB::FMIN_F128, "fminf128");
1265 setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
1266 setLibcallName(RTLIB::POWI_F128, "__powikf2");
1267 setLibcallName(RTLIB::REM_F128, "fmodf128");
1268
1269 // With 32 condition bits, we don't need to sink (and duplicate) compares
1270 // aggressively in CodeGenPrep.
1271 if (Subtarget.useCRBits()) {
1272 setHasMultipleConditionRegisters();
1273 setJumpIsExpensive();
1274 }
1275
1276 setMinFunctionAlignment(Align(4));
1277
1278 switch (Subtarget.getCPUDirective()) {
1279 default: break;
1280 case PPC::DIR_970:
1281 case PPC::DIR_A2:
1282 case PPC::DIR_E500:
1283 case PPC::DIR_E500mc:
1284 case PPC::DIR_E5500:
1285 case PPC::DIR_PWR4:
1286 case PPC::DIR_PWR5:
1287 case PPC::DIR_PWR5X:
1288 case PPC::DIR_PWR6:
1289 case PPC::DIR_PWR6X:
1290 case PPC::DIR_PWR7:
1291 case PPC::DIR_PWR8:
1292 case PPC::DIR_PWR9:
1293 case PPC::DIR_PWR10:
1294 case PPC::DIR_PWR_FUTURE:
1295 setPrefLoopAlignment(Align(16));
1296 setPrefFunctionAlignment(Align(16));
1297 break;
1298 }
1299
1300 if (Subtarget.enableMachineScheduler())
1301 setSchedulingPreference(Sched::Source);
1302 else
1303 setSchedulingPreference(Sched::Hybrid);
1304
1305 computeRegisterProperties(STI.getRegisterInfo());
1306
1307 // The Freescale cores do better with aggressive inlining of memcpy and
1308 // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
1309 if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
1310 Subtarget.getCPUDirective() == PPC::DIR_E5500) {
1311 MaxStoresPerMemset = 32;
1312 MaxStoresPerMemsetOptSize = 16;
1313 MaxStoresPerMemcpy = 32;
1314 MaxStoresPerMemcpyOptSize = 8;
1315 MaxStoresPerMemmove = 32;
1316 MaxStoresPerMemmoveOptSize = 8;
1317 } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
1318 // The A2 also benefits from (very) aggressive inlining of memcpy and
1319 // friends. The overhead of the function call, even when warm, can be
1320 // over one hundred cycles.
1321 MaxStoresPerMemset = 128;
1322 MaxStoresPerMemcpy = 128;
1323 MaxStoresPerMemmove = 128;
1324 MaxLoadsPerMemcmp = 128;
1325 } else {
1326 MaxLoadsPerMemcmp = 8;
1327 MaxLoadsPerMemcmpOptSize = 4;
1328 }
1329
1330 IsStrictFPEnabled = true;
1331
1332 // Let the subtarget (CPU) decide if a predictable select is more expensive
1333 // than the corresponding branch. This information is used in CGP to decide
1334 // when to convert selects into branches.
1335 PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();
1336}
1337
1338/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1339/// the desired ByVal argument alignment.
1340static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1341 if (MaxAlign == MaxMaxAlign)
1342 return;
1343 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1344 if (MaxMaxAlign >= 32 &&
1345 VTy->getPrimitiveSizeInBits().getFixedSize() >= 256)
1346 MaxAlign = Align(32);
1347 else if (VTy->getPrimitiveSizeInBits().getFixedSize() >= 128 &&
1348 MaxAlign < 16)
1349 MaxAlign = Align(16);
1350 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1351 Align EltAlign;
1352 getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1353 if (EltAlign > MaxAlign)
1354 MaxAlign = EltAlign;
1355 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1356 for (auto *EltTy : STy->elements()) {
1357 Align EltAlign;
1358 getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1359 if (EltAlign > MaxAlign)
1360 MaxAlign = EltAlign;
1361 if (MaxAlign == MaxMaxAlign)
1362 break;
1363 }
1364 }
1365}
1366
1367/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1368/// function arguments in the caller parameter area.
1369unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
1370 const DataLayout &DL) const {
1371 // 16-byte and wider vectors are passed on a 16-byte boundary.
1372 // The rest are passed on an 8-byte boundary on PPC64 and a 4-byte boundary on PPC32.
1373 Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1374 if (Subtarget.hasAltivec())
1375 getMaxByValAlign(Ty, Alignment, Align(16));
1376 return Alignment.value();
1377}
1378
1379bool PPCTargetLowering::useSoftFloat() const {
1380 return Subtarget.useSoftFloat();
1381}
1382
1383bool PPCTargetLowering::hasSPE() const {
1384 return Subtarget.hasSPE();
1385}
1386
1387bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
1388 return VT.isScalarInteger();
1389}
1390
1391/// isMulhCheaperThanMulShift - Return true if a mulh[s|u] node for a specific
1392/// type is cheaper than a multiply followed by a shift.
1393/// This is true for words and doublewords on 64-bit PowerPC.
1394bool PPCTargetLowering::isMulhCheaperThanMulShift(EVT Type) const {
1395 if (Subtarget.isPPC64() && (isOperationLegal(ISD::MULHS, Type) ||
1396 isOperationLegal(ISD::MULHU, Type)))
1397 return true;
1398 return TargetLowering::isMulhCheaperThanMulShift(Type);
1399}
1400
1401const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1402 switch ((PPCISD::NodeType)Opcode) {
1403 case PPCISD::FIRST_NUMBER: break;
1404 case PPCISD::FSEL: return "PPCISD::FSEL";
1405 case PPCISD::XSMAXCDP: return "PPCISD::XSMAXCDP";
1406 case PPCISD::XSMINCDP: return "PPCISD::XSMINCDP";
1407 case PPCISD::FCFID: return "PPCISD::FCFID";
1408 case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
1409 case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
1410 case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
1411 case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
1412 case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
1413 case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
1414 case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
1415 case PPCISD::FP_TO_UINT_IN_VSR:
1416 return "PPCISD::FP_TO_UINT_IN_VSR,";
1417 case PPCISD::FP_TO_SINT_IN_VSR:
1418 return "PPCISD::FP_TO_SINT_IN_VSR";
1419 case PPCISD::FRE: return "PPCISD::FRE";
1420 case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
1421 case PPCISD::STFIWX: return "PPCISD::STFIWX";
1422 case PPCISD::VPERM: return "PPCISD::VPERM";
1423 case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
1424 case PPCISD::XXSPLTI_SP_TO_DP:
1425 return "PPCISD::XXSPLTI_SP_TO_DP";
1426 case PPCISD::XXSPLTI32DX:
1427 return "PPCISD::XXSPLTI32DX";
1428 case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
1429 case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
1430 case PPCISD::VECSHL: return "PPCISD::VECSHL";
1431 case PPCISD::CMPB: return "PPCISD::CMPB";
1432 case PPCISD::Hi: return "PPCISD::Hi";
1433 case PPCISD::Lo: return "PPCISD::Lo";
1434 case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
1435 case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1436 case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
1437 case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
1438 case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
1439 case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA";
1440 case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
1441 case PPCISD::SRL: return "PPCISD::SRL";
1442 case PPCISD::SRA: return "PPCISD::SRA";
1443 case PPCISD::SHL: return "PPCISD::SHL";
1444 case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
1445 case PPCISD::CALL: return "PPCISD::CALL";
1446 case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
1447 case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC";
1448 case PPCISD::MTCTR: return "PPCISD::MTCTR";
1449 case PPCISD::BCTRL: return "PPCISD::BCTRL";
1450 case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
1451 case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
1452 case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
1453 case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
1454 case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1455 case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
1456 case PPCISD::MFVSR: return "PPCISD::MFVSR";
1457 case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
1458 case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
1459 case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
1460 case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
1461 case PPCISD::SCALAR_TO_VECTOR_PERMUTED:
1462 return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
1463 case PPCISD::ANDI_rec_1_EQ_BIT:
1464 return "PPCISD::ANDI_rec_1_EQ_BIT";
1465 case PPCISD::ANDI_rec_1_GT_BIT:
1466 return "PPCISD::ANDI_rec_1_GT_BIT";
1467 case PPCISD::VCMP: return "PPCISD::VCMP";
1468 case PPCISD::VCMPo: return "PPCISD::VCMPo";
1469 case PPCISD::LBRX: return "PPCISD::LBRX";
1470 case PPCISD::STBRX: return "PPCISD::STBRX";
1471 case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
1472 case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
1473 case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
1474 case PPCISD::STXSIX: return "PPCISD::STXSIX";
1475 case PPCISD::VEXTS: return "PPCISD::VEXTS";
1476 case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
1477 case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
1478 case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE";
1479 case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE";
1480 case PPCISD::ST_VSR_SCAL_INT:
1481 return "PPCISD::ST_VSR_SCAL_INT";
1482 case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
1483 case PPCISD::BDNZ: return "PPCISD::BDNZ";
1484 case PPCISD::BDZ: return "PPCISD::BDZ";
1485 case PPCISD::MFFS: return "PPCISD::MFFS";
1486 case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
1487 case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
1488 case PPCISD::CR6SET: return "PPCISD::CR6SET";
1489 case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
1490 case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
1491 case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
1492 case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1493 case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
1494 case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
1495 case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
1496 case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
1497 case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
1498 case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1499 case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
1500 case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
1501 case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
1502 case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1503 case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1504 case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
1505 case PPCISD::PADDI_DTPREL:
1506 return "PPCISD::PADDI_DTPREL";
1507 case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
1508 case PPCISD::SC: return "PPCISD::SC";
1509 case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";
1510 case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";
1511 case PPCISD::RFEBB: return "PPCISD::RFEBB";
1512 case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
1513 case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
1514 case PPCISD::VABSD: return "PPCISD::VABSD";
1515 case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
1516 case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
1517 case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
1518 case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
1519 case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
1520 case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
1521 case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR";
1522 case PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR:
1523 return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
1524 case PPCISD::TLS_LOCAL_EXEC_MAT_ADDR:
1525 return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
1526 case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
1527 case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
1528 case PPCISD::STRICT_FADDRTZ:
1529 return "PPCISD::STRICT_FADDRTZ";
1530 case PPCISD::STRICT_FCTIDZ:
1531 return "PPCISD::STRICT_FCTIDZ";
1532 case PPCISD::STRICT_FCTIWZ:
1533 return "PPCISD::STRICT_FCTIWZ";
1534 case PPCISD::STRICT_FCTIDUZ:
1535 return "PPCISD::STRICT_FCTIDUZ";
1536 case PPCISD::STRICT_FCTIWUZ:
1537 return "PPCISD::STRICT_FCTIWUZ";
1538 case PPCISD::STRICT_FCFID:
1539 return "PPCISD::STRICT_FCFID";
1540 case PPCISD::STRICT_FCFIDU:
1541 return "PPCISD::STRICT_FCFIDU";
1542 case PPCISD::STRICT_FCFIDS:
1543 return "PPCISD::STRICT_FCFIDS";
1544 case PPCISD::STRICT_FCFIDUS:
1545 return "PPCISD::STRICT_FCFIDUS";
1546 case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
1547 }
1548 return nullptr;
1549}
1550
1551EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
1552 EVT VT) const {
1553 if (!VT.isVector())
1554 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1555
1556 return VT.changeVectorElementTypeToInteger();
1557}
1558
1559bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
1560 assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1561 return true;
1562}
1563
1564//===----------------------------------------------------------------------===//
1565// Node matching predicates, for use by the tblgen matching code.
1566//===----------------------------------------------------------------------===//
1567
1568/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1569static bool isFloatingPointZero(SDValue Op) {
1570 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1571 return CFP->getValueAPF().isZero();
1572 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1573 // Maybe this has already been legalized into the constant pool?
1574 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1575 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1576 return CFP->getValueAPF().isZero();
1577 }
1578 return false;
1579}
1580
1581/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
1582/// true if Op is undef or if it matches the specified value.
1583static bool isConstantOrUndef(int Op, int Val) {
1584 return Op < 0 || Op == Val;
1585}
1586
1587/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1588/// VPKUHUM instruction.
1589/// The ShuffleKind distinguishes between big-endian operations with
1590/// two different inputs (0), either-endian operations with two identical
1591/// inputs (1), and little-endian operations with two different inputs (2).
1592/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1593bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1594 SelectionDAG &DAG) {
1595 bool IsLE = DAG.getDataLayout().isLittleEndian();
1596 if (ShuffleKind == 0) {
1597 if (IsLE)
1598 return false;
1599 for (unsigned i = 0; i != 16; ++i)
1600 if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1601 return false;
1602 } else if (ShuffleKind == 2) {
1603 if (!IsLE)
1604 return false;
1605 for (unsigned i = 0; i != 16; ++i)
1606 if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1607 return false;
1608 } else if (ShuffleKind == 1) {
1609 unsigned j = IsLE ? 0 : 1;
1610 for (unsigned i = 0; i != 8; ++i)
1611 if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
1612 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
1613 return false;
1614 }
1615 return true;
1616}
1617
1618/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1619/// VPKUWUM instruction.
1620/// The ShuffleKind distinguishes between big-endian operations with
1621/// two different inputs (0), either-endian operations with two identical
1622/// inputs (1), and little-endian operations with two different inputs (2).
1623/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1624bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1625 SelectionDAG &DAG) {
1626 bool IsLE = DAG.getDataLayout().isLittleEndian();
1627 if (ShuffleKind == 0) {
1628 if (IsLE)
1629 return false;
1630 for (unsigned i = 0; i != 16; i += 2)
1631 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
1632 !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
1633 return false;
1634 } else if (ShuffleKind == 2) {
1635 if (!IsLE)
1636 return false;
1637 for (unsigned i = 0; i != 16; i += 2)
1638 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1639 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
1640 return false;
1641 } else if (ShuffleKind == 1) {
1642 unsigned j = IsLE ? 0 : 2;
1643 for (unsigned i = 0; i != 8; i += 2)
1644 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1645 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1646 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1647 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
1648 return false;
1649 }
1650 return true;
1651}
1652
1653/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1654/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1655/// current subtarget.
1656///
1657/// The ShuffleKind distinguishes between big-endian operations with
1658/// two different inputs (0), either-endian operations with two identical
1659/// inputs (1), and little-endian operations with two different inputs (2).
1660/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1661bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1662 SelectionDAG &DAG) {
1663 const PPCSubtarget& Subtarget =
1664 static_cast<const PPCSubtarget&>(DAG.getSubtarget());
1665 if (!Subtarget.hasP8Vector())
1666 return false;
1667
1668 bool IsLE = DAG.getDataLayout().isLittleEndian();
1669 if (ShuffleKind == 0) {
1670 if (IsLE)
1671 return false;
1672 for (unsigned i = 0; i != 16; i += 4)
1673 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||
1674 !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
1675 !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
1676 !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
1677 return false;
1678 } else if (ShuffleKind == 2) {
1679 if (!IsLE)
1680 return false;
1681 for (unsigned i = 0; i != 16; i += 4)
1682 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1683 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
1684 !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
1685 !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
1686 return false;
1687 } else if (ShuffleKind == 1) {
1688 unsigned j = IsLE ? 0 : 4;
1689 for (unsigned i = 0; i != 8; i += 4)
1690 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1691 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1692 !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
1693 !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
1694 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1695 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
1696 !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1697 !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1698 return false;
1699 }
1700 return true;
1701}
1702
1703/// isVMerge - Common function, used to match vmrg* shuffles.
1704///
1705static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
1706 unsigned LHSStart, unsigned RHSStart) {
1707 if (N->getValueType(0) != MVT::v16i8)
1708 return false;
1709 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
1710 "Unsupported merge size!");
1711
1712 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
1713 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
1714 if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
1715 LHSStart+j+i*UnitSize) ||
1716 !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
1717 RHSStart+j+i*UnitSize))
1718 return false;
1719 }
1720 return true;
1721}
1722
1723/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
1724/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
1725/// The ShuffleKind distinguishes between big-endian merges with two
1726/// different inputs (0), either-endian merges with two identical inputs (1),
1727/// and little-endian merges with two different inputs (2). For the latter,
1728/// the input operands are swapped (see PPCInstrAltivec.td).
1729bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1730 unsigned ShuffleKind, SelectionDAG &DAG) {
1731 if (DAG.getDataLayout().isLittleEndian()) {
1732 if (ShuffleKind == 1) // unary
1733 return isVMerge(N, UnitSize, 0, 0);
1734 else if (ShuffleKind == 2) // swapped
1735 return isVMerge(N, UnitSize, 0, 16);
1736 else
1737 return false;
1738 } else {
1739 if (ShuffleKind == 1) // unary
1740 return isVMerge(N, UnitSize, 8, 8);
1741 else if (ShuffleKind == 0) // normal
1742 return isVMerge(N, UnitSize, 8, 24);
1743 else
1744 return false;
1745 }
1746}
1747
1748/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
1749/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
1750/// The ShuffleKind distinguishes between big-endian merges with two
1751/// different inputs (0), either-endian merges with two identical inputs (1),
1752/// and little-endian merges with two different inputs (2). For the latter,
1753/// the input operands are swapped (see PPCInstrAltivec.td).
1754bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1755 unsigned ShuffleKind, SelectionDAG &DAG) {
1756 if (DAG.getDataLayout().isLittleEndian()) {
1757 if (ShuffleKind == 1) // unary
1758 return isVMerge(N, UnitSize, 8, 8);
1759 else if (ShuffleKind == 2) // swapped
1760 return isVMerge(N, UnitSize, 8, 24);
1761 else
1762 return false;
1763 } else {
1764 if (ShuffleKind == 1) // unary
1765 return isVMerge(N, UnitSize, 0, 0);
1766 else if (ShuffleKind == 0) // normal
1767 return isVMerge(N, UnitSize, 0, 16);
1768 else
1769 return false;
1770 }
1771}
1772
1773/**
1774 * Common function used to match vmrgew and vmrgow shuffles
1775 *
1776 * The indexOffset determines whether to look for even or odd words in
1777 * the shuffle mask. This is based on the endianness of the target
1778 * machine.
1779 * - Little Endian:
1780 * - Use offset of 0 to check for odd elements
1781 * - Use offset of 4 to check for even elements
1782 * - Big Endian:
1783 * - Use offset of 0 to check for even elements
1784 * - Use offset of 4 to check for odd elements
1785 * A detailed description of the vector element ordering for little endian and
1786 * big endian can be found at
1787 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
1788 * Targeting your applications - what little endian and big endian IBM XL C/C++
1789 * compiler differences mean to you
1790 *
1791 * The mask to the shuffle vector instruction specifies the indices of the
1792 * elements from the two input vectors to place in the result. The elements are
1793 * numbered in array-access order, starting with the first vector. These vectors
1794 * are always of type v16i8, thus each vector will contain 16 elements, each
1795 * 8 bits wide. More info on the shuffle vector can be found in the
1796 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
1797 * Language Reference.
1798 *
1799 * The RHSStartValue indicates whether the same input vectors are used (unary)
1800 * or two different input vectors are used, based on the following:
1801 * - If the instruction uses the same vector for both inputs, the range of the
1802 * indices will be 0 to 15. In this case, the RHSStart value passed should
1803 * be 0.
1804 * - If the instruction has two different vectors then the range of the
1805 * indices will be 0 to 31. In this case, the RHSStart value passed should
1806 * be 16 (indices 0-15 specify elements in the first vector while indices 16
1807 * to 31 specify elements in the second vector).
1808 *
1809 * \param[in] N The shuffle vector SD Node to analyze
1810 * \param[in] IndexOffset Specifies whether to look for even or odd elements
1811 * \param[in] RHSStartValue Specifies the starting index for the righthand input
1812 * vector to the shuffle_vector instruction
1813 * \return true iff this shuffle vector represents an even or odd word merge
1814 */
1815static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
1816 unsigned RHSStartValue) {
1817 if (N->getValueType(0) != MVT::v16i8)
1818 return false;
1819
1820 for (unsigned i = 0; i < 2; ++i)
1821 for (unsigned j = 0; j < 4; ++j)
1822 if (!isConstantOrUndef(N->getMaskElt(i*4+j),
1823 i*RHSStartValue+j+IndexOffset) ||
1824 !isConstantOrUndef(N->getMaskElt(i*4+j+8),
1825 i*RHSStartValue+j+IndexOffset+8))
1826 return false;
1827 return true;
1828}
1829
1830/**
1831 * Determine if the specified shuffle mask is suitable for the vmrgew or
1832 * vmrgow instructions.
1833 *
1834 * \param[in] N The shuffle vector SD Node to analyze
1835 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
1836 * \param[in] ShuffleKind Identify the type of merge:
1837 * - 0 = big-endian merge with two different inputs;
1838 * - 1 = either-endian merge with two identical inputs;
1839 * - 2 = little-endian merge with two different inputs (inputs are swapped for
1840 * little-endian merges).
1841 * \param[in] DAG The current SelectionDAG
1842 * \return true iff this shuffle mask is suitable for the vmrgew or vmrgow instruction
1843 */
1844bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
1845 unsigned ShuffleKind, SelectionDAG &DAG) {
1846 if (DAG.getDataLayout().isLittleEndian()) {
1847 unsigned indexOffset = CheckEven ? 4 : 0;
1848 if (ShuffleKind == 1) // Unary
1849 return isVMerge(N, indexOffset, 0);
1850 else if (ShuffleKind == 2) // swapped
1851 return isVMerge(N, indexOffset, 16);
1852 else
1853 return false;
1854 }
1855 else {
1856 unsigned indexOffset = CheckEven ? 0 : 4;
1857 if (ShuffleKind == 1) // Unary
1858 return isVMerge(N, indexOffset, 0);
1859 else if (ShuffleKind == 0) // Normal
1860 return isVMerge(N, indexOffset, 16);
1861 else
1862 return false;
1863 }
1864 return false;
1865}
1866
1867/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
1868/// amount, otherwise return -1.
1869/// The ShuffleKind distinguishes between big-endian operations with two
1870/// different inputs (0), either-endian operations with two identical inputs
1871/// (1), and little-endian operations with two different inputs (2). For the
1872/// latter, the input operands are swapped (see PPCInstrAltivec.td).
1873int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
1874 SelectionDAG &DAG) {
1875 if (N->getValueType(0) != MVT::v16i8)
1876 return -1;
1877
1878 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
1879
1880 // Find the first non-undef value in the shuffle mask.
1881 unsigned i;
1882 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
1883 /*search*/;
1884
1885 if (i == 16) return -1; // all undef.
1886
1887 // Otherwise, check to see if the rest of the elements are consecutively
1888 // numbered from this value.
1889 unsigned ShiftAmt = SVOp->getMaskElt(i);
1890 if (ShiftAmt < i) return -1;
1891
1892 ShiftAmt -= i;
1893 bool isLE = DAG.getDataLayout().isLittleEndian();
1894
1895 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
1896 // Check the rest of the elements to see if they are consecutive.
1897 for (++i; i != 16; ++i)
1898 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
1899 return -1;
1900 } else if (ShuffleKind == 1) {
1901 // Check the rest of the elements to see if they are consecutive.
1902 for (++i; i != 16; ++i)
1903 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
1904 return -1;
1905 } else
1906 return -1;
1907
1908 if (isLE)
1909 ShiftAmt = 16 - ShiftAmt;
1910
1911 return ShiftAmt;
1912}
1913
1914/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
1915/// specifies a splat of a single element that is suitable for input to
1916/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
1917bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
1918 assert(N->getValueType(0) == MVT::v16i8 && isPowerOf2_32(EltSize) &&
1919 EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
1920
1921 // The consecutive indices need to specify an element, not part of two
1922 // different elements. So abandon ship early if this isn't the case.
1923 if (N->getMaskElt(0) % EltSize != 0)
1924 return false;
1925
1926 // This is a splat operation if each element of the permute is the same, and
1927 // if the value doesn't reference the second vector.
1928 unsigned ElementBase = N->getMaskElt(0);
1929
1930 // FIXME: Handle UNDEF elements too!
1931 if (ElementBase >= 16)
1932 return false;
1933
1934 // Check that the indices are consecutive, in the case of a multi-byte element
1935 // splatted with a v16i8 mask.
1936 for (unsigned i = 1; i != EltSize; ++i)
1937 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
1938 return false;
1939
1940 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
1941 if (N->getMaskElt(i) < 0) continue;
1942 for (unsigned j = 0; j != EltSize; ++j)
1943 if (N->getMaskElt(i+j) != N->getMaskElt(j))
1944 return false;
1945 }
1946 return true;
1947}
1948
1949/// Check that the mask is shuffling N byte elements. Within each N byte
1950/// element of the mask, the indices could be either in increasing or
1951/// decreasing order as long as they are consecutive.
1952/// \param[in] N the shuffle vector SD Node to analyze
1953/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
1954/// Word/DoubleWord/QuadWord).
1955 /// \param[in] StepLen the delta between consecutive indices within each N-byte
1956 /// element: 1 if the mask is in increasing order, -1 if decreasing.
1957/// \return true iff the mask is shuffling N byte elements.
1958static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
1959 int StepLen) {
1960 assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
1961 "Unexpected element width.");
1962 assert((StepLen == 1 || StepLen == -1) && "Unexpected element width.");
1963
1964 unsigned NumOfElem = 16 / Width;
1965 unsigned MaskVal[16]; // Width is never greater than 16
1966 for (unsigned i = 0; i < NumOfElem; ++i) {
1967 MaskVal[0] = N->getMaskElt(i * Width);
1968 if ((StepLen == 1) && (MaskVal[0] % Width)) {
1969 return false;
1970 } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
1971 return false;
1972 }
1973
1974 for (unsigned int j = 1; j < Width; ++j) {
1975 MaskVal[j] = N->getMaskElt(i * Width + j);
1976 if (MaskVal[j] != MaskVal[j-1] + StepLen) {
1977 return false;
1978 }
1979 }
1980 }
1981
1982 return true;
1983}
1984
1985bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
1986 unsigned &InsertAtByte, bool &Swap, bool IsLE) {
1987 if (!isNByteElemShuffleMask(N, 4, 1))
1988 return false;
1989
1990 // Now we look at mask elements 0,4,8,12
1991 unsigned M0 = N->getMaskElt(0) / 4;
1992 unsigned M1 = N->getMaskElt(4) / 4;
1993 unsigned M2 = N->getMaskElt(8) / 4;
1994 unsigned M3 = N->getMaskElt(12) / 4;
1995 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
1996 unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
1997
1998 // Below, let H and L be arbitrary elements of the shuffle mask
1999 // where H is in the range [4,7] and L is in the range [0,3].
2000 // H, 1, 2, 3 or L, 5, 6, 7
2001 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
2002 (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
2003 ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
2004 InsertAtByte = IsLE ? 12 : 0;
2005 Swap = M0 < 4;
2006 return true;
2007 }
2008 // 0, H, 2, 3 or 4, L, 6, 7
2009 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
2010 (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
2011 ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
2012 InsertAtByte = IsLE ? 8 : 4;
2013 Swap = M1 < 4;
2014 return true;
2015 }
2016 // 0, 1, H, 3 or 4, 5, L, 7
2017 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
2018 (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
2019 ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
2020 InsertAtByte = IsLE ? 4 : 8;
2021 Swap = M2 < 4;
2022 return true;
2023 }
2024 // 0, 1, 2, H or 4, 5, 6, L
2025 if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
2026 (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
2027 ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
2028 InsertAtByte = IsLE ? 0 : 12;
2029 Swap = M3 < 4;
2030 return true;
2031 }
2032
2033 // If both vector operands for the shuffle are the same vector, the mask will
2034 // contain only elements from the first one and the second one will be undef.
2035 if (N->getOperand(1).isUndef()) {
2036 ShiftElts = 0;
2037 Swap = true;
2038 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2039 if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
2040 InsertAtByte = IsLE ? 12 : 0;
2041 return true;
2042 }
2043 if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2044 InsertAtByte = IsLE ? 8 : 4;
2045 return true;
2046 }
2047 if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2048 InsertAtByte = IsLE ? 4 : 8;
2049 return true;
2050 }
2051 if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2052 InsertAtByte = IsLE ? 0 : 12;
2053 return true;
2054 }
2055 }
2056
2057 return false;
2058}
2059
2060bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2061 bool &Swap, bool IsLE) {
2062 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2063 // Ensure each byte index of the word is consecutive.
2064 if (!isNByteElemShuffleMask(N, 4, 1))
2065 return false;
2066
2067 // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2068 unsigned M0 = N->getMaskElt(0) / 4;
2069 unsigned M1 = N->getMaskElt(4) / 4;
2070 unsigned M2 = N->getMaskElt(8) / 4;
2071 unsigned M3 = N->getMaskElt(12) / 4;
2072
2073 // If both vector operands for the shuffle are the same vector, the mask will
2074 // contain only elements from the first one and the second one will be undef.
2075 if (N->getOperand(1).isUndef()) {
2076 assert(M0 < 4 && "Indexing into an undef vector?");
2077 if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2078 return false;
2079
2080 ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2081 Swap = false;
2082 return true;
2083 }
2084
2085 // Ensure each word index of the ShuffleVector Mask is consecutive.
2086 if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2087 return false;
2088
2089 if (IsLE) {
2090 if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2091 // Input vectors don't need to be swapped if the leading element
2092 // of the result is one of the 3 left elements of the second vector
2093 // (or if there is no shift to be done at all).
2094 Swap = false;
2095 ShiftElts = (8 - M0) % 8;
2096 } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2097 // Input vectors need to be swapped if the leading element
2098 // of the result is one of the 3 left elements of the first vector
2099 // (or if we're shifting by 4 - thereby simply swapping the vectors).
2100 Swap = true;
2101 ShiftElts = (4 - M0) % 4;
2102 }
2103
2104 return true;
2105 } else { // BE
2106 if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
2107 // Input vectors don't need to be swapped if the leading element
2108 // of the result is one of the 4 elements of the first vector.
2109 Swap = false;
2110 ShiftElts = M0;
2111 } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
2112 // Input vectors need to be swapped if the leading element
2113 // of the result is one of the 4 elements of the right vector.
2114 Swap = true;
2115 ShiftElts = M0 - 4;
2116 }
2117
2118 return true;
2119 }
2120}
2121
2122bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
2123 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2124
2125 if (!isNByteElemShuffleMask(N, Width, -1))
2126 return false;
2127
2128 for (int i = 0; i < 16; i += Width)
2129 if (N->getMaskElt(i) != i + Width - 1)
2130 return false;
2131
2132 return true;
2133}
2134
2135bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
2136 return isXXBRShuffleMaskHelper(N, 2);
2137}
2138
2139bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
2140 return isXXBRShuffleMaskHelper(N, 4);
2141}
2142
2143bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
2144 return isXXBRShuffleMaskHelper(N, 8);
2145}
2146
2147bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
2148 return isXXBRShuffleMaskHelper(N, 16);
2149}
2150
2151/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2152/// if the inputs to the instruction should be swapped and set \p DM to the
2153/// value for the immediate.
2154/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2155/// AND element 0 of the result comes from the first input (LE) or second input
2156/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2157/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2158/// mask.
2159bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
2160 bool &Swap, bool IsLE) {
2161 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2162
2163 // Ensure each byte index of the double word is consecutive.
2164 if (!isNByteElemShuffleMask(N, 8, 1))
2165 return false;
2166
2167 unsigned M0 = N->getMaskElt(0) / 8;
2168 unsigned M1 = N->getMaskElt(8) / 8;
2169 assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2170
2171 // If both vector operands for the shuffle are the same vector, the mask will
2172 // contain only elements from the first one and the second one will be undef.
2173 if (N->getOperand(1).isUndef()) {
2174 if ((M0 | M1) < 2) {
2175 DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2176 Swap = false;
2177 return true;
2178 } else
2179 return false;
2180 }
2181
2182 if (IsLE) {
2183 if (M0 > 1 && M1 < 2) {
2184 Swap = false;
2185 } else if (M0 < 2 && M1 > 1) {
2186 M0 = (M0 + 2) % 4;
2187 M1 = (M1 + 2) % 4;
2188 Swap = true;
2189 } else
2190 return false;
2191
2192 // Note: if control flow comes here that means Swap is already set above
2193 DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2194 return true;
2195 } else { // BE
2196 if (M0 < 2 && M1 > 1) {
2197 Swap = false;
2198 } else if (M0 > 1 && M1 < 2) {
2199 M0 = (M0 + 2) % 4;
2200 M1 = (M1 + 2) % 4;
2201 Swap = true;
2202 } else
2203 return false;
2204
2205 // Note: if control flow comes here that means Swap is already set above
2206 DM = (M0 << 1) + (M1 & 1);
2207 return true;
2208 }
2209}
2210
2211
2212/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2213/// appropriate for PPC mnemonics (which have a big endian bias - namely
2214/// elements are counted from the left of the vector register).
2215unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2216 SelectionDAG &DAG) {
2217 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2218 assert(isSplatShuffleMask(SVOp, EltSize));
2219 if (DAG.getDataLayout().isLittleEndian())
2220 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2221 else
2222 return SVOp->getMaskElt(0) / EltSize;
2223}
2224
2225/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2226/// by using a vspltis[bhw] instruction of the specified element size, return
2227/// the constant being splatted. The ByteSize field indicates the number of
2228/// bytes of each element [124] -> [bhw].
2229SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2230 SDValue OpVal(nullptr, 0);
2231
2232 // If ByteSize of the splat is bigger than the element size of the
2233 // build_vector, then we have a case where we are checking for a splat where
2234 // multiple elements of the buildvector are folded together into a single
2235 // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
2236 unsigned EltSize = 16/N->getNumOperands();
2237 if (EltSize < ByteSize) {
2238 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2239 SDValue UniquedVals[4];
2240 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2241
2242 // See if all of the elements in the buildvector agree across.
2243 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2244 if (N->getOperand(i).isUndef()) continue;
2245 // If the element isn't a constant, bail fully out.
2246 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2247
2248 if (!UniquedVals[i&(Multiple-1)].getNode())
2249 UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2250 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2251 return SDValue(); // no match.
2252 }
2253
2254 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2255 // either constant or undef values that are identical for each chunk. See
2256 // if these chunks can form into a larger vspltis*.
2257
2258 // Check to see if all of the leading entries are either 0 or -1. If
2259 // neither, then this won't fit into the immediate field.
2260 bool LeadingZero = true;
2261 bool LeadingOnes = true;
2262 for (unsigned i = 0; i != Multiple-1; ++i) {
2263 if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2264
2265 LeadingZero &= isNullConstant(UniquedVals[i]);
2266 LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2267 }
2268 // Finally, check the least significant entry.
2269 if (LeadingZero) {
2270 if (!UniquedVals[Multiple-1].getNode())
2271 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
2272 int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
2273 if (Val < 16) // 0,0,0,4 -> vspltisw(4)
2274 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2275 }
2276 if (LeadingOnes) {
2277 if (!UniquedVals[Multiple-1].getNode())
2278 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2279 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2280 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2281 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2282 }
2283
2284 return SDValue();
2285 }
2286
2287 // Check to see if this buildvec has a single non-undef value in its elements.
2288 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2289 if (N->getOperand(i).isUndef()) continue;
2290 if (!OpVal.getNode())
2291 OpVal = N->getOperand(i);
2292 else if (OpVal != N->getOperand(i))
2293 return SDValue();
2294 }
2295
2296 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
2297
2298 unsigned ValSizeInBytes = EltSize;
2299 uint64_t Value = 0;
2300 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2301 Value = CN->getZExtValue();
2302 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2303 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2304 Value = FloatToBits(CN->getValueAPF().convertToFloat());
2305 }
2306
2307 // If the splat value is larger than the element value, then we can never do
2308 // this splat. The only case that we could fit the replicated bits into our
2309 // immediate field for would be zero, and we prefer to use vxor for it.
2310 if (ValSizeInBytes < ByteSize) return SDValue();
2311
2312 // If the element value is larger than the splat value, check if it consists
2313 // of a repeated bit pattern of size ByteSize.
2314 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2315 return SDValue();
2316
2317 // Properly sign extend the value.
2318 int MaskVal = SignExtend32(Value, ByteSize * 8);
2319
2320 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2321 if (MaskVal == 0) return SDValue();
2322
2323 // Finally, if this value fits in a 5 bit sext field, return it
2324 if (SignExtend32<5>(MaskVal) == MaskVal)
2325 return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2326 return SDValue();
2327}
2328
2329/// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift
2330/// amount, otherwise return -1.
2331int PPC::isQVALIGNIShuffleMask(SDNode *N) {
2332 EVT VT = N->getValueType(0);
2333 if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1)
2334 return -1;
2335
2336 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2337
2338 // Find the first non-undef value in the shuffle mask.
2339 unsigned i;
2340 for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i)
2341 /*search*/;
2342
2343 if (i == 4) return -1; // all undef.
2344
2345 // Otherwise, check to see if the rest of the elements are consecutively
2346 // numbered from this value.
2347 unsigned ShiftAmt = SVOp->getMaskElt(i);
2348 if (ShiftAmt < i) return -1;
2349 ShiftAmt -= i;
2350
2351 // Check the rest of the elements to see if they are consecutive.
2352 for (++i; i != 4; ++i)
2353 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2354 return -1;
2355
2356 return ShiftAmt;
2357}
2358
2359//===----------------------------------------------------------------------===//
2360// Addressing Mode Selection
2361//===----------------------------------------------------------------------===//
2362
2363/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2364/// or 64-bit immediate, and if the value can be accurately represented as a
2365/// sign extension from a 16-bit value. If so, this returns true and the
2366/// immediate.
2367bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2368 if (!isa<ConstantSDNode>(N))
2369 return false;
2370
2371 Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue();
2372 if (N->getValueType(0) == MVT::i32)
2373 return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
2374 else
2375 return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2376}
2377bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2378 return isIntS16Immediate(Op.getNode(), Imm);
2379}
2380
2381
2382/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2383/// be represented as an indexed [r+r] operation.
2384bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2385 SDValue &Index,
2386 SelectionDAG &DAG) const {
2387 for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
2388 UI != E; ++UI) {
2389 if (MemSDNode *Memop = dyn_cast<MemSDNode>(*UI)) {
2390 if (Memop->getMemoryVT() == MVT::f64) {
2391 Base = N.getOperand(0);
2392 Index = N.getOperand(1);
2393 return true;
2394 }
2395 }
2396 }
2397 return false;
2398}
2399
2400 /// SelectAddressRegReg - Given the specified address, check to see if it
2401/// can be represented as an indexed [r+r] operation. Returns false if it
2402/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2403/// non-zero and N can be represented by a base register plus a signed 16-bit
2404/// displacement, make a more precise judgement by checking (displacement % \p
2405/// EncodingAlignment).
2406bool PPCTargetLowering::SelectAddressRegReg(
2407 SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
2408 MaybeAlign EncodingAlignment) const {
2409 // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2410 // a [pc+imm].
2411 if (SelectAddressPCRel(N, Base))
2412 return false;
2413
2414 int16_t Imm = 0;
2415 if (N.getOpcode() == ISD::ADD) {
2416 // Is this an SPE load/store (f64), which can't handle a 16-bit offset?
2417 // SPE load/store can only handle 8-bit offsets.
2418 if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2419 return true;
2420 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2421 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2422 return false; // r+i
2423 if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2424 return false; // r+i
2425
2426 Base = N.getOperand(0);
2427 Index = N.getOperand(1);
2428 return true;
2429 } else if (N.getOpcode() == ISD::OR) {
2430 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2431 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2432 return false; // r+i can fold it if we can.
2433
2434 // If this is an or of disjoint bitfields, we can codegen this as an add
2435 // (for better address arithmetic) if the LHS and RHS of the OR are provably
2436 // disjoint.
2437 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2438
2439 if (LHSKnown.Zero.getBoolValue()) {
2440 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2441 // If all of the bits are known zero on the LHS or RHS, the add won't
2442 // carry.
2443 if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2444 Base = N.getOperand(0);
2445 Index = N.getOperand(1);
2446 return true;
2447 }
2448 }
2449 }
2450
2451 return false;
2452}
2453
2454// If we happen to be doing an i64 load or store into a stack slot that has
2455// less than a 4-byte alignment, then the frame-index elimination may need to
2456// use an indexed load or store instruction (because the offset may not be a
2457// multiple of 4). The extra register needed to hold the offset comes from the
2458// register scavenger, and it is possible that the scavenger will need to use
2459// an emergency spill slot. As a result, we need to make sure that a spill slot
2460// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2461// stack slot.
2462static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2463 // FIXME: This does not handle the LWA case.
2464 if (VT != MVT::i64)
2465 return;
2466
2467 // NOTE: We'll exclude negative FIs here, which come from argument
2468 // lowering, because there are no known test cases triggering this problem
2469 // using packed structures (or similar). We can remove this exclusion if
2470 // we find such a test case. The reason why this is so test-case driven is
2471 // because this entire 'fixup' is only to prevent crashes (from the
2472 // register scavenger) on not-really-valid inputs. For example, if we have:
2473 // %a = alloca i1
2474 // %b = bitcast i1* %a to i64*
2475 // store i64 %val, i64* %b
2476 // then the store should really be marked as 'align 1', but is not. If it
2477 // were marked as 'align 1' then the indexed form would have been
2478 // instruction-selected initially, and the problem this 'fixup' is preventing
2479 // won't happen regardless.
2480 if (FrameIdx < 0)
2481 return;
2482
2483 MachineFunction &MF = DAG.getMachineFunction();
2484 MachineFrameInfo &MFI = MF.getFrameInfo();
2485
2486 if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2487 return;
2488
2489 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2490 FuncInfo->setHasNonRISpills();
2491}
2492
2493/// Returns true if the address N can be represented by a base register plus
2494/// a signed 16-bit displacement [r+imm], and if it is not better
2495/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2496/// displacements that are multiples of that value.
2497bool PPCTargetLowering::SelectAddressRegImm(
2498 SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2499 MaybeAlign EncodingAlignment) const {
2500 // FIXME dl should come from parent load or store, not from address
2501 SDLoc dl(N);
2502
2503 // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2504 // a [pc+imm].
2505 if (SelectAddressPCRel(N, Base))
2506 return false;
2507
2508 // If this can be more profitably realized as r+r, fail.
2509 if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2510 return false;
2511
2512 if (N.getOpcode() == ISD::ADD) {
2513 int16_t imm = 0;
2514 if (isIntS16Immediate(N.getOperand(1), imm) &&
2515 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2516 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2517 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2518 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2519 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2520 } else {
2521 Base = N.getOperand(0);
2522 }
2523 return true; // [r+i]
2524 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2525 // Match LOAD (ADD (X, Lo(G))).
2526 assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
2527 && "Cannot handle constant offsets yet!");
2528 Disp = N.getOperand(1).getOperand(0); // The global address.
2529 assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2530 Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2531 Disp.getOpcode() == ISD::TargetConstantPool ||
2532 Disp.getOpcode() == ISD::TargetJumpTable);
2533 Base = N.getOperand(0);
2534 return true; // [&g+r]
2535 }
2536 } else if (N.getOpcode() == ISD::OR) {
2537 int16_t imm = 0;
2538 if (isIntS16Immediate(N.getOperand(1), imm) &&
2539 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2540 // If this is an or of disjoint bitfields, we can codegen this as an add
2541 // (for better address arithmetic) if the LHS and RHS of the OR are
2542 // provably disjoint.
2543 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2544
2545 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2546 // If all of the bits are known zero on the LHS or RHS, the add won't
2547 // carry.
2548 if (FrameIndexSDNode *FI =
2549 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2550 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2551 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2552 } else {
2553 Base = N.getOperand(0);
2554 }
2555 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2556 return true;
2557 }
2558 }
2559 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2560 // Loading from a constant address.
2561
2562 // If this address fits entirely in a 16-bit sext immediate field, codegen
2563 // this as "d, 0"
2564 int16_t Imm;
2565 if (isIntS16Immediate(CN, Imm) &&
2566 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
2567 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2568 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2569 CN->getValueType(0));
2570 return true;
2571 }
2572
2573 // Handle 32-bit sext immediates with LIS + addr mode.
2574 if ((CN->getValueType(0) == MVT::i32 ||
2575 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2576 (!EncodingAlignment ||
2577 isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2578 int Addr = (int)CN->getZExtValue();
2579
2580 // Otherwise, break this down into an LIS + disp.
2581 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2582
2583 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2584 MVT::i32);
2585 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2586 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2587 return true;
2588 }
2589 }
2590
2591 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2592 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2593 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2594 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2595 } else
2596 Base = N;
2597 return true; // [r+0]
2598}
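// Worked example for the LIS + disp path above (editor's addition with a
// hypothetical constant address, not in the source listing): Addr = 0x12348000
// gives Disp = (short)Addr = -32768 and (Addr - (signed short)Addr) >> 16 =
// (0x12348000 + 0x8000) >> 16 = 0x1235, so LIS materializes 0x12350000 and
// adding the -32768 displacement reconstructs 0x12348000.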
2599
2600/// SelectAddressRegRegOnly - Given the specified address, force it to be
2601/// represented as an indexed [r+r] operation.
2602bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2603 SDValue &Index,
2604 SelectionDAG &DAG) const {
2605 // Check to see if we can easily represent this as an [r+r] address. This
2606 // will fail if it thinks that the address is more profitably represented as
2607 // reg+imm, e.g. where imm = 0.
2608 if (SelectAddressRegReg(N, Base, Index, DAG))
2609 return true;
2610
2611 // If the address is the result of an add, we will utilize the fact that the
2612 // address calculation includes an implicit add. However, we can reduce
2613 // register pressure if we do not materialize a constant just for use as the
2614 // index register. We only get rid of the add if it is not an add of a
2615 // value and a 16-bit signed constant and both have a single use.
2616 int16_t imm = 0;
2617 if (N.getOpcode() == ISD::ADD &&
2618 (!isIntS16Immediate(N.getOperand(1), imm) ||
2619 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2620 Base = N.getOperand(0);
2621 Index = N.getOperand(1);
2622 return true;
2623 }
2624
2625 // Otherwise, do it the hard way, using R0 as the base register.
2626 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2627 N.getValueType());
2628 Index = N;
2629 return true;
2630}
2631
2632template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2633 Ty *PCRelCand = dyn_cast<Ty>(N);
2634 return PCRelCand && (PCRelCand->getTargetFlags() & PPCII::MO_PCREL_FLAG);
2635}
2636
2637/// Returns true if this address is a PC Relative address.
2638/// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
2639/// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
2640bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
2641 // This is a materialize PC Relative node. Always select this as PC Relative.
2642 Base = N;
2643 if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
2644 return true;
2645 if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
2646 isValidPCRelNode<GlobalAddressSDNode>(N) ||
2647 isValidPCRelNode<JumpTableSDNode>(N) ||
2648 isValidPCRelNode<BlockAddressSDNode>(N))
2649 return true;
2650 return false;
2651}
2652
2653/// Returns true if we should use a direct load into vector instruction
2654/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
2655static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
2656
2657 // If there are any other uses other than scalar to vector, then we should
2658 // keep it as a scalar load -> direct move pattern to prevent multiple
2659 // loads.
2660 LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
2661 if (!LD)
2662 return false;
2663
2664 EVT MemVT = LD->getMemoryVT();
2665 if (!MemVT.isSimple())
2666 return false;
2667 switch(MemVT.getSimpleVT().SimpleTy) {
2668 case MVT::i64:
2669 break;
2670 case MVT::i32:
2671 if (!ST.hasP8Vector())
2672 return false;
2673 break;
2674 case MVT::i16:
2675 case MVT::i8:
2676 if (!ST.hasP9Vector())
2677 return false;
2678 break;
2679 default:
2680 return false;
2681 }
2682
2683 SDValue LoadedVal(N, 0);
2684 if (!LoadedVal.hasOneUse())
2685 return false;
2686
2687 for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
2688 UI != UE; ++UI)
2689 if (UI.getUse().get().getResNo() == 0 &&
2690 UI->getOpcode() != ISD::SCALAR_TO_VECTOR &&
2691 UI->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
2692 return false;
2693
2694 return true;
2695}
2696
2697/// getPreIndexedAddressParts - returns true by value, base pointer and
2698/// offset pointer and addressing mode by reference if the node's address
2699/// can be legally represented as pre-indexed load / store address.
2700bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
2701 SDValue &Offset,
2702 ISD::MemIndexedMode &AM,
2703 SelectionDAG &DAG) const {
2704 if (DisablePPCPreinc) return false;
2705
2706 bool isLoad = true;
2707 SDValue Ptr;
2708 EVT VT;
2709 unsigned Alignment;
2710 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2711 Ptr = LD->getBasePtr();
2712 VT = LD->getMemoryVT();
2713 Alignment = LD->getAlignment();
2714 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
2715 Ptr = ST->getBasePtr();
2716 VT = ST->getMemoryVT();
2717 Alignment = ST->getAlignment();
2718 isLoad = false;
2719 } else
2720 return false;
2721
2722 // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
2723 // instructions because we can fold these into a more efficient instruction
2724 // instead (such as LXSD).
2725 if (isLoad && usePartialVectorLoads(N, Subtarget)) {
2726 return false;
2727 }
2728
2729 // PowerPC doesn't have preinc load/store instructions for vectors
2730 if (VT.isVector())
2731 return false;
2732
2733 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
2734 // Common code will reject creating a pre-inc form if the base pointer
2735 // is a frame index, or if N is a store and the base pointer is either
2736 // the same as or a predecessor of the value being stored. Check for
2737 // those situations here, and try with swapped Base/Offset instead.
2738 bool Swap = false;
2739
2740 if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
2741 Swap = true;
2742 else if (!isLoad) {
2743 SDValue Val = cast<StoreSDNode>(N)->getValue();
2744 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
2745 Swap = true;
2746 }
2747
2748 if (Swap)
2749 std::swap(Base, Offset);
2750
2751 AM = ISD::PRE_INC;
2752 return true;
2753 }
2754
2755 // LDU/STU can only handle immediates that are a multiple of 4.
2756 if (VT != MVT::i64) {
2757 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, None))
2758 return false;
2759 } else {
2760 // LDU/STU need an address with at least 4-byte alignment.
2761 if (Alignment < 4)
2762 return false;
2763
2764 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
2765 return false;
2766 }
2767
2768 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2769 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
2770 // sext i32 to i64 when addr mode is r+i.
2771 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
2772 LD->getExtensionType() == ISD::SEXTLOAD &&
2773 isa<ConstantSDNode>(Offset))
2774 return false;
2775 }
2776
2777 AM = ISD::PRE_INC;
2778 return true;
2779}
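// Editor's note (illustrative, not in the source listing): a pre-indexed
// access folds the pointer update into the memory operation; e.g. a load from
// (%p + 8) followed by a separate pointer increment can become a single
// update-form instruction such as ldu/lwzu, which loads from %p + 8 and writes
// the new effective address back into the base register.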
2780
2781//===----------------------------------------------------------------------===//
2782// LowerOperation implementation
2783//===----------------------------------------------------------------------===//
2784
2785/// Set the HiOpFlags and LoOpFlags to the target MO flags used to reference
2786/// labels, adding the PIC flag when generating position-independent code.
2787static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
2788 unsigned &HiOpFlags, unsigned &LoOpFlags,
2789 const GlobalValue *GV = nullptr) {
2790 HiOpFlags = PPCII::MO_HA;
2791 LoOpFlags = PPCII::MO_LO;
2792
2793 // Don't use the pic base if not in PIC relocation model.
2794 if (IsPIC) {
2795 HiOpFlags |= PPCII::MO_PIC_FLAG;
2796 LoOpFlags |= PPCII::MO_PIC_FLAG;
2797 }
2798}
2799
2800static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
2801 SelectionDAG &DAG) {
2802 SDLoc DL(HiPart);
2803 EVT PtrVT = HiPart.getValueType();
2804 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
2805
2806 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
2807 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
2808
2809 // With PIC, the first instruction is actually "GR+hi(&G)".
2810 if (isPIC)
2811 Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
2812 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
2813
2814 // Generate non-pic code that has direct accesses to the constant pool.
2815 // The address of the global is just (hi(&g)+lo(&g)).
2816 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
2817}
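// Editor's note (illustrative, not in the source listing): in non-PIC code the
// Hi/Lo pair above typically ends up as a two-instruction sequence along the
// lines of
//   lis  r3, g@ha
//   addi r3, r3, g@l
// so the address of the global is materialized as hi(&g) + lo(&g).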
2818
2819static void setUsesTOCBasePtr(MachineFunction &MF) {
2820 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2821 FuncInfo->setUsesTOCBasePtr();
2822}
2823
2824static void setUsesTOCBasePtr(SelectionDAG &DAG) {
2825 setUsesTOCBasePtr(DAG.getMachineFunction());
2826}
2827
2828SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
2829 SDValue GA) const {
2830 const bool Is64Bit = Subtarget.isPPC64();
2831 EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
2832 SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT)
2833 : Subtarget.isAIXABI()
2834 ? DAG.getRegister(PPC::R2, VT)
2835 : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
2836 SDValue Ops[] = { GA, Reg };
2837 return DAG.getMemIntrinsicNode(
2838 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
2839 MachinePointerInfo::getGOT(DAG.getMachineFunction()), None,
2840 MachineMemOperand::MOLoad);
2841}
2842
2843SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
2844 SelectionDAG &DAG) const {
2845 EVT PtrVT = Op.getValueType();
2846 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2847 const Constant *C = CP->getConstVal();
2848
2849 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
2850 // The actual address of the GlobalValue is stored in the TOC.
2851 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
2852 if (Subtarget.isUsingPCRelativeCalls()) {
2853 SDLoc DL(CP);
2854 EVT Ty = getPointerTy(DAG.getDataLayout());
2855 SDValue ConstPool = DAG.getTargetConstantPool(
2856 C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
2857 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
2858 }
2859 setUsesTOCBasePtr(DAG);
2860 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
2861 return getTOCEntry(DAG, SDLoc(CP), GA);
2862 }
2863
2864 unsigned MOHiFlag, MOLoFlag;
2865 bool IsPIC = isPositionIndependent();
2866 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2867
2868 if (IsPIC && Subtarget.isSVR4ABI()) {
2869 SDValue GA =
2870 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
2871 return getTOCEntry(DAG, SDLoc(CP), GA);
2872 }
2873
2874 SDValue CPIHi =
2875 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
2876 SDValue CPILo =
2877 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
2878 return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
2879}
2880
2881// For 64-bit PowerPC, prefer the more compact relative encodings.
2882// This trades 32 bits per jump table entry for one or two instructions
2883// on the jump site.
2884unsigned PPCTargetLowering::getJumpTableEncoding() const {
2885 if (isJumpTableRelative())
2886 return MachineJumpTableInfo::EK_LabelDifference32;
2887
2888 return TargetLowering::getJumpTableEncoding();
2889}
2890
2891bool PPCTargetLowering::isJumpTableRelative() const {
2892 if (UseAbsoluteJumpTables)
2893 return false;
2894 if (Subtarget.isPPC64() || Subtarget.isAIXABI())
2895 return true;
2896 return TargetLowering::isJumpTableRelative();
2897}
2898
2899SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
2900 SelectionDAG &DAG) const {
2901 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
2902 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2903
2904 switch (getTargetMachine().getCodeModel()) {
2905 case CodeModel::Small:
2906 case CodeModel::Medium:
2907 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2908 default:
2909 return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
2910 getPointerTy(DAG.getDataLayout()));
2911 }
2912}
2913
2914const MCExpr *
2915PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
2916 unsigned JTI,
2917 MCContext &Ctx) const {
2918 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
2919 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2920
2921 switch (getTargetMachine().getCodeModel()) {
2922 case CodeModel::Small:
2923 case CodeModel::Medium:
2924 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2925 default:
2926 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2927 }
2928}
2929
2930SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
2931 EVT PtrVT = Op.getValueType();
2932 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2933
2934 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
2935 if (Subtarget.isUsingPCRelativeCalls()) {
2936 SDLoc DL(JT);
2937 EVT Ty = getPointerTy(DAG.getDataLayout());
2938 SDValue GA =
2939 DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
2940 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
2941 return MatAddr;
2942 }
2943
2944 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
2945 // The actual address of the GlobalValue is stored in the TOC.
2946 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
2947 setUsesTOCBasePtr(DAG);
2948 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
2949 return getTOCEntry(DAG, SDLoc(JT), GA);
2950 }
2951
2952 unsigned MOHiFlag, MOLoFlag;
2953 bool IsPIC = isPositionIndependent();
2954 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2955
2956 if (IsPIC && Subtarget.isSVR4ABI()) {
2957 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
2958 PPCII::MO_PIC_FLAG);
2959 return getTOCEntry(DAG, SDLoc(GA), GA);
2960 }
2961
2962 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
2963 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
2964 return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
2965}
2966
2967SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
2968 SelectionDAG &DAG) const {
2969 EVT PtrVT = Op.getValueType();
2970 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
2971 const BlockAddress *BA = BASDN->getBlockAddress();
2972
2973 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
2974 if (Subtarget.isUsingPCRelativeCalls()) {
2975 SDLoc DL(BASDN);
2976 EVT Ty = getPointerTy(DAG.getDataLayout());
2977 SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
2978 PPCII::MO_PCREL_FLAG);
2979 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
2980 return MatAddr;
2981 }
2982
2983 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
2984 // The actual BlockAddress is stored in the TOC.
2985 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
2986 setUsesTOCBasePtr(DAG);
2987 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
2988 return getTOCEntry(DAG, SDLoc(BASDN), GA);
2989 }
2990
2991 // 32-bit position-independent ELF stores the BlockAddress in the .got.
2992 if (Subtarget.is32BitELFABI() && isPositionIndependent())
2993 return getTOCEntry(
2994 DAG, SDLoc(BASDN),
2995 DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
2996
2997 unsigned MOHiFlag, MOLoFlag;
2998 bool IsPIC = isPositionIndependent();
2999 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3000 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
3001 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
3002 return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3003}
3004
3005SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3006 SelectionDAG &DAG) const {
3007 // FIXME: TLS addresses currently use medium model code sequences,
3008 // which is the most useful form. Eventually support for small and
3009 // large models could be added if users need it, at the cost of
3010 // additional complexity.
3011 if (Subtarget.isUsingPCRelativeCalls() && !EnablePPCPCRelTLS)
3012 report_fatal_error("Thread local storage is not supported with pc-relative"
3013 " addressing - please compile with -mno-pcrel");
3014 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3015 if (DAG.getTarget().useEmulatedTLS())
3016 return LowerToTLSEmulatedModel(GA, DAG);
3017
3018 SDLoc dl(GA);
3019 const GlobalValue *GV = GA->getGlobal();
3020 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3021 bool is64bit = Subtarget.isPPC64();
3022 const Module *M = DAG.getMachineFunction().getFunction().getParent();
3023 PICLevel::Level picLevel = M->getPICLevel();
3024
3025 const TargetMachine &TM = getTargetMachine();
3026 TLSModel::Model Model = TM.getTLSModel(GV);
3027
3028 if (Model == TLSModel::LocalExec) {
3029 if (Subtarget.isUsingPCRelativeCalls()) {
3030 SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3031 SDValue TGA = DAG.getTargetGlobalAddress(
3032 GV, dl, PtrVT, 0, (PPCII::MO_PCREL_FLAG | PPCII::MO_TPREL_FLAG));
3033 SDValue MatAddr =
3034 DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
3035 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3036 }
3037
3038 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3039 PPCII::MO_TPREL_HA);
3040 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3041 PPCII::MO_TPREL_LO);
3042 SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3043 : DAG.getRegister(PPC::R2, MVT::i32);
3044
3045 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3046 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3047 }
3048
3049 if (Model == TLSModel::InitialExec) {
3050 bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3051 SDValue TGA = DAG.getTargetGlobalAddress(
3052 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3053 SDValue TGATLS = DAG.getTargetGlobalAddress(
3054 GV, dl, PtrVT, 0,
3055 IsPCRel ? (PPCII::MO_TLS | PPCII::MO_PCREL_FLAG) : PPCII::MO_TLS);
3056 SDValue TPOffset;
3057 if (IsPCRel) {
3058 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
3059 TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3060 MachinePointerInfo());
3061 } else {
3062 SDValue GOTPtr;
3063 if (is64bit) {
3064 setUsesTOCBasePtr(DAG);
3065 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3066 GOTPtr =
3067 DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
3068 } else {
3069 if (!TM.isPositionIndependent())
3070 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3071 else if (picLevel == PICLevel::SmallPIC)
3072 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3073 else
3074 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3075 }
3076 TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3077 }
3078 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3079 }
3080
3081 if (Model == TLSModel::GeneralDynamic) {
3082 if (Subtarget.isUsingPCRelativeCalls()) {
3083 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3084 PPCII::MO_GOT_TLSGD_PCREL_FLAG);
3085 return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3086 }
3087
3088 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3089 SDValue GOTPtr;
3090 if (is64bit) {
3091 setUsesTOCBasePtr(DAG);
3092 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3093 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3094 GOTReg, TGA);
3095 } else {
3096 if (picLevel == PICLevel::SmallPIC)
3097 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3098 else
3099 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3100 }
3101 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3102 GOTPtr, TGA, TGA);
3103 }
3104
3105 if (Model == TLSModel::LocalDynamic) {
3106 if (Subtarget.isUsingPCRelativeCalls()) {
3107 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3108 PPCII::MO_GOT_TLSLD_PCREL_FLAG);
3109 SDValue MatPCRel =
3110 DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3111 return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3112 }
3113
3114 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3115 SDValue GOTPtr;
3116 if (is64bit) {
3117 setUsesTOCBasePtr(DAG);
3118 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3119 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3120 GOTReg, TGA);
3121 } else {
3122 if (picLevel == PICLevel::SmallPIC)
3123 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3124 else
3125 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3126 }
3127 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3128 PtrVT, GOTPtr, TGA, TGA);
3129 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3130 PtrVT, TLSAddr, TGA);
3131 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3132 }
3133
3134 llvm_unreachable("Unknown TLS model!");
3135}
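// Editor's note (illustrative, not in the source listing): for the local-exec
// model above, the 64-bit non-PC-relative path commonly lowers to something
// along the lines of
//   addis r4, r13, v@tprel@ha
//   addi  r4, r4,  v@tprel@l
// i.e. a thread-pointer-relative offset added to r13, the TLS base register.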
3136
3137SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3138 SelectionDAG &DAG) const {
3139 EVT PtrVT = Op.getValueType();
3140 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3141 SDLoc DL(GSDN);
3142 const GlobalValue *GV = GSDN->getGlobal();
3143
3144 // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3145 // The actual address of the GlobalValue is stored in the TOC.
3146 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3147 if (Subtarget.isUsingPCRelativeCalls()) {
3148 EVT Ty = getPointerTy(DAG.getDataLayout());
3149 if (isAccessedAsGotIndirect(Op)) {
3150 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3151 PPCII::MO_PCREL_FLAG |
3152 PPCII::MO_GOT_FLAG);
3153 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3154 SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3155 MachinePointerInfo());
3156 return Load;
3157 } else {
3158 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3159 PPCII::MO_PCREL_FLAG);
3160 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3161 }
3162 }
3163 setUsesTOCBasePtr(DAG);
3164 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3165 return getTOCEntry(DAG, DL, GA);
3166 }
3167
3168 unsigned MOHiFlag, MOLoFlag;
3169 bool IsPIC = isPositionIndependent();
3170 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3171
3172 if (IsPIC && Subtarget.isSVR4ABI()) {
3173 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3174 GSDN->getOffset(),
3175 PPCII::MO_PIC_FLAG);
3176 return getTOCEntry(DAG, DL, GA);
3177 }
3178
3179 SDValue GAHi =
3180 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3181 SDValue GALo =
3182 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3183
3184 return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3185}
3186
3187SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3188 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3189 SDLoc dl(Op);
3190
3191 if (Op.getValueType() == MVT::v2i64) {
3192 // When the operands themselves are v2i64 values, we need to do something
3193 // special because VSX has no underlying comparison operations for these.
3194 if (Op.getOperand(0).getValueType() == MVT::v2i64) {
3195 // Equality can be handled by casting to the legal type for Altivec
3196 // comparisons, everything else needs to be expanded.
3197 if (CC == ISD::SETEQ || CC == ISD::SETNE) {
3198 return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
3199 DAG.getSetCC(dl, MVT::v4i32,
3200 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
3201 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
3202 CC));
3203 }
3204
3205 return SDValue();
3206 }
3207
3208 // We handle most of these in the usual way.
3209 return Op;
3210 }
3211
3212 // If we're comparing for equality to zero, expose the fact that this is
3213 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3214 // fold the new nodes.
3215 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3216 return V;
3217
3218 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
3219 // Leave comparisons against 0 and -1 alone for now, since they're usually
3220 // optimized. FIXME: revisit this when we can custom lower all setcc
3221 // optimizations.
3222 if (C->isAllOnesValue() || C->isNullValue())
3223 return SDValue();
3224 }
3225
3226 // If we have an integer seteq/setne, turn it into a compare against zero
3227 // by xor'ing the rhs with the lhs, which is faster than setting a
3228 // condition register, reading it back out, and masking the correct bit. The
3229 // normal approach here uses sub to do this instead of xor. Using xor exposes
3230 // the result to other bit-twiddling opportunities.
3231 EVT LHSVT = Op.getOperand(0).getValueType();
3232 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3233 EVT VT = Op.getValueType();
3234 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
3235 Op.getOperand(1));
3236 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3237 }
3238 return SDValue();
3239}
3240
3241SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3242 SDNode *Node = Op.getNode();
3243 EVT VT = Node->getValueType(0);
3244 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3245 SDValue InChain = Node->getOperand(0);
3246 SDValue VAListPtr = Node->getOperand(1);
3247 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3248 SDLoc dl(Node);
3249
3250 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3251
3252 // gpr_index
3253 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3254 VAListPtr, MachinePointerInfo(SV), MVT::i8);
3255 InChain = GprIndex.getValue(1);
3256
3257 if (VT == MVT::i64) {
3258 // Check if GprIndex is even
3259 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3260 DAG.getConstant(1, dl, MVT::i32));
3261 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3262 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3263 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3264 DAG.getConstant(1, dl, MVT::i32));
3265 // Align GprIndex to be even if it isn't
3266 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3267 GprIndex);
3268 }
3269
3270 // fpr index is 1 byte after gpr
3271 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3272 DAG.getConstant(1, dl, MVT::i32));
3273
3274 // fpr
3275 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3276 FprPtr, MachinePointerInfo(SV), MVT::i8);
3277 InChain = FprIndex.getValue(1);
3278
3279 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3280 DAG.getConstant(8, dl, MVT::i32));
3281
3282 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3283 DAG.getConstant(4, dl, MVT::i32));
3284
3285 // areas
3286 SDValue OverflowArea =
3287 DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3288 InChain = OverflowArea.getValue(1);
3289
3290 SDValue RegSaveArea =
3291 DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3292 InChain = RegSaveArea.getValue(1);
3293
3294 // select overflow_area if index >= 8
3295 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3296 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3297
3298 // adjustment constant gpr_index * 4/8
3299 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3300 VT.isInteger() ? GprIndex : FprIndex,
3301 DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3302 MVT::i32));
3303
3304 // OurReg = RegSaveArea + RegConstant
3305 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3306 RegConstant);
3307
3308 // Floating types are 32 bytes into RegSaveArea
3309 if (VT.isFloatingPoint())
3310 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3311 DAG.getConstant(32, dl, MVT::i32));
3312
3313 // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3314 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3315 VT.isInteger() ? GprIndex : FprIndex,
3316 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3317 MVT::i32));
3318
3319 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3320 VT.isInteger() ? VAListPtr : FprPtr,
3321 MachinePointerInfo(SV), MVT::i8);
3322
3323 // determine if we should load from reg_save_area or overflow_area
3324 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3325
3326 // increase overflow_area by 4/8 if gpr/fpr >= 8
3327 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3328 DAG.getConstant(VT.isInteger() ? 4 : 8,
3329 dl, MVT::i32));
3330
3331 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3332 OverflowAreaPlusN);
3333
3334 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3335 MachinePointerInfo(), MVT::i32);
3336
3337 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3338}
3339
3340SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3341 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3342
3343 // We have to copy the entire va_list struct:
3344 // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
3345 return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3346 DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3347 false, true, false, MachinePointerInfo(),
3348 MachinePointerInfo());
3349}
3350
3351SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3352 SelectionDAG &DAG) const {
3353 if (Subtarget.isAIXABI())
3354 report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");
3355
3356 return Op.getOperand(0);
3357}
3358
3359SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3360 SelectionDAG &DAG) const {
3361 if (Subtarget.isAIXABI())
3362 report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");
3363
3364 SDValue Chain = Op.getOperand(0);
3365 SDValue Trmp = Op.getOperand(1); // trampoline
3366 SDValue FPtr = Op.getOperand(2); // nested function
3367 SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3368 SDLoc dl(Op);
3369
3370 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3371 bool isPPC64 = (PtrVT == MVT::i64);
3372 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3373
3374 TargetLowering::ArgListTy Args;
3375 TargetLowering::ArgListEntry Entry;
3376
3377 Entry.Ty = IntPtrTy;
3378 Entry.Node = Trmp; Args.push_back(Entry);
3379
3380 // TrampSize == (isPPC64 ? 48 : 40);
3381 Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
3382 isPPC64 ? MVT::i64 : MVT::i32);
3383 Args.push_back(Entry);
3384
3385 Entry.Node = FPtr; Args.push_back(Entry);
3386 Entry.Node = Nest; Args.push_back(Entry);
3387
3388 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
3389 TargetLowering::CallLoweringInfo CLI(DAG);
3390 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3391 CallingConv::C, Type::getVoidTy(*DAG.getContext()),
3392 DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
3393
3394 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3395 return CallResult.second;
3396}
3397
3398SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3399 MachineFunction &MF = DAG.getMachineFunction();
3400 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3401 EVT PtrVT = getPointerTy(MF.getDataLayout());
3402
3403 SDLoc dl(Op);
3404
3405 if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
3406 // vastart just stores the address of the VarArgsFrameIndex slot into the
3407 // memory location argument.
3408 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3409 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3410 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3411 MachinePointerInfo(SV));
3412 }
3413
3414 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
3415 // We suppose the given va_list is already allocated.
3416 //
3417 // typedef struct {
3418 // char gpr; /* index into the array of 8 GPRs
3419 // * stored in the register save area
3420 // * gpr=0 corresponds to r3,
3421 // * gpr=1 to r4, etc.
3422 // */
3423 // char fpr; /* index into the array of 8 FPRs
3424 // * stored in the register save area
3425 // * fpr=0 corresponds to f1,
3426 // * fpr=1 to f2, etc.
3427 // */
3428 // char *overflow_arg_area;
3429 // /* location on stack that holds
3430 // * the next overflow argument
3431 // */
3432 // char *reg_save_area;
3433 // /* where r3:r10 and f1:f8 (if saved)
3434 // * are stored
3435 // */
3436 // } va_list[1];
3437
3438 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
3439 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
3440 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
3441 PtrVT);
3442 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3443 PtrVT);
3444
3445 uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
3446 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
3447
3448 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
3449 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
3450
3451 uint64_t FPROffset = 1;
3452 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
3453
3454 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3455
3456 // Store first byte : number of int regs
3457 SDValue firstStore =
3458 DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
3459 MachinePointerInfo(SV), MVT::i8);
3460 uint64_t nextOffset = FPROffset;
3461 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
3462 ConstFPROffset);
3463
3464 // Store second byte : number of float regs
3465 SDValue secondStore =
3466 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
3467 MachinePointerInfo(SV, nextOffset), MVT::i8);
3468 nextOffset += StackOffset;
3469 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
3470
3471 // Store second word : arguments given on stack
3472 SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
3473 MachinePointerInfo(SV, nextOffset));
3474 nextOffset += FrameOffset;
3475 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
3476
3477 // Store third word : arguments given in registers
3478 return DAG.getStore(thirdStore, dl, FR, nextPtr,
3479 MachinePointerInfo(SV, nextOffset));
3480}
3481
3482/// FPR - The set of FP registers that should be allocated for arguments
3483/// on Darwin and AIX.
3484static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
3485 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
3486 PPC::F11, PPC::F12, PPC::F13};
3487
3488/// CalculateStackSlotSize - Calculates the size reserved for this argument on
3489/// the stack.
3490static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
3491 unsigned PtrByteSize) {
3492 unsigned ArgSize = ArgVT.getStoreSize();
3493 if (Flags.isByVal())
3494 ArgSize = Flags.getByValSize();
3495
3496 // Round up to multiples of the pointer size, except for array members,
3497 // which are always packed.
3498 if (!Flags.isInConsecutiveRegs())
3499 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3500
3501 return ArgSize;
3502}
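// Illustrative sketch (editor's addition, not in the source listing): with an
// 8-byte pointer size, a 10-byte by-value aggregate is rounded up to a 16-byte
// stack slot here, whereas a 10-byte piece of a split array member
// (isInConsecutiveRegs) keeps its exact size and is packed.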
3503
3504/// CalculateStackSlotAlignment - Calculates the alignment of this argument
3505/// on the stack.
3506static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
3507 ISD::ArgFlagsTy Flags,
3508 unsigned PtrByteSize) {
3509 Align Alignment(PtrByteSize);
3510
3511 // Altivec parameters are padded to a 16 byte boundary.
3512 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3513 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3514 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3515 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3516 Alignment = Align(16);
3517
3518 // ByVal parameters are aligned as requested.
3519 if (Flags.isByVal()) {
3520 auto BVAlign = Flags.getNonZeroByValAlign();
3521 if (BVAlign > PtrByteSize) {
3522 if (BVAlign.value() % PtrByteSize != 0)
3523 llvm_unreachable(
3524 "ByVal alignment is not a multiple of the pointer size");
3525
3526 Alignment = BVAlign;
3527 }
3528 }
3529
3530 // Array members are always packed to their original alignment.
3531 if (Flags.isInConsecutiveRegs()) {
3532 // If the array member was split into multiple registers, the first
3533 // needs to be aligned to the size of the full type. (Except for
3534 // ppcf128, which is only aligned as its f64 components.)
3535 if (Flags.isSplit() && OrigVT != MVT::ppcf128)
3536 Alignment = Align(OrigVT.getStoreSize());
3537 else
3538 Alignment = Align(ArgVT.getStoreSize());
3539 }
3540
3541 return Alignment;
3542}
3543
3544/// CalculateStackSlotUsed - Return whether this argument will use its
3545/// stack slot (instead of being passed in registers). ArgOffset,
3546/// AvailableFPRs, and AvailableVRs must hold the current argument
3547/// position, and will be updated to account for this argument.
3548static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
3549 unsigned PtrByteSize, unsigned LinkageSize,
3550 unsigned ParamAreaSize, unsigned &ArgOffset,
3551 unsigned &AvailableFPRs,
3552 unsigned &AvailableVRs) {
3553 bool UseMemory = false;
3554
3555 // Respect alignment of argument on the stack.
3556 Align Alignment =
3557 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
3558 ArgOffset = alignTo(ArgOffset, Alignment);
3559 // If there's no space left in the argument save area, we must
3560 // use memory (this check also catches zero-sized arguments).
3561 if (ArgOffset >= LinkageSize + ParamAreaSize)
3562 UseMemory = true;
3563
3564 // Allocate argument on the stack.
3565 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
3566 if (Flags.isInConsecutiveRegsLast())
3567 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3568 // If we overran the argument save area, we must use memory
3569 // (this check catches arguments passed partially in memory)
3570 if (ArgOffset > LinkageSize + ParamAreaSize)
3571 UseMemory = true;
3572
3573 // However, if the argument is actually passed in an FPR or a VR,
3574 // we don't use memory after all.
3575 if (!Flags.isByVal()) {
3576 if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
3577 if (AvailableFPRs > 0) {
3578 --AvailableFPRs;
3579 return false;
3580 }
3581 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3582 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3583 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3584 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3585 if (AvailableVRs > 0) {
3586 --AvailableVRs;
3587 return false;
3588 }
3589 }
3590
3591 return UseMemory;
3592}
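// Illustrative sketch (editor's addition, not in the source listing): an f64
// argument whose aligned offset already lies beyond LinkageSize +
// ParamAreaSize still returns false (no stack slot actually used) as long as
// AvailableFPRs > 0, because it travels in an FPR; the same argument with no
// FPRs remaining returns true and is passed in memory.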
3593
3594/// EnsureStackAlignment - Round stack frame size up from NumBytes to
3595/// ensure minimum alignment required for target.
3596static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
3597 unsigned NumBytes) {
3598 return alignTo(NumBytes, Lowering->getStackAlign());
3599}
3600
3601SDValue PPCTargetLowering::LowerFormalArguments(
3602 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3603 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3604 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3605 if (Subtarget.isAIXABI())
3606 return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
3607 InVals);
3608 if (Subtarget.is64BitELFABI())
3609 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
3610 InVals);
3611 if (Subtarget.is32BitELFABI())
3612 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
3613 InVals);
3614
3615 return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, dl, DAG,
3616 InVals);
3617}
3618
3619SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
3620 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3621 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3622 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3623
3624 // 32-bit SVR4 ABI Stack Frame Layout:
3625 // +-----------------------------------+
3626 // +--> | Back chain |
3627 // | +-----------------------------------+
3628 // | | Floating-point register save area |
3629 // | +-----------------------------------+
3630 // | | General register save area |
3631 // | +-----------------------------------+
3632 // | | CR save word |
3633 // | +-----------------------------------+
3634 // | | VRSAVE save word |
3635 // | +-----------------------------------+
3636 // | | Alignment padding |
3637 // | +-----------------------------------+
3638 // | | Vector register save area |
3639 // | +-----------------------------------+
3640 // | | Local variable space |
3641 // | +-----------------------------------+
3642 // | | Parameter list area |
3643 // | +-----------------------------------+
3644 // | | LR save word |
3645 // | +-----------------------------------+
3646 // SP--> +--- | Back chain |
3647 // +-----------------------------------+
3648 //
3649 // Specifications:
3650 // System V Application Binary Interface PowerPC Processor Supplement
3651 // AltiVec Technology Programming Interface Manual
3652
3653 MachineFunction &MF = DAG.getMachineFunction();
3654 MachineFrameInfo &MFI = MF.getFrameInfo();
3655 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3656
3657 EVT PtrVT = getPointerTy(MF.getDataLayout());
3658 // Potential tail calls could cause overwriting of argument stack slots.
3659 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3660 (CallConv == CallingConv::Fast));
3661 const Align PtrAlign(4);
3662
3663 // Assign locations to all of the incoming arguments.
3664 SmallVector<CCValAssign, 16> ArgLocs;
3665 PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3666 *DAG.getContext());
3667
3668 // Reserve space for the linkage area on the stack.
3669 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3670 CCInfo.AllocateStack(LinkageSize, PtrAlign);
3671 if (useSoftFloat())
3672 CCInfo.PreAnalyzeFormalArguments(Ins);
3673
3674 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
3675 CCInfo.clearWasPPCF128();
3676
3677 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3678 CCValAssign &VA = ArgLocs[i];
3679
3680 // Arguments stored in registers.
3681 if (VA.isRegLoc()) {
3682 const TargetRegisterClass *RC;
3683 EVT ValVT = VA.getValVT();
3684
3685 switch (ValVT.getSimpleVT().SimpleTy) {
3686 default:
3687 llvm_unreachable("ValVT not supported by formal arguments Lowering");
3688 case MVT::i1:
3689 case MVT::i32:
3690 RC = &PPC::GPRCRegClass;
3691 break;
3692 case MVT::f32:
3693 if (Subtarget.hasP8Vector())
3694 RC = &PPC::VSSRCRegClass;
3695 else if (Subtarget.hasSPE())
3696 RC = &PPC::GPRCRegClass;
3697 else
3698 RC = &PPC::F4RCRegClass;
3699 break;
3700 case MVT::f64:
3701 if (Subtarget.hasVSX())
3702 RC = &PPC::VSFRCRegClass;
3703 else if (Subtarget.hasSPE())
3704 // SPE passes doubles in GPR pairs.
3705 RC = &PPC::GPRCRegClass;
3706 else
3707 RC = &PPC::F8RCRegClass;
3708 break;
3709 case MVT::v16i8:
3710 case MVT::v8i16:
3711 case MVT::v4i32:
3712 RC = &PPC::VRRCRegClass;
3713 break;
3714 case MVT::v4f32:
3715 RC = &PPC::VRRCRegClass;
3716 break;
3717 case MVT::v2f64:
3718 case MVT::v2i64:
3719 RC = &PPC::VRRCRegClass;
3720 break;
3721 }
3722
3723 SDValue ArgValue;
3724 // Transform the arguments stored in physical registers into
3725 // virtual ones.
3726 if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
3727 assert(i + 1 < e && "No second half of double precision argument");
3728 unsigned RegLo = MF.addLiveIn(VA.getLocReg(), RC);
3729 unsigned RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
3730 SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
3731 SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
3732 if (!Subtarget.isLittleEndian())
3733 std::swap (ArgValueLo, ArgValueHi);
3734 ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
3735 ArgValueHi);
3736 } else {
3737 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3738 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
3739 ValVT == MVT::i1 ? MVT::i32 : ValVT);
3740 if (ValVT == MVT::i1)
3741 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
3742 }
3743
3744 InVals.push_back(ArgValue);
3745 } else {
3746 // Argument stored in memory.
3747 assert(VA.isMemLoc());
3748
3749 // Get the extended size of the argument type in stack
3750 unsigned ArgSize = VA.getLocVT().getStoreSize();
3751 // Get the actual size of the argument type
3752 unsigned ObjSize = VA.getValVT().getStoreSize();
3753 unsigned ArgOffset = VA.getLocMemOffset();
3754 // Stack objects in PPC32 are right justified.
3755 ArgOffset += ArgSize - ObjSize;
3756 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
3757
3758 // Create load nodes to retrieve arguments from the stack.
3759 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3760 InVals.push_back(
3761 DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
3762 }
3763 }
3764
3765 // Assign locations to all of the incoming aggregate by value arguments.
3766 // Aggregates passed by value are stored in the local variable space of the
3767 // caller's stack frame, right above the parameter list area.
3768 SmallVector<CCValAssign, 16> ByValArgLocs;
3769 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
3770 ByValArgLocs, *DAG.getContext());
3771
3772 // Reserve stack space for the allocations in CCInfo.
3773 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);
3774
3775 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
3776
3777 // Area that is at least reserved in the caller of this function.
3778 unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
3779 MinReservedArea = std::max(MinReservedArea, LinkageSize);
3780
3781 // Set the size that is at least reserved in caller of this function. Tail
3782 // call optimized function's reserved stack space needs to be aligned so that
3783 // taking the difference between two stack areas will result in an aligned
3784 // stack.
3785 MinReservedArea =
3786 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
3787 FuncInfo->setMinReservedArea(MinReservedArea);
3788
3789 SmallVector<SDValue, 8> MemOps;
3790
3791 // If the function takes variable number of arguments, make a frame index for
3792 // the start of the first vararg value... for expansion of llvm.va_start.
3793 if (isVarArg) {
3794 static const MCPhysReg GPArgRegs[] = {
3795 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
3796 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
3797 };
3798 const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
3799
3800 static const MCPhysReg FPArgRegs[] = {
3801 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
3802 PPC::F8
3803 };
3804 unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
3805
3806 if (useSoftFloat() || hasSPE())
3807 NumFPArgRegs = 0;
3808
3809 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
3810 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
3811
3812 // Make room for NumGPArgRegs and NumFPArgRegs.
3813 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
3814 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
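// For example, with all argument registers available this reserves
// 8*4 + 8*8 = 96 bytes on PPC32; under soft-float or SPE (NumFPArgRegs == 0)
// it is 32 bytes (illustrative).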
3815
3816 FuncInfo->setVarArgsStackOffset(
3817 MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
3818 CCInfo.getNextStackOffset(), true));
3819
3820 FuncInfo->setVarArgsFrameIndex(
3821 MFI.CreateStackObject(Depth, Align(8), false));
3822 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3823
3824 // The fixed integer arguments of a variadic function are stored to the
3825 // VarArgsFrameIndex on the stack so that they may be loaded by
3826 // dereferencing the result of va_next.
3827 for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
3828 // Get an existing live-in vreg, or add a new one.
3829 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
3830 if (!VReg)
3831 VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
3832
3833 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3834 SDValue Store =
3835 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3836 MemOps.push_back(Store);
3837 // Increment the address by four for the next argument to store
3838 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
3839 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3840 }
3841
3842 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
3843 // is set.
3844 // The double arguments are stored to the VarArgsFrameIndex
3845 // on the stack.
3846 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
3847 // Get an existing live-in vreg, or add a new one.
3848 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
3849 if (!VReg)
3850 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
3851
3852 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
3853 SDValue Store =
3854 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3855 MemOps.push_back(Store);
3856 // Increment the address by eight for the next argument to store
3857 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
3858 PtrVT);
3859 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3860 }
3861 }
3862
3863 if (!MemOps.empty())
3864 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3865
3866 return Chain;
3867}
3868
3869// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3870// value to MVT::i64 and then truncate to the correct register size.
3871SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
3872 EVT ObjectVT, SelectionDAG &DAG,
3873 SDValue ArgVal,
3874 const SDLoc &dl) const {
3875 if (Flags.isSExt())
3876 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
3877 DAG.getValueType(ObjectVT));
3878 else if (Flags.isZExt())
3879 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
3880 DAG.getValueType(ObjectVT));
3881
3882 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
3883}
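// For example (illustrative), a 'signext i32' value that arrived in an i64
// GPR becomes:
//   t1 = AssertSext t0, ValueType:i32
//   t2 = truncate t1 to i32
// so later DAG combines know the upper 32 bits are a sign extension of the
// low 32 bits.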
3884
3885SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
3886 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3887 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3888 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3889 // TODO: add description of PPC stack frame format, or at least some docs.
3890 //
3891 bool isELFv2ABI = Subtarget.isELFv2ABI();
3892 bool isLittleEndian = Subtarget.isLittleEndian();
3893 MachineFunction &MF = DAG.getMachineFunction();
3894 MachineFrameInfo &MFI = MF.getFrameInfo();
3895 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3896
3897 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
3898 "fastcc not supported on varargs functions");
3899
3900 EVT PtrVT = getPointerTy(MF.getDataLayout());
3901 // Potential tail calls could cause overwriting of argument stack slots.
3902 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3903 (CallConv == CallingConv::Fast));
3904 unsigned PtrByteSize = 8;
3905 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3906
3907 static const MCPhysReg GPR[] = {
3908 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
3909 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
3910 };
3911 static const MCPhysReg VR[] = {
3912 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
3913 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
3914 };
3915
3916 const unsigned Num_GPR_Regs = array_lengthof(GPR);
3917 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
3918 const unsigned Num_VR_Regs = array_lengthof(VR);
3919
3920 // Do a first pass over the arguments to determine whether the ABI
3921 // guarantees that our caller has allocated the parameter save area
3922 // on its stack frame. In the ELFv1 ABI, this is always the case;
3923 // in the ELFv2 ABI, it is true if this is a vararg function or if
3924 // any parameter is located in a stack slot.
3925
3926 bool HasParameterArea = !isELFv2ABI || isVarArg;
3927 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
3928 unsigned NumBytes = LinkageSize;
3929 unsigned AvailableFPRs = Num_FPR_Regs;
3930 unsigned AvailableVRs = Num_VR_Regs;
3931 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
3932 if (Ins[i].Flags.isNest())
3933 continue;
3934
3935 if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
3936 PtrByteSize, LinkageSize, ParamAreaSize,
3937 NumBytes, AvailableFPRs, AvailableVRs))
3938 HasParameterArea = true;
3939 }
3940
3941 // Add DAG nodes to load the arguments or copy them out of registers. On
3942 // entry to a function on PPC, the arguments start after the linkage area,
3943 // although the first ones are often in registers.
3944
3945 unsigned ArgOffset = LinkageSize;
3946 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
3947 SmallVector<SDValue, 8> MemOps;
3948 Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
3949 unsigned CurArgIdx = 0;
3950 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
3951 SDValue ArgVal;
3952 bool needsLoad = false;
3953 EVT ObjectVT = Ins[ArgNo].VT;
3954 EVT OrigVT = Ins[ArgNo].ArgVT;
3955 unsigned ObjSize = ObjectVT.getStoreSize();
3956 unsigned ArgSize = ObjSize;
3957 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
3958 if (Ins[ArgNo].isOrigArg()) {
3959 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
3960 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
3961 }
3962 // We re-align the argument offset for each argument, except under the
3963 // fast calling convention, where we make sure to do so only when the
3964 // argument will actually use a stack slot.
3965 unsigned CurArgOffset;
3966 Align Alignment;
3967 auto ComputeArgOffset = [&]() {
3968 /* Respect alignment of argument on the stack. */
3969 Alignment =
3970 CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
3971 ArgOffset = alignTo(ArgOffset, Alignment);
3972 CurArgOffset = ArgOffset;
3973 };
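// ComputeArgOffset is invoked lazily: under the fast calling convention it
// runs only on the paths below that actually need a stack slot (byval and
// needsLoad), so ArgOffset does not advance for purely register-based
// arguments.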
3974
3975 if (CallConv != CallingConv::Fast) {
3976 ComputeArgOffset();
3977
3978 /* Compute GPR index associated with argument offset. */
3979 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
3980 GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
3981 }
3982
3983 // FIXME the codegen can be much improved in some cases.
3984 // We do not have to keep everything in memory.
3985 if (Flags.isByVal()) {
3986 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
3987
3988 if (CallConv == CallingConv::Fast)
3989 ComputeArgOffset();
3990
3991 // ObjSize is the true size, ArgSize rounded up to multiple of registers.
3992 ObjSize = Flags.getByValSize();
3993 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3994 // Empty aggregate parameters do not take up registers. Examples:
3995 // struct { } a;
3996 // union { } b;
3997 // int c[0];
3998 // etc. However, we have to provide a place-holder in InVals, so
3999 // pretend we have an 8-byte item at the current address for that
4000 // purpose.
4001 if (!ObjSize) {
4002 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4003 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4004 InVals.push_back(FIN);
4005 continue;
4006 }
4007
4008 // Create a stack object covering all stack doublewords occupied
4009 // by the argument. If the argument is (fully or partially) on
4010 // the stack, or if the argument is fully in registers but the
4011 // caller has allocated the parameter save area anyway, we can refer
4012 // directly to the caller's stack frame. Otherwise, create a
4013 // local copy in our own frame.
4014 int FI;
4015 if (HasParameterArea ||
4016 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4017 FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4018 else
4019 FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4020 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4021
4022 // Handle aggregates smaller than 8 bytes.
4023 if (ObjSize < PtrByteSize) {
4024 // The value of the object is its address, which differs from the
4025 // address of the enclosing doubleword on big-endian systems.
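// For a 3-byte struct, for example, the object address is FIN + 5 on
// big-endian (FIN itself on little-endian), and since 3 bytes do not fit a
// truncating store the whole doubleword register image is spilled below.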
4026 SDValue Arg = FIN;
4027 if (!isLittleEndian) {
4028 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4029 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4030 }
4031 InVals.push_back(Arg);
4032
4033 if (GPR_idx != Num_GPR_Regs) {
4034 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4035 FuncInfo->addLiveInAttr(VReg, Flags);
4036 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4037 SDValue Store;
4038
4039 if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
4040 EVT ObjType = (ObjSize == 1 ? MVT::i8 :
4041 (ObjSize == 2 ? MVT::i16 : MVT::i32));
4042 Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4043 MachinePointerInfo(&*FuncArg), ObjType);
4044 } else {
4045 // For sizes that don't fit a truncating store (3, 5, 6, 7),
4046 // store the whole register as-is to the parameter save area
4047 // slot.
4048 Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4049 MachinePointerInfo(&*FuncArg));
4050 }
4051
4052 MemOps.push_back(Store);
4053 }
4054 // Whether we copied from a register or not, advance the offset
4055 // into the parameter save area by a full doubleword.
4056 ArgOffset += PtrByteSize;
4057 continue;
4058 }
4059
4060 // The value of the object is its address, which is the address of
4061 // its first stack doubleword.
4062 InVals.push_back(FIN);
4063
4064 // Store whatever pieces of the object are in registers to memory.
4065 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4066 if (GPR_idx == Num_GPR_Regs)
4067 break;
4068
4069 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4070 FuncInfo->addLiveInAttr(VReg, Flags);
4071 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4072 SDValue Addr = FIN;
4073 if (j) {
4074 SDValue Off = DAG.getConstant(j, dl, PtrVT);
4075 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4076 }
4077 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
4078 MachinePointerInfo(&*FuncArg, j));
4079 MemOps.push_back(Store);
4080 ++GPR_idx;
4081 }
4082 ArgOffset += ArgSize;
4083 continue;
4084 }
4085
4086 switch (ObjectVT.getSimpleVT().SimpleTy) {
4087 default: llvm_unreachable("Unhandled argument type!");
4088 case MVT::i1:
4089 case MVT::i32:
4090 case MVT::i64:
4091 if (Flags.isNest()) {
4092 // The 'nest' parameter, if any, is passed in R11.
4093 unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4094 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4095
4096 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4097 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4098
4099 break;
4100 }
4101
4102 // These can be scalar arguments or elements of an integer array type
4103 // passed directly. Clang may use those instead of "byval" aggregate
4104 // types to avoid forcing arguments to memory unnecessarily.
4105 if (GPR_idx != Num_GPR_Regs) {
4106 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4107 FuncInfo->addLiveInAttr(VReg, Flags);
4108 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4109
4110 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4111 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4112 // value to MVT::i64 and then truncate to the correct register size.
4113 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4114 } else {
4115 if (CallConv == CallingConv::Fast)
4116 ComputeArgOffset();
4117
4118 needsLoad = true;
4119 ArgSize = PtrByteSize;
4120 }
4121 if (CallConv != CallingConv::Fast || needsLoad)
4122 ArgOffset += 8;
4123 break;
4124
4125 case MVT::f32:
4126 case MVT::f64:
4127 // These can be scalar arguments or elements of a float array type
4128 // passed directly. The latter are used to implement ELFv2 homogenous
4129 // float aggregates.
4130 if (FPR_idx != Num_FPR_Regs) {
4131 unsigned VReg;
4132
4133 if (ObjectVT == MVT::f32)
4134 VReg = MF.addLiveIn(FPR[FPR_idx],
4135 Subtarget.hasP8Vector()
4136 ? &PPC::VSSRCRegClass
4137 : &PPC::F4RCRegClass);
4138 else
4139 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4140 ? &PPC::VSFRCRegClass
4141 : &PPC::F8RCRegClass);
4142
4143 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4144 ++FPR_idx;
4145 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4146 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4147 // once we support fp <-> gpr moves.
4148
4149 // This can only ever happen in the presence of f32 array types,
4150 // since otherwise we never run out of FPRs before running out
4151 // of GPRs.
4152 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4153 FuncInfo->addLiveInAttr(VReg, Flags);
4154 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4155
4156 if (ObjectVT == MVT::f32) {
4157 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4158 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4159 DAG.getConstant(32, dl, MVT::i32));
4160 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4161 }
4162
4163 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4164 } else {
4165 if (CallConv == CallingConv::Fast)
4166 ComputeArgOffset();
4167
4168 needsLoad = true;
4169 }
4170
4171 // When passing an array of floats, the array occupies consecutive
4172 // space in the argument area; only round up to the next doubleword
4173 // at the end of the array. Otherwise, each float takes 8 bytes.
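// For example, a homogeneous aggregate of five f32 elements advances
// ArgOffset by 4 bytes per element and rounds up only after the last one,
// occupying 24 bytes in total (illustrative).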
4174 if (CallConv != CallingConv::Fast || needsLoad) {
4175 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4176 ArgOffset += ArgSize;
4177 if (Flags.isInConsecutiveRegsLast())
4178 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4179 }
4180 break;
4181 case MVT::v4f32:
4182 case MVT::v4i32:
4183 case MVT::v8i16:
4184 case MVT::v16i8:
4185 case MVT::v2f64:
4186 case MVT::v2i64:
4187 case MVT::v1i128:
4188 case MVT::f128:
4189 // These can be scalar arguments or elements of a vector array type
4190 // passed directly. The latter are used to implement ELFv2 homogenous
4191 // vector aggregates.
4192 if (VR_idx != Num_VR_Regs) {
4193 unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4194 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4195 ++VR_idx;
4196 } else {
4197 if (CallConv == CallingConv::Fast)
4198 ComputeArgOffset();
4199 needsLoad = true;
4200 }
4201 if (CallConv != CallingConv::Fast || needsLoad)
4202 ArgOffset += 16;
4203 break;
4204 }
4205
4206 // We need to load the argument to a virtual register if we determined
4207 // above that we ran out of physical registers of the appropriate type.
4208 if (needsLoad) {
4209 if (ObjSize < ArgSize && !isLittleEndian)
4210 CurArgOffset += ArgSize - ObjSize;
4211 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4212 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4213 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4214 }
4215
4216 InVals.push_back(ArgVal);
4217 }
4218
4219 // Area that is at least reserved in the caller of this function.
4220 unsigned MinReservedArea;
4221 if (HasParameterArea)
4222 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4223 else
4224 MinReservedArea = LinkageSize;
4225
4226 // Set the size that is at least reserved in caller of this function. Tail
4227 // call optimized functions' reserved stack space needs to be aligned so that
4228 // taking the difference between two stack areas will result in an aligned
4229 // stack.
4230 MinReservedArea =
4231 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4232 FuncInfo->setMinReservedArea(MinReservedArea);
4233
4234 // If the function takes variable number of arguments, make a frame index for
4235 // the start of the first vararg value... for expansion of llvm.va_start.
4236 // The ELFv2 ABI specification states:
4237 // C programs that are intended to be *portable* across different compilers
4238 // and architectures must use the header file <stdarg.h> to deal with variable
4239 // argument lists.
4240 if (isVarArg && MFI.hasVAStart()) {
4241 int Depth = ArgOffset;
4242
4243 FuncInfo->setVarArgsFrameIndex(
4244 MFI.CreateFixedObject(PtrByteSize, Depth, true));
4245 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4246
4247 // If this function is vararg, store any remaining integer argument regs
4248 // to their spots on the stack so that they may be loaded by dereferencing
4249 // the result of va_next.
4250 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4251 GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4252 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4253 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4254 SDValue Store =
4255 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4256 MemOps.push_back(Store);
4257 // Increment the address by eight for the next argument to store
4258 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4259 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4260 }
4261 }
4262
4263 if (!MemOps.empty())
4264 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4265
4266 return Chain;
4267}
4268
4269SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
4270 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4271 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4272 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4273 // TODO: add description of PPC stack frame format, or at least some docs.
4274 //
4275 MachineFunction &MF = DAG.getMachineFunction();
4276 MachineFrameInfo &MFI = MF.getFrameInfo();
4277 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4278
4279 EVT PtrVT = getPointerTy(MF.getDataLayout());
4280 bool isPPC64 = PtrVT == MVT::i64;
4281 // Potential tail calls could cause overwriting of argument stack slots.
4282 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4283 (CallConv == CallingConv::Fast));
4284 unsigned PtrByteSize = isPPC64 ? 8 : 4;
4285 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4286 unsigned ArgOffset = LinkageSize;
4287 // Area that is at least reserved in caller of this function.
4288 unsigned MinReservedArea = ArgOffset;
4289
4290 static const MCPhysReg GPR_32[] = { // 32-bit registers.
4291 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4292 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4293 };
4294 static const MCPhysReg GPR_64[] = { // 64-bit registers.
4295 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4296 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4297 };
4298 static const MCPhysReg VR[] = {
4299 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4300 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4301 };
4302
4303 const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
4304 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4305 const unsigned Num_VR_Regs = array_lengthof( VR);
4306
4307 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4308
4309 const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
4310
4311 // In 32-bit non-varargs functions, the stack space for vectors is after the
4312 // stack space for non-vectors. We do not use this space unless we have
4313 // too many vectors to fit in registers, something that only occurs in
4314 // constructed examples:), but we have to walk the arglist to figure
4315 // that out...for the pathological case, compute VecArgOffset as the
4316 // start of the vector parameter area. Computing VecArgOffset is the
4317 // entire point of the following loop.
4318 unsigned VecArgOffset = ArgOffset;
4319 if (!isVarArg && !isPPC64) {
4320 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
4321 ++ArgNo) {
4322 EVT ObjectVT = Ins[ArgNo].VT;
4323 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4324
4325 if (Flags.isByVal()) {
4326 // ObjSize is the true size, ArgSize rounded up to multiple of regs.
4327 unsigned ObjSize = Flags.getByValSize();
4328 unsigned ArgSize =
4329 ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4330 VecArgOffset += ArgSize;
4331 continue;
4332 }
4333
4334 switch(ObjectVT.getSimpleVT().SimpleTy) {
4335 default: llvm_unreachable("Unhandled argument type!");
4336 case MVT::i1:
4337 case MVT::i32:
4338 case MVT::f32:
4339 VecArgOffset += 4;
4340 break;
4341 case MVT::i64: // PPC64
4342 case MVT::f64:
4343 // FIXME: We are guaranteed to be !isPPC64 at this point.
4344 // Does MVT::i64 apply?
4345 VecArgOffset += 8;
4346 break;
4347 case MVT::v4f32:
4348 case MVT::v4i32:
4349 case MVT::v8i16:
4350 case MVT::v16i8:
4351 // Nothing to do, we're only looking at Nonvector args here.
4352 break;
4353 }
4354 }
4355 }
4356 // We've found where the vector parameter area in memory is. Skip the
4357 // first 12 parameters; these don't use that memory.
4358 VecArgOffset = ((VecArgOffset+15)/16)*16;
4359 VecArgOffset += 12*16;
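// For example, if the scan above leaves VecArgOffset at 56, it is rounded
// up to 64 and then advanced by 12*16 = 192 bytes, so the first in-memory
// vector parameter lands at offset 256 (illustrative).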
4360
4361 // Add DAG nodes to load the arguments or copy them out of registers. On
4362 // entry to a function on PPC, the arguments start after the linkage area,
4363 // although the first ones are often in registers.
4364
4365 SmallVector<SDValue, 8> MemOps;
4366 unsigned nAltivecParamsAtEnd = 0;
4367 Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4368 unsigned CurArgIdx = 0;
4369 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4370 SDValue ArgVal;
4371 bool needsLoad = false;
4372 EVT ObjectVT = Ins[ArgNo].VT;
4373 unsigned ObjSize = ObjectVT.getSizeInBits()/8;
4374 unsigned ArgSize = ObjSize;
4375 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4376 if (Ins[ArgNo].isOrigArg()) {
4377 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4378 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4379 }
4380 unsigned CurArgOffset = ArgOffset;
4381
4382 // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
4383 if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
4384 ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
4385 if (isVarArg || isPPC64) {
4386 MinReservedArea = ((MinReservedArea+15)/16)*16;
4387 MinReservedArea += CalculateStackSlotSize(ObjectVT,
4388 Flags,
4389 PtrByteSize);
4390 } else nAltivecParamsAtEnd++;
4391 } else
4392 // Calculate min reserved area.
4393 MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
4394 Flags,
4395 PtrByteSize);
4396
4397 // FIXME the codegen can be much improved in some cases.
4398 // We do not have to keep everything in memory.
4399 if (Flags.isByVal()) {
4400 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4401
4402 // ObjSize is the true size, ArgSize rounded up to multiple of registers.
4403 ObjSize = Flags.getByValSize();
4404 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4405 // Objects of size 1 and 2 are right justified, everything else is
4406 // left justified. This means the memory address is adjusted forwards.
4407 if (ObjSize==1 || ObjSize==2) {
4408 CurArgOffset = CurArgOffset + (4 - ObjSize);
4409 }
4410 // The value of the object is its address.
4411 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, false, true);
4412 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4413 InVals.push_back(FIN);
4414 if (ObjSize==1 || ObjSize==2) {
4415 if (GPR_idx != Num_GPR_Regs) {
4416 unsigned VReg;
4417 if (isPPC64)
4418 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4419 else
4420 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4421 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4422 EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
4423 SDValue Store =
4424 DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
4425 MachinePointerInfo(&*FuncArg), ObjType);
4426 MemOps.push_back(Store);
4427 ++GPR_idx;
4428 }
4429
4430 ArgOffset += PtrByteSize;
4431
4432 continue;
4433 }
4434 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4435 // Store whatever pieces of the object are in registers
4436 // to memory. ArgOffset will be the address of the beginning
4437 // of the object.
4438 if (GPR_idx != Num_GPR_Regs) {
4439 unsigned VReg;
4440 if (isPPC64)
4441 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4442 else
4443 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4444 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4445 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4446 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4447 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4448 MachinePointerInfo(&*FuncArg, j));
4449 MemOps.push_back(Store);
4450 ++GPR_idx;
4451 ArgOffset += PtrByteSize;
4452 } else {
4453 ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
4454 break;
4455 }
4456 }
4457 continue;
4458 }
4459
4460 switch (ObjectVT.getSimpleVT().SimpleTy) {
4461 default: llvm_unreachable("Unhandled argument type!");
4462 case MVT::i1:
4463 case MVT::i32:
4464 if (!isPPC64) {
4465 if (GPR_idx != Num_GPR_Regs) {
4466 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4467 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
4468
4469 if (ObjectVT == MVT::i1)
4470 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);
4471
4472 ++GPR_idx;
4473 } else {
4474 needsLoad = true;
4475 ArgSize = PtrByteSize;
4476 }
4477 // All int arguments reserve stack space in the Darwin ABI.
4478 ArgOffset += PtrByteSize;
4479 break;
4480 }
4481 LLVM_FALLTHROUGH;
4482 case MVT::i64: // PPC64
4483 if (GPR_idx != Num_GPR_Regs) {
4484 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4485 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4486
4487 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4488 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4489 // value to MVT::i64 and then truncate to the correct register size.
4490 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4491
4492 ++GPR_idx;
4493 } else {
4494 needsLoad = true;
4495 ArgSize = PtrByteSize;
4496 }
4497 // All int arguments reserve stack space in the Darwin ABI.
4498 ArgOffset += 8;
4499 break;
4500
4501 case MVT::f32:
4502 case MVT::f64:
4503 // Every 4 bytes of argument space consumes one of the GPRs available for
4504 // argument passing.
4505 if (GPR_idx != Num_GPR_Regs) {
4506 ++GPR_idx;
4507 if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
4508 ++GPR_idx;
4509 }
4510 if (FPR_idx != Num_FPR_Regs) {
4511 unsigned VReg;
4512
4513 if (ObjectVT == MVT::f32)
4514 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
4515 else
4516 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
4517
4518 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4519 ++FPR_idx;
4520 } else {
4521 needsLoad = true;
4522 }
4523
4524 // All FP arguments reserve stack space in the Darwin ABI.
4525 ArgOffset += isPPC64 ? 8 : ObjSize;
4526 break;
4527 case MVT::v4f32:
4528 case MVT::v4i32:
4529 case MVT::v8i16:
4530 case MVT::v16i8:
4531 // Note that vector arguments in registers don't reserve stack space,
4532 // except in varargs functions.
4533 if (VR_idx != Num_VR_Regs) {
4534 unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4535 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4536 if (isVarArg) {
4537 while ((ArgOffset % 16) != 0) {
4538 ArgOffset += PtrByteSize;
4539 if (GPR_idx != Num_GPR_Regs)
4540 GPR_idx++;
4541 }
4542 ArgOffset += 16;
4543 GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
4544 }
4545 ++VR_idx;
4546 } else {
4547 if (!isVarArg && !isPPC64) {
4548 // Vectors go after all the nonvectors.
4549 CurArgOffset = VecArgOffset;
4550 VecArgOffset += 16;
4551 } else {
4552 // Vectors are aligned.
4553 ArgOffset = ((ArgOffset+15)/16)*16;
4554 CurArgOffset = ArgOffset;
4555 ArgOffset += 16;
4556 }
4557 needsLoad = true;
4558 }
4559 break;
4560 }
4561
4562 // We need to load the argument to a virtual register if we determined above
4563 // that we ran out of physical registers of the appropriate type.
4564 if (needsLoad) {
4565 int FI = MFI.CreateFixedObject(ObjSize,
4566 CurArgOffset + (ArgSize - ObjSize),
4567 isImmutable);
4568 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4569 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4570 }
4571
4572 InVals.push_back(ArgVal);
4573 }
4574
4575 // Allow for Altivec parameters at the end, if needed.
4576 if (nAltivecParamsAtEnd) {
4577 MinReservedArea = ((MinReservedArea+15)/16)*16;
4578 MinReservedArea += 16*nAltivecParamsAtEnd;
4579 }
4580
4581 // Area that is at least reserved in the caller of this function.
4582 MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);
4583
4584 // Set the size that is at least reserved in caller of this function. Tail
4585 // call optimized functions' reserved stack space needs to be aligned so that
4586 // taking the difference between two stack areas will result in an aligned
4587 // stack.
4588 MinReservedArea =
4589 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4590 FuncInfo->setMinReservedArea(MinReservedArea);
4591
4592 // If the function takes variable number of arguments, make a frame index for
4593 // the start of the first vararg value... for expansion of llvm.va_start.
4594 if (isVarArg) {
4595 int Depth = ArgOffset;
4596
4597 FuncInfo->setVarArgsFrameIndex(
4598 MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
4599 Depth, true));
4600 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4601
4602 // If this function is vararg, store any remaining integer argument regs
4603 // to their spots on the stack so that they may be loaded by dereferencing
4604 // the result of va_next.
4605 for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
4606 unsigned VReg;
4607
4608 if (isPPC64)
4609 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4610 else
4611 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4612
4613 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4614 SDValue Store =
4615 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4616 MemOps.push_back(Store);
4617 // Increment the address by the pointer size for the next argument to store
4618 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4619 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4620 }
4621 }
4622
4623 if (!MemOps.empty())
4624 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4625
4626 return Chain;
4627}
4628
4629/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4630/// adjusted to accommodate the arguments for the tailcall.
4631static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4632 unsigned ParamSize) {
4633
4634 if (!isTailCall) return 0;
4635
4636 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4637 unsigned CallerMinReservedArea = FI->getMinReservedArea();
4638 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
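// For example, if the caller reserved 112 bytes of argument area but this
// tail call needs only 96, SPDiff is +16; a negative SPDiff means the
// callee needs more argument space than the caller reserved (illustrative
// values).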
4639 // Remember only if the new adjustment is bigger.
4640 if (SPDiff < FI->getTailCallSPDelta())
4641 FI->setTailCallSPDelta(SPDiff);
4642
4643 return SPDiff;
4644}
4645
4646static bool isFunctionGlobalAddress(SDValue Callee);
4647
4648static bool callsShareTOCBase(const Function *Caller, SDValue Callee,
4649 const TargetMachine &TM) {
4650 // It does not make sense to call callsShareTOCBase() with a caller that
4651 // is PC Relative since PC Relative callers do not have a TOC.
4652#ifndef NDEBUG
4653 const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4654 assert(!STICaller->isUsingPCRelativeCalls() &&
4655 "PC Relative callers do not have a TOC and cannot share a TOC Base");
4656#endif
4657
4658 // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4659 // don't have enough information to determine if the caller and callee share
4660 // the same TOC base, so we have to pessimistically assume they don't for
4661 // correctness.
4662 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4663 if (!G)
4664 return false;
4665
4666 const GlobalValue *GV = G->getGlobal();
4667
4668 // If the callee is preemptable, then the static linker will use a plt-stub
4669 // which saves the toc to the stack, and needs a nop after the call
4670 // instruction to convert to a toc-restore.
4671 if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
4672 return false;
4673
4674 // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4675 // We may need a TOC restore in the situation where the caller requires a
4676 // valid TOC but the callee is PC Relative and does not.
4677 const Function *F = dyn_cast<Function>(GV);
4678 const GlobalAlias *Alias = dyn_cast<GlobalAlias>(GV);
4679
4680 // If we have an Alias we can try to get the function from there.
4681 if (Alias) {
4682 const GlobalObject *GlobalObj = Alias->getBaseObject();
4683 F = dyn_cast<Function>(GlobalObj);
4684 }
4685
4686 // If we still have no valid function pointer we do not have enough
4687 // information to determine if the callee uses PC Relative calls so we must
4688 // assume that it does.
4689 if (!F)
4690 return false;
4691
4692 // If the callee uses PC Relative we cannot guarantee that the callee won't
4693 // clobber the TOC of the caller and so we must assume that the two
4694 // functions do not share a TOC base.
4695 const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4696 if (STICallee->isUsingPCRelativeCalls())
4697 return false;
4698
4699 // The medium and large code models are expected to provide a sufficiently
4700 // large TOC to provide all data addressing needs of a module with a
4701 // single TOC.
4702 if (CodeModel::Medium == TM.getCodeModel() ||
4703 CodeModel::Large == TM.getCodeModel())
4704 return true;
4705
4706 // Otherwise we need to ensure callee and caller are in the same section,
4707 // since the linker may allocate multiple TOCs, and we don't know which
4708 // sections will belong to the same TOC base.
4709 if (!GV->isStrongDefinitionForLinker())
4710 return false;
4711
4712 // Any explicitly-specified sections and section prefixes must also match.
4713 // Also, if we're using -ffunction-sections, then each function is always in
4714 // a different section (the same is true for COMDAT functions).
4715 if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
4716 GV->getSection() != Caller->getSection())
4717 return false;
4718 if (const auto *F = dyn_cast<Function>(GV)) {
4719 if (F->getSectionPrefix() != Caller->getSectionPrefix())
4720 return false;
4721 }
4722
4723 return true;
4724}
4725
4726static bool
4727needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4728 const SmallVectorImpl<ISD::OutputArg> &Outs) {
4729 assert(Subtarget.is64BitELFABI());
4730
4731 const unsigned PtrByteSize = 8;
4732 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4733
4734 static const MCPhysReg GPR[] = {
4735 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4736 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4737 };
4738 static const MCPhysReg VR[] = {
4739 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4740 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4741 };
4742
4743 const unsigned NumGPRs = array_lengthof(GPR);
4744 const unsigned NumFPRs = 13;
4745 const unsigned NumVRs = array_lengthof(VR);
4746 const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4747
4748 unsigned NumBytes = LinkageSize;
4749 unsigned AvailableFPRs = NumFPRs;
4750 unsigned AvailableVRs = NumVRs;
4751
4752 for (const ISD::OutputArg& Param : Outs) {
4753 if (Param.Flags.isNest()) continue;
4754
4755 if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
4756 LinkageSize, ParamAreaSize, NumBytes,
4757 AvailableFPRs, AvailableVRs))
4758 return true;
4759 }
4760 return false;
4761}
4762
4763static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
4764 if (CB.arg_size() != CallerFn->arg_size())
4765 return false;
4766
4767 auto CalleeArgIter = CB.arg_begin();
4768 auto CalleeArgEnd = CB.arg_end();
4769 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4770
4771 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4772 const Value* CalleeArg = *CalleeArgIter;
4773 const Value* CallerArg = &(*CallerArgIter);
4774 if (CalleeArg == CallerArg)
4775 continue;
4776
4777 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4778 // tail call @callee([4 x i64] undef, [4 x i64] %b)
4779 // }
4780 // 1st argument of callee is undef and has the same type as caller.
4781 if (CalleeArg->getType() == CallerArg->getType() &&
4782 isa<UndefValue>(CalleeArg))
4783 continue;
4784
4785 return false;
4786 }
4787
4788 return true;
4789}
4790
4791 // Returns true if TCO is possible between the caller's and callee's
4792 // calling conventions.
4793static bool
4794areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
4795 CallingConv::ID CalleeCC) {
4796 // Tail calls are possible with fastcc and ccc.
4797 auto isTailCallableCC = [] (CallingConv::ID CC){
4798 return CC == CallingConv::C || CC == CallingConv::Fast;
4799 };
4800 if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
4801 return false;
4802
4803 // We can safely tail call both fastcc and ccc callees from a c calling
4804 // convention caller. If the caller is fastcc, we may have less stack space
4805 // than a non-fastcc caller with the same signature so disable tail-calls in
4806 // that case.
4807 return CallerCC == CallingConv::C || CallerCC == CalleeCC;
4808}
4809
4810bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
4811 SDValue Callee, CallingConv::ID CalleeCC, const CallBase *CB, bool isVarArg,
4812 const SmallVectorImpl<ISD::OutputArg> &Outs,
4813 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
4814 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
4815
4816 if (DisableSCO && !TailCallOpt) return false;
4817
4818 // Variadic argument functions are not supported.
4819 if (isVarArg) return false;
4820
4821 auto &Caller = DAG.getMachineFunction().getFunction();
4822 // Check that the calling conventions are compatible for tco.
4823 if (!areCallingConvEligibleForTCO_64SVR4(Caller.getCallingConv(), CalleeCC))
4824 return false;
4825
4826 // A caller containing any byval parameter is not supported.
4827 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
4828 return false;
4829
4830 // A callee containing any byval parameter is not supported either.
4831 // Note: This is a quick workaround, because in some cases, e.g.
4832 // caller's stack size > callee's stack size, we are still able to apply
4833 // sibling call optimization. For example, gcc is able to do SCO for caller1
4834 // in the following example, but not for caller2.
4835 // struct test {
4836 // long int a;
4837 // char ary[56];
4838 // } gTest;
4839 // __attribute__((noinline)) int callee(struct test v, struct test *b) {
4840 // b->a = v.a;
4841 // return 0;
4842 // }
4843 // void caller1(struct test a, struct test c, struct test *b) {
4844 // callee(gTest, b); }
4845 // void caller2(struct test *b) { callee(gTest, b); }
4846 if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
4847 return false;
4848
4849 // If callee and caller use different calling conventions, we cannot pass
4850 // parameters on stack since offsets for the parameter area may be different.
4851 if (Caller.getCallingConv() != CalleeCC &&
4852 needStackSlotPassParameters(Subtarget, Outs))
4853 return false;
4854
4855 // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
4856 // the caller and callee share the same TOC for TCO/SCO. If the caller and
4857 // callee potentially have different TOC bases then we cannot tail call since
4858 // we need to restore the TOC pointer after the call.
4859 // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
4860 // We cannot guarantee this for indirect calls or calls to external functions.
4861 // When PC-Relative addressing is used, the concept of the TOC is no longer
4862 // applicable so this check is not required.
4863 // Check first for indirect calls.
4864 if (!Subtarget.isUsingPCRelativeCalls() &&
4865 !isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee))
4866 return false;
4867
4868 // Check if we share the TOC base.
4869 if (!Subtarget.isUsingPCRelativeCalls() &&
4870 !callsShareTOCBase(&Caller, Callee, getTargetMachine()))
4871 return false;
4872
4873 // TCO allows altering callee ABI, so we don't have to check further.
4874 if (CalleeCC == CallingConv::Fast && TailCallOpt)
4875 return true;
4876
4877 if (DisableSCO) return false;
4878
4879 // If the callee uses the same argument list as the caller, we can apply
4880 // SCO in this case. If not, we need to check whether the callee needs
4881 // stack space for passing arguments.
4882 // PC Relative tail calls may not have a CallBase.
4883 // If there is no CallBase we cannot verify if we have the same argument
4884 // list so assume that we don't have the same argument list.
4885 if (CB && !hasSameArgumentList(&Caller, *CB) &&
4886 needStackSlotPassParameters(Subtarget, Outs))
4887 return false;
4888 else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
4889 return false;
4890
4891 return true;
4892}
4893
4894/// IsEligibleForTailCallOptimization - Check whether the call is eligible
4895/// for tail call optimization. Targets which want to do tail call
4896/// optimization should implement this function.
4897bool
4898PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
4899 CallingConv::ID CalleeCC,
4900 bool isVarArg,
4901 const SmallVectorImpl<ISD::InputArg> &Ins,
4902 SelectionDAG& DAG) const {
4903 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
4904 return false;
4905
4906 // Variable argument functions are not supported.
4907 if (isVarArg)
4908 return false;
4909
4910 MachineFunction &MF = DAG.getMachineFunction();
4911 CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
4912 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
4913 // Functions containing by val parameters are not supported.
4914 for (unsigned i = 0; i != Ins.size(); i++) {
4915 ISD::ArgFlagsTy Flags = Ins[i].Flags;
4916 if (Flags.isByVal()) return false;
4917 }
4918
4919 // Non-PIC/GOT tail calls are supported.
4920 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
4921 return true;
4922
4923 // At the moment we can only do local tail calls (in same module, hidden
4924 // or protected) if we are generating PIC.
4925 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
4926 return G->getGlobal()->hasHiddenVisibility()
4927 || G->getGlobal()->hasProtectedVisibility();
4928 }
4929
4930 return false;
4931}
4932
4933 /// isBLACompatibleAddress - Return the immediate to use if the specified
4934/// 32-bit value is representable in the immediate field of a BxA instruction.
4935static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
4936 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
4937 if (!C) return nullptr;
4938
4939 int Addr = C->getZExtValue();
4940 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
4941 SignExtend32<26>(Addr) != Addr)
4942 return nullptr; // Top 6 bits have to be sext of immediate.
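// For example (illustrative), a callee address of 0x1000 passes both checks
// and the constant returned below holds 0x400, i.e. the address shifted
// right by 2 as it is encoded in the branch instruction's LI field.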
4943
4944 return DAG
4945 .getConstant(
4946 (int)C->getZExtValue() >> 2, SDLoc(Op),
4947 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
4948 .getNode();
4949}
4950
4951namespace {
4952
4953struct TailCallArgumentInfo {
4954 SDValue Arg;
4955 SDValue FrameIdxOp;
4956 int FrameIdx = 0;
4957
4958 TailCallArgumentInfo() = default;
4959};
4960
4961} // end anonymous namespace
4962
4963/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
4964static void StoreTailCallArgumentsToStackSlot(
4965 SelectionDAG &DAG, SDValue Chain,
4966 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
4967 SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
4968 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
4969 SDValue Arg = TailCallArgs[i].Arg;
4970 SDValue FIN = TailCallArgs[i].FrameIdxOp;
4971 int FI = TailCallArgs[i].FrameIdx;
4972 // Store relative to framepointer.
4973 MemOpChains.push_back(DAG.getStore(
4974 Chain, dl, Arg, FIN,
4975 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
4976 }
4977}
4978
4979/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
4980/// the appropriate stack slot for the tail call optimized function call.
4981static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
4982 SDValue OldRetAddr, SDValue OldFP,
4983 int SPDiff, const SDLoc &dl) {
4984 if (SPDiff) {
4985 // Calculate the new stack slot for the return address.
4986 MachineFunction &MF = DAG.getMachineFunction();
4987 const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
4988 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
4989 bool isPPC64 = Subtarget.isPPC64();
4990 int SlotSize = isPPC64 ? 8 : 4;
4991 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
4992 int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
4993 NewRetAddrLoc, true);
4994 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4995 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
4996 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
4997 MachinePointerInfo::getFixedStack(MF, NewRetAddr));
4998 }
4999 return Chain;
5000}
5001
5002/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
5003/// the position of the argument.
5004static void
5005CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
5006 SDValue Arg, int SPDiff, unsigned ArgOffset,
5007 SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
5008 int Offset = ArgOffset + SPDiff;
5009 uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
5010 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
5011 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5012 SDValue FIN = DAG.getFrameIndex(FI, VT);
5013 TailCallArgumentInfo Info;
5014 Info.Arg = Arg;
5015 Info.FrameIdxOp = FIN;
5016 Info.FrameIdx = FI;
5017 TailCallArguments.push_back(Info);
5018}
5019
5020/// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
5021/// stack slot. Returns the chain as result and the loaded frame pointers in
5022/// LROpOut/FPOpout. Used when tail calling.
5023SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5024 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5025 SDValue &FPOpOut, const SDLoc &dl) const {
5026 if (SPDiff) {
5027 // Load the LR and FP stack slot for later adjusting.
5028 EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5029 LROpOut = getReturnAddrFrameIndex(DAG);
5030 LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
5031 Chain = SDValue(LROpOut.getNode(), 1);
5032 }
5033 return Chain;
5034}
5035
5036/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
5037/// by "Src" to address "Dst" of size "Size". Alignment information is
5038/// specified by the specific parameter attribute. The copy will be passed as
5039/// a byval function parameter.
5040/// Sometimes what we are copying is the end of a larger object, the part that
5041/// does not fit in registers.
5042static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
5043 SDValue Chain, ISD::ArgFlagsTy Flags,
5044 SelectionDAG &DAG, const SDLoc &dl) {
5045 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
5046 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
5047 Flags.getNonZeroByValAlign(), false, false, false,
5048 MachinePointerInfo(), MachinePointerInfo());
5049}
5050
5051/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5052/// tail calls.
5053static void LowerMemOpCallTo(
5054 SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5055 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5056 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5057 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5058 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5059 if (!isTailCall) {
5060 if (isVector) {
5061 SDValue StackPtr;
5062 if (isPPC64)
5063 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5064 else
5065 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5066 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5067 DAG.getConstant(ArgOffset, dl, PtrVT));
5068 }
5069 MemOpChains.push_back(
5070 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5071 // Calculate and remember argument location.
5072 } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
5073 TailCallArguments);
5074}
5075
5076static void
5077PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
5078 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
5079 SDValue FPOp,
5080 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5081 // Emit a sequence of copyto/copyfrom virtual registers for arguments that
5082 // might overwrite each other in case of tail call optimization.
5083 SmallVector<SDValue, 8> MemOpChains2;
5084 // Do not flag preceding copytoreg stuff together with the following stuff.
5085 InFlag = SDValue();
5086 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
5087 MemOpChains2, dl);
5088 if (!MemOpChains2.empty())
5089 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
5090
5091 // Store the return address to the appropriate stack slot.
5092 Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
5093
5094 // Emit callseq_end just before tailcall node.
5095 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
5096 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
5097 InFlag = Chain.getValue(1);
5098}
5099
5100// Is this global address that of a function that can be called by name? (as
5101// opposed to something that must hold a descriptor for an indirect call).
5102static bool isFunctionGlobalAddress(SDValue Callee) {
5103 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
5104 if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
5105 Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
5106 return false;
5107
5108 return G->getGlobal()->getValueType()->isFunctionTy();
5109 }
5110
5111 return false;
5112}
5113
5114SDValue PPCTargetLowering::LowerCallResult(
5115 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
5116 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5117 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5118 SmallVector<CCValAssign, 16> RVLocs;
5119 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5120 *DAG.getContext());
5121
5122 CCRetInfo.AnalyzeCallResult(
5123 Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5124 ? RetCC_PPC_Cold
5125 : RetCC_PPC);
5126
5127 // Copy all of the result registers out of their specified physreg.
5128 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5129 CCValAssign &VA = RVLocs[i];
5130 assert(VA.isRegLoc() && "Can only return in registers!");
5132 SDValue Val;
5133
5134 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5135 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5136 InFlag);
5137 Chain = Lo.getValue(1);
5138 InFlag = Lo.getValue(2);
5139 VA = RVLocs[++i]; // skip ahead to next loc
5140 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5141 InFlag);
5142 Chain = Hi.getValue(1);
5143 InFlag = Hi.getValue(2);
5144 if (!Subtarget.isLittleEndian())
5145 std::swap (Lo, Hi);
5146 Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
5147 } else {
5148 Val = DAG.getCopyFromReg(Chain, dl,
5149 VA.getLocReg(), VA.getLocVT(), InFlag);
5150 Chain = Val.getValue(1);
5151 InFlag = Val.getValue(2);
5152 }
5153
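// (For illustration: when the ABI promoted the value to a wider register
//  type, it is truncated back to the original value type below; AssertZext /
//  AssertSext record what is guaranteed about the discarded high bits.)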
5154 switch (VA.getLocInfo()) {
5155 default: llvm_unreachable("Unknown loc info!");
5156 case CCValAssign::Full: break;
5157 case CCValAssign::AExt:
5158 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5159 break;
5160 case CCValAssign::ZExt:
5161 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5162 DAG.getValueType(VA.getValVT()));
5163 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5164 break;
5165 case CCValAssign::SExt:
5166 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5167 DAG.getValueType(VA.getValVT()));
5168 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5169 break;
5170 }
5171
5172 InVals.push_back(Val);
5173 }
5174
5175 return Chain;
5176}
5177
5178static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5179 const PPCSubtarget &Subtarget, bool isPatchPoint) {
5180 // PatchPoint calls are not indirect.
5181 if (isPatchPoint)
5182 return false;
5183
5184 if (isFunctionGlobalAddress(Callee) || dyn_cast<ExternalSymbolSDNode>(Callee))
5185 return false;
5186
5187 // Darwin and 32-bit ELF can use a BLA. The descriptor-based ABIs cannot
5188 // because the immediate function pointer points to a descriptor instead of
5189 // a function entry point. The ELFv2 ABI cannot use a BLA because the function
5190 // pointer immediate points to the global entry point, while the BLA would
5191 // need to jump to the local entry point (see rL211174).
5192 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5193 isBLACompatibleAddress(Callee, DAG))
5194 return false;
5195
5196 return true;
5197}
5198
5199// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5200static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5201 return Subtarget.isAIXABI() ||
5202 (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5203}
5204
5205static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
5206 const Function &Caller,
5207 const SDValue &Callee,
5208 const PPCSubtarget &Subtarget,
5209 const TargetMachine &TM) {
5210 if (CFlags.IsTailCall)
5211 return PPCISD::TC_RETURN;
5212
5213 // This is a call through a function pointer.
5214 if (CFlags.IsIndirect) {
5215 // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
5216 // indirect calls. The save of the caller's TOC pointer to the stack will be
5217 // inserted into the DAG as part of call lowering. The restore of the TOC
5218 // pointer is modeled by using a pseudo instruction for the call opcode that
5219 // represents the two-instruction sequence of an indirect branch and link,
5220 // immediately followed by a load of the TOC pointer from the stack save
5221 // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
5222 // as it is not saved or used.
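// (Illustrative sketch only, 64-bit case: the pseudo selected below models
//  roughly the following pair, with the exact slot offset supplied by the
//  frame lowering:
//    bctrl                          # indirect branch and link through CTR
//    ld r2, <TOC save offset>(r1)   # reload the caller's TOC pointer)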
5223 return isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
5224 : PPCISD::BCTRL;
5225 }
5226
5227 if (Subtarget.isUsingPCRelativeCalls()) {
5228 assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5229 return PPCISD::CALL_NOTOC;
5230 }
5231
5232 // The ABIs that maintain a TOC pointer across calls need to have a nop
5233 // immediately following the call instruction if the caller and callee may
5234 // have different TOC bases. At link time if the linker determines the calls
5235 // may not share a TOC base, the call is redirected to a trampoline inserted
5236 // by the linker. The trampoline will (among other things) save the caller's
5237 // TOC pointer at an ABI-designated offset in the linkage area and the linker
5238 // will rewrite the nop to be a load of the TOC pointer from the linkage area
5239 // into gpr2.
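// (Illustrative only: such a call is emitted as
//    bl callee
//    nop
//  and when the linker routes it through a trampoline, the nop is rewritten
//  into a load such as `ld r2, <TOC save offset>(r1)` to restore the TOC.)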
5240 if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())
5241 return callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL
5242 : PPCISD::CALL_NOP;
5243
5244 return PPCISD::CALL;
5245}
5246
5247static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
5248 const SDLoc &dl, const PPCSubtarget &Subtarget) {
5249 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
5250 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
5251 return SDValue(Dest, 0);
5252
5253 // Returns true if the callee is local, and false otherwise.
5254 auto isLocalCallee = [&]() {
5255 const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
5256 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5257 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5258
5259 return DAG.getTarget().shouldAssumeDSOLocal(*Mod, GV) &&
5260 !dyn_cast_or_null<GlobalIFunc>(GV);
5261 };
5262
5263 // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
5264 // a static relocation model causes some versions of GNU LD (2.17.50, at
5265 // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5266 // built with secure-PLT.
5267 bool UsePlt =
5268 Subtarget.is32BitELFABI() && !isLocalCallee() &&
5269 Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;
5270
5271 const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5272 const TargetMachine &TM = Subtarget.getTargetMachine();
5273 const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
5274 MCSymbolXCOFF *S =
5275 cast<MCSymbolXCOFF>(TLOF->getFunctionEntryPointSymbol(GV, TM));
5276
5277 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5278 return DAG.getMCSymbol(S, PtrVT);
5279 };
5280
5281 if (isFunctionGlobalAddress(Callee)) {
5282 const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5283
5284 if (Subtarget.isAIXABI()) {
5285 assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
5286 return getAIXFuncEntryPointSymbolSDNode(GV);
5287 }
5288 return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
5289 UsePlt ? PPCII::MO_PLT : 0);
5290 }
5291
5292 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5293 const char *SymName = S->getSymbol();
5294 if (Subtarget.isAIXABI()) {
5295 // If there exists a user-declared function whose name is the same as the
5296 // ExternalSymbol's, then we pick up the user-declared version.
5297 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5298 if (const Function *F =
5299 dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
5300 return getAIXFuncEntryPointSymbolSDNode(F);
5301
5302 // On AIX, direct function calls reference the symbol for the function's
5303 // entry point, which is named by prepending a "." before the function's
5304 // C-linkage name. A Qualname is returned here because an external
5305 // function entry point is a csect with XTY_ER property.
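// For example (illustrative): an ExternalSymbol named "memcpy" is redirected
// here to the qualified entry-point csect symbol ".memcpy[PR]".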
5306 const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5307 auto &Context = DAG.getMachineFunction().getMMI().getContext();
5308 MCSectionXCOFF *Sec = Context.getXCOFFSection(
5309 (Twine(".") + Twine(SymName)).str(), XCOFF::XMC_PR, XCOFF::XTY_ER,
5310 SectionKind::getMetadata());
5311 return Sec->getQualNameSymbol();
5312 };
5313
5314 SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
5315 }
5316 return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
5317 UsePlt ? PPCII::MO_PLT : 0);
5318 }
5319
5320 // No transformation needed.
5321 assert(Callee.getNode() && "What no callee?");
5322 return Callee;
5323}
5324
5325static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
5326 assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5327 "Expected a CALLSEQ_STARTSDNode.");
5328
5329 // The last operand is the chain, except when the node has glue. If the node
5330 // has glue, then the last operand is the glue, and the chain is the second
5331 // last operand.
5332 SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5333 if (LastValue.getValueType() != MVT::Glue)
5334 return LastValue;
5335
5336 return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5337}
5338
5339 // Creates the node that moves a function's address into the count register
5340// to prepare for an indirect call instruction.
5341static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5342 SDValue &Glue, SDValue &Chain,
5343 const SDLoc &dl) {
5344 SDValue MTCTROps[] = {Chain, Callee, Glue};
5345 EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5346 Chain = DAG.getNode(PPCISD::MTCTR, dl, makeArrayRef(ReturnTypes, 2),
5347 makeArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5348 // The glue is the second value produced.
5349 Glue = Chain.getValue(1);
5350}
5351
5352static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5353 SDValue &Glue, SDValue &Chain,
5354 SDValue CallSeqStart,
5355 const CallBase *CB, const SDLoc &dl,
5356 bool hasNest,
5357 const PPCSubtarget &Subtarget) {
5358 // Function pointers in the 64-bit SVR4 ABI do not point to the function
5359 // entry point, but to the function descriptor (the function entry point
5360 // address is part of the function descriptor though).
5361 // The function descriptor is a three doubleword structure with the
5362 // following fields: function entry point, TOC base address and
5363 // environment pointer.
5364 // Thus for a call through a function pointer, the following actions need
5365 // to be performed:
5366 // 1. Save the TOC of the caller in the TOC save area of its stack
5367 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
5368 // 2. Load the address of the function entry point from the function
5369 // descriptor.
5370 // 3. Load the TOC of the callee from the function descriptor into r2.
5371 // 4. Load the environment pointer from the function descriptor into
5372 // r11.
5373 // 5. Branch to the function entry point address.
5374 // 6. On return of the callee, the TOC of the caller needs to be
5375 // restored (this is done in FinishCall()).
5376 //
5377 // The loads are scheduled at the beginning of the call sequence, and the
5378 // register copies are flagged together to ensure that no other
5379 // operations can be scheduled in between. E.g. without flagging the
5380 // copies together, a TOC access in the caller could be scheduled between
5381 // the assignment of the callee TOC and the branch to the callee, which leads
5382 // to incorrect code.
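// (Illustration of the layout read below, assuming the 64-bit layout in which
//  the Subtarget accessors used further down return offsets 8 and 16:
//    offset 0  : function entry point  -> moved into CTR
//    offset 8  : TOC base              -> copied into the TOC register
//    offset 16 : environment pointer   -> copied into r11 unless 'nest' is used)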
5383
5384 // Start by loading the function address from the descriptor.
5385 SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
5386 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5387 ? (MachineMemOperand::MODereferenceable |
5388 MachineMemOperand::MOInvariant)
5389 : MachineMemOperand::MONone;
5390
5391 MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5392
5393 // Registers used in building the DAG.
5394 const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5395 const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5396
5397 // Offsets of descriptor members.
5398 const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5399 const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5400
5401 const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5402 const unsigned Alignment = Subtarget.isPPC64() ? 8 : 4;
5403
5404 // One load for the function's entry point address.
5405 SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
5406 Alignment, MMOFlags);
5407
5408 // One for loading the TOC anchor for the module that contains the called
5409 // function.
5410 SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
5411 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
5412 SDValue TOCPtr =
5413 DAG.getLoad(RegVT, dl, LDChain, AddTOC,
5414 MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
5415
5416 // One for loading the environment pointer.
5417 SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
5418 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
5419 SDValue LoadEnvPtr =
5420 DAG.getLoad(RegVT, dl, LDChain, AddPtr,
5421 MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
5422
5423
5424 // Then copy the newly loaded TOC anchor to the TOC pointer.
5425 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
5426 Chain = TOCVal.getValue(0);
5427 Glue = TOCVal.getValue(1);
5428
5429 // If the function call has an explicit 'nest' parameter, it takes the
5430 // place of the environment pointer.
5431 assert((!hasNest || !Subtarget.isAIXABI()) &&
5432 "Nest parameter is not supported on AIX.");
5433 if (!hasNest) {
5434 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
5435 Chain = EnvVal.getValue(0);
5436 Glue = EnvVal.getValue(1);
5437 }
5438
5439 // The rest of the indirect call sequence is the same as the non-descriptor
5440 // DAG.
5441 prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
5442}
5443
5444static void
5445buildCallOperands(SmallVectorImpl<SDValue> &Ops,
5446 PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5447 SelectionDAG &DAG,
5448 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5449 SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5450 const PPCSubtarget &Subtarget) {
5451 const bool IsPPC64 = Subtarget.isPPC64();
5452 // MVT for a general purpose register.
5453 const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
5454
5455 // First operand is always the chain.
5456 Ops.push_back(Chain);
5457
5458 // If it's a direct call pass the callee as the second operand.
5459 if (!CFlags.IsIndirect)
5460 Ops.push_back(Callee);
5461 else {
5462 assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5463
5464 // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5465 // on the stack (this would have been done in `LowerCall_64SVR4` or
5466 // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5467 // represents both the indirect branch and a load that restores the TOC
5468 // pointer from the linkage area. The operand for the TOC restore is an add
5469 // of the TOC save offset to the stack pointer. This must be the second
5470 // operand: after the chain input but before any other variadic arguments.
5471 // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5472 // saved or used.
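// (Illustrative summary of the operand order built by this function for an
//  indirect call on a TOC-based ABI; bracketed entries are conditional, as
//  checked below:
//    { Chain, AddTOC, [EnvPtrReg], [CTR], [SPDiff], ArgRegs...,
//      [TOC reg], [CR1EQ], RegMask, [Glue] })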
5473 if (isTOCSaveRestoreRequired(Subtarget)) {
5474 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5475
5476 SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
5477 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5478 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5479 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
5480 Ops.push_back(AddTOC);
5481 }
5482
5483 // Add the register used for the environment pointer.
5484 if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5485 Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
5486 RegVT));
5487
5488
5489 // Add CTR register as callee so a bctr can be emitted later.
5490 if (CFlags.IsTailCall)
5491 Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5492 }
5493
5494 // If this is a tail call, add the stack pointer delta.
5495 if (CFlags.IsTailCall)
5496 Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5497
5498 // Add argument registers to the end of the list so that they are known live
5499 // into the call.
5500 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
5501 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
5502 RegsToPass[i].second.getValueType()));
5503
5504 // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5505 // no way to mark dependencies as implicit here.
5506 // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5507 if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5508 !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5509 Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
5510
5511 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5512 if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5513 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5514
5515 // Add a register mask operand representing the call-preserved registers.
5516 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5517 const uint32_t *Mask =
5518 TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
5519 assert(Mask && "Missing call preserved mask for calling convention");
5520 Ops.push_back(DAG.getRegisterMask(Mask));
5521
5522 // If the glue is valid, it is the last operand.
5523 if (Glue.getNode())
5524 Ops.push_back(Glue);
5525}
5526
5527SDValue PPCTargetLowering::FinishCall(
5528 CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5529 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5530 SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5531 unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5532 SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5533
5534 if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5535 Subtarget.isAIXABI())
5536 setUsesTOCBasePtr(DAG);
5537
5538 unsigned CallOpc =
5539 getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
5540 Subtarget, DAG.getTarget());
5541
5542 if (!CFlags.IsIndirect)
5543 Callee = transformCallee(Callee, DAG, dl, Subtarget);
5544 else if (Subtarget.usesFunctionDescriptors())
5545 prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5546 dl, CFlags.HasNest, Subtarget);
5547 else
5548 prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5549
5550 // Build the operand list for the call instruction.
5551 SmallVector<SDValue, 8> Ops;
5552 buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5553 SPDiff, Subtarget);
5554
5555 // Emit tail call.
5556 if (CFlags.IsTailCall) {
5557 // Indirect tail calls when using PC Relative calls do not have the same
5558 // constraints.
5559 assert(((Callee.getOpcode() == ISD::Register &&
5560 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5561 Callee.getOpcode() == ISD::TargetExternalSymbol ||
5562 Callee.getOpcode() == ISD::TargetGlobalAddress ||
5563 isa<ConstantSDNode>(Callee) ||
5564 (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5565 "Expecting a global address, external symbol, absolute value, "
5566 "register or an indirect tail call when PC Relative calls are "
5567 "used.");
5568 // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5569 assert(CallOpc == PPCISD::TC_RETURN &&
5570 "Unexpected call opcode for a tail call.");
5571 DAG.getMachineFunction().getFrameInfo().setHasTailCall();
5572 return DAG.getNode(CallOpc, dl, MVT::Other, Ops);
5573 }
5574
5575 std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5576 Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
5577 DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
5578 Glue = Chain.getValue(1);
5579
5580 // When performing tail call optimization the callee pops its arguments off
5581 // the stack. Account for this here so these bytes can be pushed back on in
5582 // PPCFrameLowering::eliminateCallFramePseudoInstr.
5583 int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5584 getTargetMachine().Options.GuaranteedTailCallOpt)
5585 ? NumBytes
5586 : 0;
5587
5588 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
5589 DAG.getIntPtrConstant(BytesCalleePops, dl, true),
5590 Glue, dl);
5591 Glue = Chain.getValue(1);
5592
5593 return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
5594 DAG, InVals);
5595}
5596
5597SDValue
5598PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5599 SmallVectorImpl<SDValue> &InVals) const {
5600 SelectionDAG &DAG = CLI.DAG;
5601 SDLoc &dl = CLI.DL;
5602 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5603 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5604 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
5605 SDValue Chain = CLI.Chain;
5606 SDValue Callee = CLI.Callee;
5607 bool &isTailCall = CLI.IsTailCall;
5608 CallingConv::ID CallConv = CLI.CallConv;
5609 bool isVarArg = CLI.IsVarArg;
5610 bool isPatchPoint = CLI.IsPatchPoint;
5611 const CallBase *CB = CLI.CB;
5612
5613 if (isTailCall) {
5614 if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5615 isTailCall = false;
5616 else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5617 isTailCall = IsEligibleForTailCallOptimization_64SVR4(
5618 Callee, CallConv, CB, isVarArg, Outs, Ins, DAG);
5619 else
5620 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
5621 Ins, DAG);
5622 if (isTailCall) {
5623 ++NumTailCalls;
5624 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5625 ++NumSiblingCalls;
5626
5627 // PC Relative calls no longer guarantee that the callee is a Global
5628 // Address Node. The callee could be an indirect tail call in which
5629 // case the SDValue for the callee could be a load (to load the address
5630 // of a function pointer) or it may be a register copy (to move the
5631 // address of the callee from a function parameter into a virtual
5632 // register). It may also be an ExternalSymbolSDNode (e.g. memcpy).
5633 assert((Subtarget.isUsingPCRelativeCalls() ||
5634 isa<GlobalAddressSDNode>(Callee)) &&
5635 "Callee should be an llvm::Function object.");
5636
5637 LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5638 << "\nTCO callee: ");
5639 LLVM_DEBUG(Callee.dump());
5640 }
5641 }
5642
5643 if (!isTailCall && CB && CB->isMustTailCall())
5644 report_fatal_error("failed to perform tail call elimination on a call "
5645 "site marked musttail");
5646
5647 // When long calls (i.e. indirect calls) are always used, calls are always
5648 // made via function pointer. If we have a function name, first translate it
5649 // into a pointer.
5650 if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5651 !isTailCall)
5652 Callee = LowerGlobalAddress(Callee, DAG);
5653
5654 CallFlags CFlags(
5655 CallConv, isTailCall, isVarArg, isPatchPoint,
5656 isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
5657 // hasNest
5658 Subtarget.is64BitELFABI() &&
5659 any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
5660 CLI.NoMerge);
5661
5662 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5663 return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5664 InVals, CB);
5665
5666 if (Subtarget.isSVR4ABI())
5667 return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5668 InVals, CB);
5669
5670 if (Subtarget.isAIXABI())
5671 return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5672 InVals, CB);
5673
5674 return LowerCall_Darwin(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5675 InVals, CB);
5676}
5677
5678SDValue PPCTargetLowering::LowerCall_32SVR4(
5679 SDValue Chain, SDValue Callee, CallFlags CFlags,
5680 const SmallVectorImpl<ISD::OutputArg> &Outs,
5681 const SmallVectorImpl<SDValue> &OutVals,
5682 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5683 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5684 const CallBase *CB) const {
5685 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5686 // of the 32-bit SVR4 ABI stack frame layout.
5687
5688 const CallingConv::ID CallConv = CFlags.CallConv;
5689 const bool IsVarArg = CFlags.IsVarArg;
5690 const bool IsTailCall = CFlags.IsTailCall;
5691
5692 assert((CallConv == CallingConv::C ||
5693 CallConv == CallingConv::Cold ||
5694 CallConv == CallingConv::Fast) && "Unknown calling convention!");
5695
5696 const Align PtrAlign(4);
5697
5698 MachineFunction &MF = DAG.getMachineFunction();
5699
5700 // Mark this function as potentially containing a tail call. As a
5701 // consequence the frame pointer will be used for dynamic stack allocation
5702 // and for restoring the caller's stack pointer in this function's epilogue.
5703 // This is done because the tail-called function might overwrite the value
5704 // in this function's (MF) stack pointer stack slot 0(SP).
5705 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5706 CallConv == CallingConv::Fast)
5707 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5708
5709 // Count how many bytes are to be pushed on the stack, including the linkage
5710 // area, parameter list area and the part of the local variable space which
5711 // contains copies of aggregates which are passed by value.
5712
5713 // Assign locations to all of the outgoing arguments.
5714 SmallVector<CCValAssign, 16> ArgLocs;
5715 PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5716
5717 // Reserve space for the linkage area on the stack.
5718 CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
5719 PtrAlign);
5720 if (useSoftFloat())
5721 CCInfo.PreAnalyzeCallOperands(Outs);
5722
5723 if (IsVarArg) {
5724 // Handle fixed and variable vector arguments differently.
5725 // Fixed vector arguments go into registers as long as registers are
5726 // available. Variable vector arguments always go into memory.
5727 unsigned NumArgs = Outs.size();
5728
5729 for (unsigned i = 0; i != NumArgs; ++i) {
5730 MVT ArgVT = Outs[i].VT;
5731 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5732 bool Result;
5733
5734 if (Outs[i].IsFixed) {
5735 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
5736 CCInfo);
5737 } else {
5738 Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
5739 ArgFlags, CCInfo);
5740 }
5741
5742 if (Result) {
5743#ifndef NDEBUG
5744 errs() << "Call operand #" << i << " has unhandled type "
5745 << EVT(ArgVT).getEVTString() << "\n";
5746#endif
5747 llvm_unreachable(nullptr);
5748 }
5749 }
5750 } else {
5751 // All arguments are treated the same.
5752 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
5753 }
5754 CCInfo.clearWasPPCF128();
5755
5756 // Assign locations to all of the outgoing aggregate by value arguments.
5757 SmallVector<CCValAssign, 16> ByValArgLocs;
5758 CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
5759
5760 // Reserve stack space for the allocations in CCInfo.
5761 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);
5762
5763 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
5764
5765 // Size of the linkage area, parameter list area and the part of the local
5766 // variable space where copies of aggregates which are passed by value are
5767 // stored.
5768 unsigned NumBytes = CCByValInfo.getNextStackOffset();
5769
5770 // Calculate by how many bytes the stack has to be adjusted in case of tail
5771 // call optimization.
5772 int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
5773
5774 // Adjust the stack pointer for the new arguments...
5775 // These operations are automatically eliminated by the prolog/epilog pass
5776 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5777 SDValue CallSeqStart = Chain;
5778
5779 // Load the return address and frame pointer so they can be moved somewhere
5780 // else later.
5781 SDValue LROp, FPOp;
5782 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5783
5784 // Set up a copy of the stack pointer for use loading and storing any
5785 // arguments that may not fit in the registers available for argument
5786 // passing.
5787 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5788
5789 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5790 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5791 SmallVector<SDValue, 8> MemOpChains;
5792
5793 bool seenFloatArg = false;
5794 // Walk the register/memloc assignments, inserting copies/loads.
5795 // i - Tracks the index into the list of registers allocated for the call
5796 // RealArgIdx - Tracks the index into the list of actual function arguments
5797 // j - Tracks the index into the list of byval arguments
5798 for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
5799 i != e;
5800 ++i, ++RealArgIdx) {
5801 CCValAssign &VA = ArgLocs[i];
5802 SDValue Arg = OutVals[RealArgIdx];
5803 ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
5804
5805 if (Flags.isByVal()) {
5806 // Argument is an aggregate which is passed by value, thus we need to
5807 // create a copy of it in the local variable space of the current stack
5808 // frame (which is the stack frame of the caller) and pass the address of
5809 // this copy to the callee.
5810 assert((j < ByValArgLocs.size()) && "Index out of bounds!");
5811 CCValAssign &ByValVA = ByValArgLocs[j++];
5812 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
5813
5814 // Memory reserved in the local variable space of the caller's stack frame.
5815 unsigned LocMemOffset = ByValVA.getLocMemOffset();
5816
5817 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5818 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5819 StackPtr, PtrOff);
5820
5821 // Create a copy of the argument in the local area of the current
5822 // stack frame.
5823 SDValue MemcpyCall =
5824 CreateCopyOfByValArgument(Arg, PtrOff,
5825 CallSeqStart.getNode()->getOperand(0),
5826 Flags, DAG, dl);
5827
5828 // This must go outside the CALLSEQ_START..END.
5829 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
5830 SDLoc(MemcpyCall));
5831 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5832 NewCallSeqStart.getNode());
5833 Chain = CallSeqStart = NewCallSeqStart;
5834
5835 // Pass the address of the aggregate copy on the stack either in a
5836 // physical register or in the parameter list area of the current stack
5837 // frame to the callee.
5838 Arg = PtrOff;
5839 }
5840
5841 // When useCRBits() is true, there can be i1 arguments.
5842 // This is because getRegisterType(MVT::i1) => MVT::i1,
5843 // and for other integer types getRegisterType() => MVT::i32.
5844 // Extend i1 and ensure callee will get i32.
5845 if (Arg.getValueType() == MVT::i1)
5846 Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
5847 dl, MVT::i32, Arg);
5848
5849 if (VA.isRegLoc()) {
5850 seenFloatArg |= VA.getLocVT().isFloatingPoint();
5851 // Put argument in a physical register.
5852 if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
5853 bool IsLE = Subtarget.isLittleEndian();
5854 SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5855 DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
5856 RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
5857 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5858 DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
5859 RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
5860 SVal.getValue(0)));
5861 } else
5862 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
5863 } else {
5864 // Put argument in the parameter list area of the current stack frame.
5865 assert(VA.isMemLoc());
5866 unsigned LocMemOffset = VA.getLocMemOffset();
5867
5868 if (!IsTailCall) {
5869 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5870 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5871 StackPtr, PtrOff);
5872
5873 MemOpChains.push_back(
5874 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5875 } else {
5876 // Calculate and remember argument location.
5877 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
5878 TailCallArguments);
5879 }
5880 }
5881 }
5882
5883 if (!MemOpChains.empty())
5884 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
5885
5886 // Build a sequence of copy-to-reg nodes chained together with token chain
5887 // and flag operands which copy the outgoing args into the appropriate regs.
5888 SDValue InFlag;
5889 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5890 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5891 RegsToPass[i].second, InFlag);
5892 InFlag = Chain.getValue(1);
5893 }
5894
5895 // Set CR bit 6 to true if this is a vararg call with floating args passed in
5896 // registers.
5897 if (IsVarArg) {
5898 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
5899 SDValue Ops[] = { Chain, InFlag };
5900
5901 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
5902 dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));
5903
5904 InFlag = Chain.getValue(1);
5905 }
5906
5907 if (IsTailCall)
5908 PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
5909 TailCallArguments);
5910
5911 return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
5912 Callee, SPDiff, NumBytes, Ins, InVals, CB);
5913}
5914
5915// Copy an argument into memory, being careful to do this outside the
5916// call sequence for the call to which the argument belongs.
5917SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
5918 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
5919 SelectionDAG &DAG, const SDLoc &dl) const {
5920 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
5921 CallSeqStart.getNode()->getOperand(0),
5922 Flags, DAG, dl);
5923 // The MEMCPY must go outside the CALLSEQ_START..END.
5924 int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
5925 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
5926