Bug Summary

File: lib/Target/PowerPC/PPCISelLowering.cpp
Warning: line 8416, column 31
1st function call argument is an uninitialized value
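
This diagnostic comes from the core checkers enabled in the invocation below (-analyzer-checker=core): it fires when a value that is never assigned on some execution path is passed as the first argument of a call. The analyzer's path for line 8416 is not reproduced in this excerpt, so the snippet below is only a minimal, hypothetical sketch of the flagged pattern; the names consume and pickOpcode are illustrative and do not appear in PPCISelLowering.cpp.

// Minimal sketch, assuming a branch that leaves a local unset on one path.
static unsigned consume(unsigned Opc) { return Opc + 1; }

static unsigned pickOpcode(bool Swap) {
  unsigned Opc;        // declared but not initialized
  if (Swap)
    Opc = 42;          // only assigned when Swap is true
  return consume(Opc); // analyzer: "1st function call argument is an
                       // uninitialized value" on the Swap == false path
}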

Annotated Source Code


clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name PPCISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-eagerly-assume -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -mrelocation-model pic -pic-level 2 -mthread-model posix -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -momit-leaf-frame-pointer -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-7/lib/clang/7.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-7~svn338205/build-llvm/lib/Target/PowerPC -I /build/llvm-toolchain-snapshot-7~svn338205/lib/Target/PowerPC -I /build/llvm-toolchain-snapshot-7~svn338205/build-llvm/include -I /build/llvm-toolchain-snapshot-7~svn338205/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/8/../../../../include/c++/8 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/8/../../../../include/x86_64-linux-gnu/c++/8 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/8/../../../../include/x86_64-linux-gnu/c++/8 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/8/../../../../include/c++/8/backward -internal-isystem /usr/include/clang/7.0.0/include/ -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-7/lib/clang/7.0.0/include -internal-externc-isystem /usr/lib/gcc/x86_64-linux-gnu/8/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-comment -std=c++11 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-7~svn338205/build-llvm/lib/Target/PowerPC -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -o /tmp/scan-build-2018-07-29-043837-17923-1 -x c++ /build/llvm-toolchain-snapshot-7~svn338205/lib/Target/PowerPC/PPCISelLowering.cpp -faddrsig
1//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the PPCISelLowering class.
11//
12//===----------------------------------------------------------------------===//
13
14#include "PPCISelLowering.h"
15#include "MCTargetDesc/PPCPredicates.h"
16#include "PPC.h"
17#include "PPCCCState.h"
18#include "PPCCallingConv.h"
19#include "PPCFrameLowering.h"
20#include "PPCInstrInfo.h"
21#include "PPCMachineFunctionInfo.h"
22#include "PPCPerfectShuffle.h"
23#include "PPCRegisterInfo.h"
24#include "PPCSubtarget.h"
25#include "PPCTargetMachine.h"
26#include "llvm/ADT/APFloat.h"
27#include "llvm/ADT/APInt.h"
28#include "llvm/ADT/ArrayRef.h"
29#include "llvm/ADT/DenseMap.h"
30#include "llvm/ADT/None.h"
31#include "llvm/ADT/STLExtras.h"
32#include "llvm/ADT/SmallPtrSet.h"
33#include "llvm/ADT/SmallSet.h"
34#include "llvm/ADT/SmallVector.h"
35#include "llvm/ADT/Statistic.h"
36#include "llvm/ADT/StringRef.h"
37#include "llvm/ADT/StringSwitch.h"
38#include "llvm/CodeGen/CallingConvLower.h"
39#include "llvm/CodeGen/ISDOpcodes.h"
40#include "llvm/CodeGen/MachineBasicBlock.h"
41#include "llvm/CodeGen/MachineFrameInfo.h"
42#include "llvm/CodeGen/MachineFunction.h"
43#include "llvm/CodeGen/MachineInstr.h"
44#include "llvm/CodeGen/MachineInstrBuilder.h"
45#include "llvm/CodeGen/MachineJumpTableInfo.h"
46#include "llvm/CodeGen/MachineLoopInfo.h"
47#include "llvm/CodeGen/MachineMemOperand.h"
48#include "llvm/CodeGen/MachineOperand.h"
49#include "llvm/CodeGen/MachineRegisterInfo.h"
50#include "llvm/CodeGen/RuntimeLibcalls.h"
51#include "llvm/CodeGen/SelectionDAG.h"
52#include "llvm/CodeGen/SelectionDAGNodes.h"
53#include "llvm/CodeGen/TargetInstrInfo.h"
54#include "llvm/CodeGen/TargetLowering.h"
55#include "llvm/CodeGen/TargetRegisterInfo.h"
56#include "llvm/CodeGen/ValueTypes.h"
57#include "llvm/IR/CallSite.h"
58#include "llvm/IR/CallingConv.h"
59#include "llvm/IR/Constant.h"
60#include "llvm/IR/Constants.h"
61#include "llvm/IR/DataLayout.h"
62#include "llvm/IR/DebugLoc.h"
63#include "llvm/IR/DerivedTypes.h"
64#include "llvm/IR/Function.h"
65#include "llvm/IR/GlobalValue.h"
66#include "llvm/IR/IRBuilder.h"
67#include "llvm/IR/Instructions.h"
68#include "llvm/IR/Intrinsics.h"
69#include "llvm/IR/Module.h"
70#include "llvm/IR/Type.h"
71#include "llvm/IR/Use.h"
72#include "llvm/IR/Value.h"
73#include "llvm/MC/MCExpr.h"
74#include "llvm/MC/MCRegisterInfo.h"
75#include "llvm/Support/AtomicOrdering.h"
76#include "llvm/Support/BranchProbability.h"
77#include "llvm/Support/Casting.h"
78#include "llvm/Support/CodeGen.h"
79#include "llvm/Support/CommandLine.h"
80#include "llvm/Support/Compiler.h"
81#include "llvm/Support/Debug.h"
82#include "llvm/Support/ErrorHandling.h"
83#include "llvm/Support/Format.h"
84#include "llvm/Support/KnownBits.h"
85#include "llvm/Support/MachineValueType.h"
86#include "llvm/Support/MathExtras.h"
87#include "llvm/Support/raw_ostream.h"
88#include "llvm/Target/TargetMachine.h"
89#include "llvm/Target/TargetOptions.h"
90#include <algorithm>
91#include <cassert>
92#include <cstdint>
93#include <iterator>
94#include <list>
95#include <utility>
96#include <vector>
97
98using namespace llvm;
99
100#define DEBUG_TYPE "ppc-lowering"
101
102static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
103cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
104
105static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
106cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
107
108static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
109cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
110
111static cl::opt<bool> DisableSCO("disable-ppc-sco",
112cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
113
114static cl::opt<bool> EnableQuadPrecision("enable-ppc-quad-precision",
115cl::desc("enable quad precision float support on ppc"), cl::Hidden);
116
117STATISTIC(NumTailCalls, "Number of tail calls");
118STATISTIC(NumSiblingCalls, "Number of sibling calls");
119
120static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
121
122// FIXME: Remove this once the bug has been fixed!
123extern cl::opt<bool> ANDIGlueBug;
124
125PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
126 const PPCSubtarget &STI)
127 : TargetLowering(TM), Subtarget(STI) {
128 // Use _setjmp/_longjmp instead of setjmp/longjmp.
129 setUseUnderscoreSetJmp(true);
130 setUseUnderscoreLongJmp(true);
131
132 // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
133 // arguments are at least 4/8 bytes aligned.
134 bool isPPC64 = Subtarget.isPPC64();
135 setMinStackArgumentAlignment(isPPC64 ? 8:4);
136
137 // Set up the register classes.
138 addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
139 if (!useSoftFloat()) {
140 if (hasSPE()) {
141 addRegisterClass(MVT::f32, &PPC::SPE4RCRegClass);
142 addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
143 } else {
144 addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
145 addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
146 }
147 }
148
149 // Match BITREVERSE to customized fast code sequence in the td file.
150 setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
151 setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
152
153 // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
154 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
155
156 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
157 for (MVT VT : MVT::integer_valuetypes()) {
158 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
159 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
160 }
161
162 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
163
164 // PowerPC has pre-inc loads and stores.
165 setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
166 setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
167 setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
168 setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
169 setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
170 setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
171 setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
172 setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
173 setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
174 setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
175 if (!Subtarget.hasSPE()) {
176 setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
177 setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
178 setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
179 setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
180 }
181
182 // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
183 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
184 for (MVT VT : ScalarIntVTs) {
185 setOperationAction(ISD::ADDC, VT, Legal);
186 setOperationAction(ISD::ADDE, VT, Legal);
187 setOperationAction(ISD::SUBC, VT, Legal);
188 setOperationAction(ISD::SUBE, VT, Legal);
189 }
190
191 if (Subtarget.useCRBits()) {
192 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
193
194 if (isPPC64 || Subtarget.hasFPCVT()) {
195 setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
196 AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
197 isPPC64 ? MVT::i64 : MVT::i32);
198 setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
199 AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
200 isPPC64 ? MVT::i64 : MVT::i32);
201 } else {
202 setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
203 setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
204 }
205
206 // PowerPC does not support direct load/store of condition registers.
207 setOperationAction(ISD::LOAD, MVT::i1, Custom);
208 setOperationAction(ISD::STORE, MVT::i1, Custom);
209
210 // FIXME: Remove this once the ANDI glue bug is fixed:
211 if (ANDIGlueBug)
212 setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
213
214 for (MVT VT : MVT::integer_valuetypes()) {
215 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
216 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
217 setTruncStoreAction(VT, MVT::i1, Expand);
218 }
219
220 addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
221 }
222
223 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
224 // PPC (the libcall is not available).
225 setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom);
226 setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom);
227
228 // We do not currently implement these libm ops for PowerPC.
229 setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
230 setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
231 setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
232 setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
233 setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
234 setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
235
236 // PowerPC has no SREM/UREM instructions unless we are on P9
237 // On P9 we may use a hardware instruction to compute the remainder.
238 // The instructions are not legalized directly because in the cases where the
239 // result of both the remainder and the division is required it is more
240 // efficient to compute the remainder from the result of the division rather
241 // than use the remainder instruction.
242 if (Subtarget.isISA3_0()) {
243 setOperationAction(ISD::SREM, MVT::i32, Custom);
244 setOperationAction(ISD::UREM, MVT::i32, Custom);
245 setOperationAction(ISD::SREM, MVT::i64, Custom);
246 setOperationAction(ISD::UREM, MVT::i64, Custom);
247 } else {
248 setOperationAction(ISD::SREM, MVT::i32, Expand);
249 setOperationAction(ISD::UREM, MVT::i32, Expand);
250 setOperationAction(ISD::SREM, MVT::i64, Expand);
251 setOperationAction(ISD::UREM, MVT::i64, Expand);
252 }
253
254 if (Subtarget.hasP9Vector()) {
255 setOperationAction(ISD::ABS, MVT::v4i32, Legal);
256 setOperationAction(ISD::ABS, MVT::v8i16, Legal);
257 setOperationAction(ISD::ABS, MVT::v16i8, Legal);
258 }
259
260 // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
261 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
262 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
263 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
264 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
265 setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
266 setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
267 setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
268 setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
269
270 // We don't support sin/cos/sqrt/fmod/pow
271 setOperationAction(ISD::FSIN , MVT::f64, Expand);
272 setOperationAction(ISD::FCOS , MVT::f64, Expand);
273 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
274 setOperationAction(ISD::FREM , MVT::f64, Expand);
275 setOperationAction(ISD::FPOW , MVT::f64, Expand);
276 setOperationAction(ISD::FSIN , MVT::f32, Expand);
277 setOperationAction(ISD::FCOS , MVT::f32, Expand);
278 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
279 setOperationAction(ISD::FREM , MVT::f32, Expand);
280 setOperationAction(ISD::FPOW , MVT::f32, Expand);
281 if (Subtarget.hasSPE()) {
282 setOperationAction(ISD::FMA , MVT::f64, Expand);
283 setOperationAction(ISD::FMA , MVT::f32, Expand);
284 } else {
285 setOperationAction(ISD::FMA , MVT::f64, Legal);
286 setOperationAction(ISD::FMA , MVT::f32, Legal);
287 }
288
289 setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
290
291 // If we're enabling GP optimizations, use hardware square root
292 if (!Subtarget.hasFSQRT() &&
293 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
294 Subtarget.hasFRE()))
295 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
296
297 if (!Subtarget.hasFSQRT() &&
298 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
299 Subtarget.hasFRES()))
300 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
301
302 if (Subtarget.hasFCPSGN()) {
303 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
304 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
305 } else {
306 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
307 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
308 }
309
310 if (Subtarget.hasFPRND()) {
311 setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
312 setOperationAction(ISD::FCEIL, MVT::f64, Legal);
313 setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
314 setOperationAction(ISD::FROUND, MVT::f64, Legal);
315
316 setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
317 setOperationAction(ISD::FCEIL, MVT::f32, Legal);
318 setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
319 setOperationAction(ISD::FROUND, MVT::f32, Legal);
320 }
321
322 // PowerPC does not have BSWAP, but we can use vector BSWAP instruction xxbrd
323 // to speed up scalar BSWAP64.
324 // CTPOP or CTTZ were introduced in P8/P9 respectively
325 setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
326 if (Subtarget.isISA3_0()) {
327 setOperationAction(ISD::BSWAP, MVT::i64 , Custom);
328 setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
329 setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
330 } else {
331 setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
332 setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
333 setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
334 }
335
336 if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
337 setOperationAction(ISD::CTPOP, MVT::i32 , Legal);
338 setOperationAction(ISD::CTPOP, MVT::i64 , Legal);
339 } else {
340 setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
341 setOperationAction(ISD::CTPOP, MVT::i64 , Expand);
342 }
343
344 // PowerPC does not have ROTR
345 setOperationAction(ISD::ROTR, MVT::i32 , Expand);
346 setOperationAction(ISD::ROTR, MVT::i64 , Expand);
347
348 if (!Subtarget.useCRBits()) {
349 // PowerPC does not have Select
350 setOperationAction(ISD::SELECT, MVT::i32, Expand);
351 setOperationAction(ISD::SELECT, MVT::i64, Expand);
352 setOperationAction(ISD::SELECT, MVT::f32, Expand);
353 setOperationAction(ISD::SELECT, MVT::f64, Expand);
354 }
355
356 // PowerPC wants to turn select_cc of FP into fsel when possible.
357 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
358 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
359
360 // PowerPC wants to optimize integer setcc a bit
361 if (!Subtarget.useCRBits())
362 setOperationAction(ISD::SETCC, MVT::i32, Custom);
363
364 // PowerPC does not have BRCOND which requires SetCC
365 if (!Subtarget.useCRBits())
366 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
367
368 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
369
370 if (Subtarget.hasSPE()) {
371 // SPE has built-in conversions
372 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
373 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
374 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
375 } else {
376 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
377 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
378
379 // PowerPC does not have [U|S]INT_TO_FP
380 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
381 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
382 }
383
384 if (Subtarget.hasDirectMove() && isPPC64) {
385 setOperationAction(ISD::BITCAST, MVT::f32, Legal);
386 setOperationAction(ISD::BITCAST, MVT::i32, Legal);
387 setOperationAction(ISD::BITCAST, MVT::i64, Legal);
388 setOperationAction(ISD::BITCAST, MVT::f64, Legal);
389 } else {
390 setOperationAction(ISD::BITCAST, MVT::f32, Expand);
391 setOperationAction(ISD::BITCAST, MVT::i32, Expand);
392 setOperationAction(ISD::BITCAST, MVT::i64, Expand);
393 setOperationAction(ISD::BITCAST, MVT::f64, Expand);
394 }
395
396 // We cannot sextinreg(i1). Expand to shifts.
397 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
398
399 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
400 // SjLj exception handling but a light-weight setjmp/longjmp replacement to
401 // support continuation, user-level threading, etc. As a result, no
402 // other SjLj exception interfaces are implemented and please don't build
403 // your own exception handling based on them.
404 // LLVM/Clang supports zero-cost DWARF exception handling.
405 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
406 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
407
408 // We want to legalize GlobalAddress and ConstantPool nodes into the
409 // appropriate instructions to materialize the address.
410 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
411 setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
412 setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
413 setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
414 setOperationAction(ISD::JumpTable, MVT::i32, Custom);
415 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
416 setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
417 setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
418 setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
419 setOperationAction(ISD::JumpTable, MVT::i64, Custom);
420
421 // TRAP is legal.
422 setOperationAction(ISD::TRAP, MVT::Other, Legal);
423
424 // TRAMPOLINE is custom lowered.
425 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
426 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
427
428 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
429 setOperationAction(ISD::VASTART , MVT::Other, Custom);
430
431 if (Subtarget.isSVR4ABI()) {
432 if (isPPC64) {
433 // VAARG always uses double-word chunks, so promote anything smaller.
434 setOperationAction(ISD::VAARG, MVT::i1, Promote);
435 AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64);
436 setOperationAction(ISD::VAARG, MVT::i8, Promote);
437 AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64);
438 setOperationAction(ISD::VAARG, MVT::i16, Promote);
439 AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64);
440 setOperationAction(ISD::VAARG, MVT::i32, Promote);
441 AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64);
442 setOperationAction(ISD::VAARG, MVT::Other, Expand);
443 } else {
444 // VAARG is custom lowered with the 32-bit SVR4 ABI.
445 setOperationAction(ISD::VAARG, MVT::Other, Custom);
446 setOperationAction(ISD::VAARG, MVT::i64, Custom);
447 }
448 } else
449 setOperationAction(ISD::VAARG, MVT::Other, Expand);
450
451 if (Subtarget.isSVR4ABI() && !isPPC64)
452 // VACOPY is custom lowered with the 32-bit SVR4 ABI.
453 setOperationAction(ISD::VACOPY , MVT::Other, Custom);
454 else
455 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
456
457 // Use the default implementation.
458 setOperationAction(ISD::VAEND , MVT::Other, Expand);
459 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
460 setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom);
461 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
462 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom);
463 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
464 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
465 setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
466 setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
467
468 // We want to custom lower some of our intrinsics.
469 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
470
471 // To handle counter-based loop conditions.
472 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
473
474 setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
475 setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
476 setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
477 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
478
479 // Comparisons that require checking two conditions.
480 if (Subtarget.hasSPE()) {
481 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
482 setCondCodeAction(ISD::SETO, MVT::f64, Expand);
483 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
484 setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
485 }
486 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
487 setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
488 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
489 setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
490 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
491 setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
492 setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
493 setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
494 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
495 setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
496 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
497 setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
498
499 if (Subtarget.has64BitSupport()) {
500 // They also have instructions for converting between i64 and fp.
501 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
502 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
503 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
504 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
505 // This is just the low 32 bits of a (signed) fp->i64 conversion.
506 // We cannot do this with Promote because i64 is not a legal type.
507 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
508
509 if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
510 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
511 } else {
512 // PowerPC does not have FP_TO_UINT on 32-bit implementations.
513 if (Subtarget.hasSPE())
514 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
515 else
516 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
517 }
518
519 // With the instructions enabled under FPCVT, we can do everything.
520 if (Subtarget.hasFPCVT()) {
521 if (Subtarget.has64BitSupport()) {
522 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
523 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
524 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
525 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
526 }
527
528 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
529 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
530 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
531 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
532 }
533
534 if (Subtarget.use64BitRegs()) {
535 // 64-bit PowerPC implementations can support i64 types directly
536 addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
537 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
538 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
539 // 64-bit PowerPC wants to expand i128 shifts itself.
540 setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
541 setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
542 setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
543 } else {
544 // 32-bit PowerPC wants to expand i64 shifts itself.
545 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
546 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
547 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
548 }
549
550 if (Subtarget.hasAltivec()) {
551 // First set operation action for all vector types to expand. Then we
552 // will selectively turn on ones that can be effectively codegen'd.
553 for (MVT VT : MVT::vector_valuetypes()) {
554 // add/sub are legal for all supported vector VT's.
555 setOperationAction(ISD::ADD, VT, Legal);
556 setOperationAction(ISD::SUB, VT, Legal);
557
558 // Vector instructions introduced in P8
559 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
560 setOperationAction(ISD::CTPOP, VT, Legal);
561 setOperationAction(ISD::CTLZ, VT, Legal);
562 }
563 else {
564 setOperationAction(ISD::CTPOP, VT, Expand);
565 setOperationAction(ISD::CTLZ, VT, Expand);
566 }
567
568 // Vector instructions introduced in P9
569 if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
570 setOperationAction(ISD::CTTZ, VT, Legal);
571 else
572 setOperationAction(ISD::CTTZ, VT, Expand);
573
574 // We promote all shuffles to v16i8.
575 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
576 AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
577
578 // We promote all non-typed operations to v4i32.
579 setOperationAction(ISD::AND , VT, Promote);
580 AddPromotedToType (ISD::AND , VT, MVT::v4i32);
581 setOperationAction(ISD::OR , VT, Promote);
582 AddPromotedToType (ISD::OR , VT, MVT::v4i32);
583 setOperationAction(ISD::XOR , VT, Promote);
584 AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
585 setOperationAction(ISD::LOAD , VT, Promote);
586 AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
587 setOperationAction(ISD::SELECT, VT, Promote);
588 AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
589 setOperationAction(ISD::SELECT_CC, VT, Promote);
590 AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
591 setOperationAction(ISD::STORE, VT, Promote);
592 AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
593
594 // No other operations are legal.
595 setOperationAction(ISD::MUL , VT, Expand);
596 setOperationAction(ISD::SDIV, VT, Expand);
597 setOperationAction(ISD::SREM, VT, Expand);
598 setOperationAction(ISD::UDIV, VT, Expand);
599 setOperationAction(ISD::UREM, VT, Expand);
600 setOperationAction(ISD::FDIV, VT, Expand);
601 setOperationAction(ISD::FREM, VT, Expand);
602 setOperationAction(ISD::FNEG, VT, Expand);
603 setOperationAction(ISD::FSQRT, VT, Expand);
604 setOperationAction(ISD::FLOG, VT, Expand);
605 setOperationAction(ISD::FLOG10, VT, Expand);
606 setOperationAction(ISD::FLOG2, VT, Expand);
607 setOperationAction(ISD::FEXP, VT, Expand);
608 setOperationAction(ISD::FEXP2, VT, Expand);
609 setOperationAction(ISD::FSIN, VT, Expand);
610 setOperationAction(ISD::FCOS, VT, Expand);
611 setOperationAction(ISD::FABS, VT, Expand);
612 setOperationAction(ISD::FFLOOR, VT, Expand);
613 setOperationAction(ISD::FCEIL, VT, Expand);
614 setOperationAction(ISD::FTRUNC, VT, Expand);
615 setOperationAction(ISD::FRINT, VT, Expand);
616 setOperationAction(ISD::FNEARBYINT, VT, Expand);
617 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
618 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
619 setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
620 setOperationAction(ISD::MULHU, VT, Expand);
621 setOperationAction(ISD::MULHS, VT, Expand);
622 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
623 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
624 setOperationAction(ISD::UDIVREM, VT, Expand);
625 setOperationAction(ISD::SDIVREM, VT, Expand);
626 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
627 setOperationAction(ISD::FPOW, VT, Expand);
628 setOperationAction(ISD::BSWAP, VT, Expand);
629 setOperationAction(ISD::VSELECT, VT, Expand);
630 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
631 setOperationAction(ISD::ROTL, VT, Expand);
632 setOperationAction(ISD::ROTR, VT, Expand);
633
634 for (MVT InnerVT : MVT::vector_valuetypes()) {
635 setTruncStoreAction(VT, InnerVT, Expand);
636 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
637 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
638 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
639 }
640 }
641
642 // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
643 // with merges, splats, etc.
644 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
645
646 setOperationAction(ISD::AND , MVT::v4i32, Legal);
647 setOperationAction(ISD::OR , MVT::v4i32, Legal);
648 setOperationAction(ISD::XOR , MVT::v4i32, Legal);
649 setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
650 setOperationAction(ISD::SELECT, MVT::v4i32,
651 Subtarget.useCRBits() ? Legal : Expand);
652 setOperationAction(ISD::STORE , MVT::v4i32, Legal);
653 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
654 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
655 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
656 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
657 setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
658 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
659 setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
660 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
661
662 addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
663 addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
664 addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
665 addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
666
667 setOperationAction(ISD::MUL, MVT::v4f32, Legal);
668 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
669
670 if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) {
671 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
672 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
673 }
674
675 if (Subtarget.hasP8Altivec())
676 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
677 else
678 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
679
680 setOperationAction(ISD::MUL, MVT::v8i16, Custom);
681 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
682
683 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
684 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
685
686 setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
687 setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
688 setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
689 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
690
691 // Altivec does not contain unordered floating-point compare instructions
692 setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
693 setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
694 setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
695 setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
696
697 if (Subtarget.hasVSX()) {
698 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
699 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
700 if (Subtarget.hasP8Vector()) {
701 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
702 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
703 }
704 if (Subtarget.hasDirectMove() && isPPC64) {
705 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
706 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
707 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
708 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
709 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
710 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
711 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
712 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
713 }
714 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
715
716 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
717 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
718 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
719 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
720 setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
721
722 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
723
724 setOperationAction(ISD::MUL, MVT::v2f64, Legal);
725 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
726
727 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
728 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
729
730 setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
731 setOperationAction(ISD::VSELECT, MVT::v8i16, Legal);
732 setOperationAction(ISD::VSELECT, MVT::v4i32, Legal);
733 setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
734 setOperationAction(ISD::VSELECT, MVT::v2f64, Legal);
735
736 // Share the Altivec comparison restrictions.
737 setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
738 setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
739 setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
740 setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
741
742 setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
743 setOperationAction(ISD::STORE, MVT::v2f64, Legal);
744
745 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);
746
747 if (Subtarget.hasP8Vector())
748 addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
749
750 addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
751
752 addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
753 addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
754 addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
755
756 if (Subtarget.hasP8Altivec()) {
757 setOperationAction(ISD::SHL, MVT::v2i64, Legal);
758 setOperationAction(ISD::SRA, MVT::v2i64, Legal);
759 setOperationAction(ISD::SRL, MVT::v2i64, Legal);
760
761 // 128 bit shifts can be accomplished via 3 instructions for SHL and
762 // SRL, but not for SRA because of the instructions available:
763 // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth
764 // doing
765 setOperationAction(ISD::SHL, MVT::v1i128, Expand);
766 setOperationAction(ISD::SRL, MVT::v1i128, Expand);
767 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
768
769 setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
770 }
771 else {
772 setOperationAction(ISD::SHL, MVT::v2i64, Expand);
773 setOperationAction(ISD::SRA, MVT::v2i64, Expand);
774 setOperationAction(ISD::SRL, MVT::v2i64, Expand);
775
776 setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
777
778 // VSX v2i64 only supports non-arithmetic operations.
779 setOperationAction(ISD::ADD, MVT::v2i64, Expand);
780 setOperationAction(ISD::SUB, MVT::v2i64, Expand);
781 }
782
783 setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
784 AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
785 setOperationAction(ISD::STORE, MVT::v2i64, Promote);
786 AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
787
788 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);
789
790 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
791 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
792 setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
793 setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
794
795 // Vector operation legalization checks the result type of
796 // SIGN_EXTEND_INREG, overall legalization checks the inner type.
797 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
798 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
799 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
800 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom);
801
802 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
803 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
804 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
805 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
806
807 if (Subtarget.hasDirectMove())
808 setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
809 setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
810
811 addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
812 }
813
814 if (Subtarget.hasP8Altivec()) {
815 addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
816 addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
817 }
818
819 if (Subtarget.hasP9Vector()) {
820 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
821 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
822
823 // 128 bit shifts can be accomplished via 3 instructions for SHL and
824 // SRL, but not for SRA because of the instructions available:
825 // VS{RL} and VS{RL}O.
826 setOperationAction(ISD::SHL, MVT::v1i128, Legal);
827 setOperationAction(ISD::SRL, MVT::v1i128, Legal);
828 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
829
830 if (EnableQuadPrecision) {
831 addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
832 setOperationAction(ISD::FADD, MVT::f128, Legal);
833 setOperationAction(ISD::FSUB, MVT::f128, Legal);
834 setOperationAction(ISD::FDIV, MVT::f128, Legal);
835 setOperationAction(ISD::FMUL, MVT::f128, Legal);
836 setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
837 // No extending loads to f128 on PPC.
838 for (MVT FPT : MVT::fp_valuetypes())
839 setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
840 setOperationAction(ISD::FMA, MVT::f128, Legal);
841 setCondCodeAction(ISD::SETULT, MVT::f128, Expand);
842 setCondCodeAction(ISD::SETUGT, MVT::f128, Expand);
843 setCondCodeAction(ISD::SETUEQ, MVT::f128, Expand);
844 setCondCodeAction(ISD::SETOGE, MVT::f128, Expand);
845 setCondCodeAction(ISD::SETOLE, MVT::f128, Expand);
846 setCondCodeAction(ISD::SETONE, MVT::f128, Expand);
847
848 setOperationAction(ISD::FTRUNC, MVT::f128, Legal);
849 setOperationAction(ISD::FRINT, MVT::f128, Legal);
850 setOperationAction(ISD::FFLOOR, MVT::f128, Legal);
851 setOperationAction(ISD::FCEIL, MVT::f128, Legal);
852 setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal);
853 setOperationAction(ISD::FROUND, MVT::f128, Legal);
854
855 setOperationAction(ISD::SELECT, MVT::f128, Expand);
856 setOperationAction(ISD::FP_ROUND, MVT::f64, Legal);
857 setOperationAction(ISD::FP_ROUND, MVT::f32, Legal);
858 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
859 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
860 setOperationAction(ISD::BITCAST, MVT::i128, Custom);
861 // No implementation for these ops for PowerPC.
862 setOperationAction(ISD::FSIN , MVT::f128, Expand);
863 setOperationAction(ISD::FCOS , MVT::f128, Expand);
864 setOperationAction(ISD::FPOW, MVT::f128, Expand);
865 setOperationAction(ISD::FPOWI, MVT::f128, Expand);
866 setOperationAction(ISD::FREM, MVT::f128, Expand);
867 }
868
869 }
870
871 if (Subtarget.hasP9Altivec()) {
872 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
873 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
874 }
875 }
876
877 if (Subtarget.hasQPX()) {
878 setOperationAction(ISD::FADD, MVT::v4f64, Legal);
879 setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
880 setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
881 setOperationAction(ISD::FREM, MVT::v4f64, Expand);
882
883 setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal);
884 setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand);
885
886 setOperationAction(ISD::LOAD , MVT::v4f64, Custom);
887 setOperationAction(ISD::STORE , MVT::v4f64, Custom);
888
889 setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom);
890 setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom);
891
892 if (!Subtarget.useCRBits())
893 setOperationAction(ISD::SELECT, MVT::v4f64, Expand);
894 setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);
895
896 setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f64, Legal);
897 setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f64, Expand);
898 setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f64, Expand);
899 setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f64, Expand);
900 setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f64, Custom);
901 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal);
902 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom);
903
904 setOperationAction(ISD::FP_TO_SINT , MVT::v4f64, Legal);
905 setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand);
906
907 setOperationAction(ISD::FP_ROUND , MVT::v4f32, Legal);
908 setOperationAction(ISD::FP_ROUND_INREG , MVT::v4f32, Expand);
909 setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal);
910
911 setOperationAction(ISD::FNEG , MVT::v4f64, Legal);
912 setOperationAction(ISD::FABS , MVT::v4f64, Legal);
913 setOperationAction(ISD::FSIN , MVT::v4f64, Expand);
914 setOperationAction(ISD::FCOS , MVT::v4f64, Expand);
915 setOperationAction(ISD::FPOW , MVT::v4f64, Expand);
916 setOperationAction(ISD::FLOG , MVT::v4f64, Expand);
917 setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand);
918 setOperationAction(ISD::FLOG10 , MVT::v4f64, Expand);
919 setOperationAction(ISD::FEXP , MVT::v4f64, Expand);
920 setOperationAction(ISD::FEXP2 , MVT::v4f64, Expand);
921
922 setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal);
923 setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal);
924
925 setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal);
926 setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal);
927
928 addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass);
929
930 setOperationAction(ISD::FADD, MVT::v4f32, Legal);
931 setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
932 setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
933 setOperationAction(ISD::FREM, MVT::v4f32, Expand);
934
935 setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
936 setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand);
937
938 setOperationAction(ISD::LOAD , MVT::v4f32, Custom);
939 setOperationAction(ISD::STORE , MVT::v4f32, Custom);
940
941 if (!Subtarget.useCRBits())
942 setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
943 setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
944
945 setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f32, Legal);
946 setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f32, Expand);
947 setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f32, Expand);
948 setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f32, Expand);
949 setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f32, Custom);
950 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
951 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
952
953 setOperationAction(ISD::FP_TO_SINT , MVT::v4f32, Legal);
954 setOperationAction(ISD::FP_TO_UINT , MVT::v4f32, Expand);
955
956 setOperationAction(ISD::FNEG , MVT::v4f32, Legal);
957 setOperationAction(ISD::FABS , MVT::v4f32, Legal);
958 setOperationAction(ISD::FSIN , MVT::v4f32, Expand);
959 setOperationAction(ISD::FCOS , MVT::v4f32, Expand);
960 setOperationAction(ISD::FPOW , MVT::v4f32, Expand);
961 setOperationAction(ISD::FLOG , MVT::v4f32, Expand);
962 setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand);
963 setOperationAction(ISD::FLOG10 , MVT::v4f32, Expand);
964 setOperationAction(ISD::FEXP , MVT::v4f32, Expand);
965 setOperationAction(ISD::FEXP2 , MVT::v4f32, Expand);
966
967 setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
968 setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
969
970 setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal);
971 setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal);
972
973 addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass);
974
975 setOperationAction(ISD::AND , MVT::v4i1, Legal);
976 setOperationAction(ISD::OR , MVT::v4i1, Legal);
977 setOperationAction(ISD::XOR , MVT::v4i1, Legal);
978
979 if (!Subtarget.useCRBits())
980 setOperationAction(ISD::SELECT, MVT::v4i1, Expand);
981 setOperationAction(ISD::VSELECT, MVT::v4i1, Legal);
982
983 setOperationAction(ISD::LOAD , MVT::v4i1, Custom);
984 setOperationAction(ISD::STORE , MVT::v4i1, Custom);
985
986 setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4i1, Custom);
987 setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4i1, Expand);
988 setOperationAction(ISD::CONCAT_VECTORS , MVT::v4i1, Expand);
989 setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4i1, Expand);
990 setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4i1, Custom);
991 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand);
992 setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);
993
994 setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
995 setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);
996
997 addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass);
998
999 setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
1000 setOperationAction(ISD::FCEIL, MVT::v4f64, Legal);
1001 setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
1002 setOperationAction(ISD::FROUND, MVT::v4f64, Legal);
1003
1004 setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
1005 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
1006 setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
1007 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
1008
1009 setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand);
1010 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
1011
1012 // These need to set FE_INEXACT, and so cannot be vectorized here.
1013 setOperationAction(ISD::FRINT, MVT::v4f64, Expand);
1014 setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
1015
1016 if (TM.Options.UnsafeFPMath) {
1017 setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
1018 setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
1019
1020 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
1021 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
1022 } else {
1023 setOperationAction(ISD::FDIV, MVT::v4f64, Expand);
1024 setOperationAction(ISD::FSQRT, MVT::v4f64, Expand);
1025
1026 setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
1027 setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
1028 }
1029 }
1030
1031 if (Subtarget.has64BitSupport())
1032 setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
1033
1034 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1035
1036 if (!isPPC64) {
1037 setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
1038 setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
1039 }
1040
1041 setBooleanContents(ZeroOrOneBooleanContent);
1042
1043 if (Subtarget.hasAltivec()) {
1044 // Altivec instructions set fields to all zeros or all ones.
1045 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
1046 }
1047
1048 if (!isPPC64) {
1049 // These libcalls are not available in 32-bit.
1050 setLibcallName(RTLIB::SHL_I128, nullptr);
1051 setLibcallName(RTLIB::SRL_I128, nullptr);
1052 setLibcallName(RTLIB::SRA_I128, nullptr);
1053 }
1054
1055 setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1056
1057 // We have target-specific dag combine patterns for the following nodes:
1058 setTargetDAGCombine(ISD::SHL);
1059 setTargetDAGCombine(ISD::SRA);
1060 setTargetDAGCombine(ISD::SRL);
1061 setTargetDAGCombine(ISD::SINT_TO_FP);
1062 setTargetDAGCombine(ISD::BUILD_VECTOR);
1063 if (Subtarget.hasFPCVT())
1064 setTargetDAGCombine(ISD::UINT_TO_FP);
1065 setTargetDAGCombine(ISD::LOAD);
1066 setTargetDAGCombine(ISD::STORE);
1067 setTargetDAGCombine(ISD::BR_CC);
1068 if (Subtarget.useCRBits())
1069 setTargetDAGCombine(ISD::BRCOND);
1070 setTargetDAGCombine(ISD::BSWAP);
1071 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
1072 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
1073 setTargetDAGCombine(ISD::INTRINSIC_VOID);
1074
1075 setTargetDAGCombine(ISD::SIGN_EXTEND);
1076 setTargetDAGCombine(ISD::ZERO_EXTEND);
1077 setTargetDAGCombine(ISD::ANY_EXTEND);
1078
1079 if (Subtarget.useCRBits()) {
1080 setTargetDAGCombine(ISD::TRUNCATE);
1081 setTargetDAGCombine(ISD::SETCC);
1082 setTargetDAGCombine(ISD::SELECT_CC);
1083 }
1084
1085 // Use reciprocal estimates.
1086 if (TM.Options.UnsafeFPMath) {
1087 setTargetDAGCombine(ISD::FDIV);
1088 setTargetDAGCombine(ISD::FSQRT);
1089 }
1090
1091 // Darwin long double math library functions have $LDBL128 appended.
1092 if (Subtarget.isDarwin()) {
1093 setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
1094 setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
1095 setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
1096 setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
1097 setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
1098 setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
1099 setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
1100 setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
1101 setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
1102 setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
1103 }
1104
1105 if (EnableQuadPrecision) {
1106 setLibcallName(RTLIB::LOG_F128, "logf128");
1107 setLibcallName(RTLIB::LOG2_F128, "log2f128");
1108 setLibcallName(RTLIB::LOG10_F128, "log10f128");
1109 setLibcallName(RTLIB::EXP_F128, "expf128");
1110 setLibcallName(RTLIB::EXP2_F128, "exp2f128");
1111 setLibcallName(RTLIB::SIN_F128, "sinf128");
1112 setLibcallName(RTLIB::COS_F128, "cosf128");
1113 setLibcallName(RTLIB::POW_F128, "powf128");
1114 setLibcallName(RTLIB::FMIN_F128, "fminf128");
1115 setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
1116 setLibcallName(RTLIB::POWI_F128, "__powikf2");
1117 setLibcallName(RTLIB::REM_F128, "fmodf128");
1118 }
1119
1120 // With 32 condition bits, we don't need to sink (and duplicate) compares
1121 // aggressively in CodeGenPrep.
1122 if (Subtarget.useCRBits()) {
1123 setHasMultipleConditionRegisters();
1124 setJumpIsExpensive();
1125 }
1126
1127 setMinFunctionAlignment(2);
1128 if (Subtarget.isDarwin())
1129 setPrefFunctionAlignment(4);
1130
1131 switch (Subtarget.getDarwinDirective()) {
1132 default: break;
1133 case PPC::DIR_970:
1134 case PPC::DIR_A2:
1135 case PPC::DIR_E500:
1136 case PPC::DIR_E500mc:
1137 case PPC::DIR_E5500:
1138 case PPC::DIR_PWR4:
1139 case PPC::DIR_PWR5:
1140 case PPC::DIR_PWR5X:
1141 case PPC::DIR_PWR6:
1142 case PPC::DIR_PWR6X:
1143 case PPC::DIR_PWR7:
1144 case PPC::DIR_PWR8:
1145 case PPC::DIR_PWR9:
1146 setPrefFunctionAlignment(4);
1147 setPrefLoopAlignment(4);
1148 break;
1149 }
1150
1151 if (Subtarget.enableMachineScheduler())
1152 setSchedulingPreference(Sched::Source);
1153 else
1154 setSchedulingPreference(Sched::Hybrid);
1155
1156 computeRegisterProperties(STI.getRegisterInfo());
1157
1158 // The Freescale cores do better with aggressive inlining of memcpy and
1159 // friends. GCC uses same threshold of 128 bytes (= 32 word stores).
1160 if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
1161 Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
1162 MaxStoresPerMemset = 32;
1163 MaxStoresPerMemsetOptSize = 16;
1164 MaxStoresPerMemcpy = 32;
1165 MaxStoresPerMemcpyOptSize = 8;
1166 MaxStoresPerMemmove = 32;
1167 MaxStoresPerMemmoveOptSize = 8;
1168 } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) {
1169 // The A2 also benefits from (very) aggressive inlining of memcpy and
1170 // friends. The overhead of a function call, even when warm, can be
1171 // over one hundred cycles.
1172 MaxStoresPerMemset = 128;
1173 MaxStoresPerMemcpy = 128;
1174 MaxStoresPerMemmove = 128;
1175 MaxLoadsPerMemcmp = 128;
1176 } else {
1177 MaxLoadsPerMemcmp = 8;
1178 MaxLoadsPerMemcmpOptSize = 4;
1179 }
1180}
1181
1182/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1183/// the desired ByVal argument alignment.
1184static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
1185 unsigned MaxMaxAlign) {
1186 if (MaxAlign == MaxMaxAlign)
1187 return;
1188 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1189 if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
1190 MaxAlign = 32;
1191 else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
1192 MaxAlign = 16;
1193 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1194 unsigned EltAlign = 0;
1195 getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1196 if (EltAlign > MaxAlign)
1197 MaxAlign = EltAlign;
1198 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1199 for (auto *EltTy : STy->elements()) {
1200 unsigned EltAlign = 0;
1201 getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1202 if (EltAlign > MaxAlign)
1203 MaxAlign = EltAlign;
1204 if (MaxAlign == MaxMaxAlign)
1205 break;
1206 }
1207 }
1208}
1209
1210/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1211/// function arguments in the caller parameter area.
1212unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
1213 const DataLayout &DL) const {
1214 // Darwin passes everything on 4 byte boundary.
1215 if (Subtarget.isDarwin())
1216 return 4;
1217
1218 // 16byte and wider vectors are passed on 16byte boundary.
1219 // The rest is 8 on PPC64 and 4 on PPC32 boundary.
1220 unsigned Align = Subtarget.isPPC64() ? 8 : 4;
1221 if (Subtarget.hasAltivec() || Subtarget.hasQPX())
1222 getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16);
1223 return Align;
1224}
1225
1226unsigned PPCTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
1227 CallingConv::ID CC,
1228 EVT VT) const {
1229 if (Subtarget.hasSPE() && VT == MVT::f64)
1230 return 2;
1231 return PPCTargetLowering::getNumRegisters(Context, VT);
1232}
1233
1234MVT PPCTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
1235 CallingConv::ID CC,
1236 EVT VT) const {
1237 if (Subtarget.hasSPE() && VT == MVT::f64)
1238 return MVT::i32;
1239 return PPCTargetLowering::getRegisterType(Context, VT);
1240}
1241
1242bool PPCTargetLowering::useSoftFloat() const {
1243 return Subtarget.useSoftFloat();
1244}
1245
1246bool PPCTargetLowering::hasSPE() const {
1247 return Subtarget.hasSPE();
1248}
1249
1250const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1251 switch ((PPCISD::NodeType)Opcode) {
1252 case PPCISD::FIRST_NUMBER: break;
1253 case PPCISD::FSEL: return "PPCISD::FSEL";
1254 case PPCISD::FCFID: return "PPCISD::FCFID";
1255 case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
1256 case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
1257 case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
1258 case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
1259 case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
1260 case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
1261 case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
1262 case PPCISD::FP_TO_UINT_IN_VSR:
1263 return "PPCISD::FP_TO_UINT_IN_VSR";
1264 case PPCISD::FP_TO_SINT_IN_VSR:
1265 return "PPCISD::FP_TO_SINT_IN_VSR";
1266 case PPCISD::FRE: return "PPCISD::FRE";
1267 case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
1268 case PPCISD::STFIWX: return "PPCISD::STFIWX";
1269 case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
1270 case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
1271 case PPCISD::VPERM: return "PPCISD::VPERM";
1272 case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
1273 case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
1274 case PPCISD::XXREVERSE: return "PPCISD::XXREVERSE";
1275 case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
1276 case PPCISD::VECSHL: return "PPCISD::VECSHL";
1277 case PPCISD::CMPB: return "PPCISD::CMPB";
1278 case PPCISD::Hi: return "PPCISD::Hi";
1279 case PPCISD::Lo: return "PPCISD::Lo";
1280 case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
1281 case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1282 case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
1283 case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
1284 case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
1285 case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
1286 case PPCISD::SRL: return "PPCISD::SRL";
1287 case PPCISD::SRA: return "PPCISD::SRA";
1288 case PPCISD::SHL: return "PPCISD::SHL";
1289 case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
1290 case PPCISD::CALL: return "PPCISD::CALL";
1291 case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
1292 case PPCISD::MTCTR: return "PPCISD::MTCTR";
1293 case PPCISD::BCTRL: return "PPCISD::BCTRL";
1294 case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
1295 case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
1296 case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
1297 case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
1298 case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1299 case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
1300 case PPCISD::MFVSR: return "PPCISD::MFVSR";
1301 case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
1302 case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
1303 case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
1304 case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
1305 case PPCISD::ANDIo_1_EQ_BIT: return "PPCISD::ANDIo_1_EQ_BIT";
1306 case PPCISD::ANDIo_1_GT_BIT: return "PPCISD::ANDIo_1_GT_BIT";
1307 case PPCISD::VCMP: return "PPCISD::VCMP";
1308 case PPCISD::VCMPo: return "PPCISD::VCMPo";
1309 case PPCISD::LBRX: return "PPCISD::LBRX";
1310 case PPCISD::STBRX: return "PPCISD::STBRX";
1311 case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
1312 case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
1313 case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
1314 case PPCISD::STXSIX: return "PPCISD::STXSIX";
1315 case PPCISD::VEXTS: return "PPCISD::VEXTS";
1316 case PPCISD::SExtVElems: return "PPCISD::SExtVElems";
1317 case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
1318 case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
1319 case PPCISD::ST_VSR_SCAL_INT:
1320 return "PPCISD::ST_VSR_SCAL_INT";
1321 case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
1322 case PPCISD::BDNZ: return "PPCISD::BDNZ";
1323 case PPCISD::BDZ: return "PPCISD::BDZ";
1324 case PPCISD::MFFS: return "PPCISD::MFFS";
1325 case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
1326 case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
1327 case PPCISD::CR6SET: return "PPCISD::CR6SET";
1328 case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
1329 case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
1330 case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
1331 case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1332 case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
1333 case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
1334 case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
1335 case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
1336 case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
1337 case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1338 case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
1339 case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
1340 case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
1341 case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1342 case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1343 case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
1344 case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
1345 case PPCISD::SC: return "PPCISD::SC";
1346 case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";
1347 case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";
1348 case PPCISD::RFEBB: return "PPCISD::RFEBB";
1349 case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
1350 case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
1351 case PPCISD::QVFPERM: return "PPCISD::QVFPERM";
1352 case PPCISD::QVGPCI: return "PPCISD::QVGPCI";
1353 case PPCISD::QVALIGNI: return "PPCISD::QVALIGNI";
1354 case PPCISD::QVESPLATI: return "PPCISD::QVESPLATI";
1355 case PPCISD::QBFLT: return "PPCISD::QBFLT";
1356 case PPCISD::QVLFSb: return "PPCISD::QVLFSb";
1357 case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
1358 }
1359 return nullptr;
1360}
1361
1362EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
1363 EVT VT) const {
1364 if (!VT.isVector())
1365 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1366
1367 if (Subtarget.hasQPX())
1368 return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements());
1369
1370 return VT.changeVectorElementTypeToInteger();
1371}
1372
1373bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
1374   assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1375 return true;
1376}
1377
1378//===----------------------------------------------------------------------===//
1379// Node matching predicates, for use by the tblgen matching code.
1380//===----------------------------------------------------------------------===//
1381
1382/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1383static bool isFloatingPointZero(SDValue Op) {
1384 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1385 return CFP->getValueAPF().isZero();
1386 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1387 // Maybe this has already been legalized into the constant pool?
1388 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1389 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1390 return CFP->getValueAPF().isZero();
1391 }
1392 return false;
1393}
1394
1395/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
1396/// true if Op is undef or if it matches the specified value.
1397static bool isConstantOrUndef(int Op, int Val) {
1398 return Op < 0 || Op == Val;
1399}
1400
1401/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1402/// VPKUHUM instruction.
1403/// The ShuffleKind distinguishes between big-endian operations with
1404/// two different inputs (0), either-endian operations with two identical
1405/// inputs (1), and little-endian operations with two different inputs (2).
1406/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1407bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1408 SelectionDAG &DAG) {
1409 bool IsLE = DAG.getDataLayout().isLittleEndian();
1410 if (ShuffleKind == 0) {
1411 if (IsLE)
1412 return false;
1413 for (unsigned i = 0; i != 16; ++i)
1414 if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1415 return false;
1416 } else if (ShuffleKind == 2) {
1417 if (!IsLE)
1418 return false;
1419 for (unsigned i = 0; i != 16; ++i)
1420 if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1421 return false;
1422 } else if (ShuffleKind == 1) {
1423 unsigned j = IsLE ? 0 : 1;
1424 for (unsigned i = 0; i != 8; ++i)
1425 if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
1426 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
1427 return false;
1428 }
1429 return true;
1430}
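For illustration (masks derived from the loops above, not taken from the upstream source), the exact patterns accepted are:

    // ShuffleKind 0 (big-endian, two inputs):    mask[i] == 2*i + 1
    //   -> { 1, 3, 5, ..., 29, 31 }              (the odd bytes of both inputs)
    // ShuffleKind 2 (little-endian, swapped):    mask[i] == 2*i
    //   -> { 0, 2, 4, ..., 28, 30 }              (the even bytes of both inputs)
    // ShuffleKind 1 (unary, either endian): the same byte pattern folded onto
    //   one input, checked for mask[i] and mask[i+8] with i in [0, 8).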
1431
1432/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1433/// VPKUWUM instruction.
1434/// The ShuffleKind distinguishes between big-endian operations with
1435/// two different inputs (0), either-endian operations with two identical
1436/// inputs (1), and little-endian operations with two different inputs (2).
1437/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1438bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1439 SelectionDAG &DAG) {
1440 bool IsLE = DAG.getDataLayout().isLittleEndian();
1441 if (ShuffleKind == 0) {
1442 if (IsLE)
1443 return false;
1444 for (unsigned i = 0; i != 16; i += 2)
1445 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
1446 !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
1447 return false;
1448 } else if (ShuffleKind == 2) {
1449 if (!IsLE)
1450 return false;
1451 for (unsigned i = 0; i != 16; i += 2)
1452 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1453 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
1454 return false;
1455 } else if (ShuffleKind == 1) {
1456 unsigned j = IsLE ? 0 : 2;
1457 for (unsigned i = 0; i != 8; i += 2)
1458 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1459 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1460 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1461 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
1462 return false;
1463 }
1464 return true;
1465}
1466
1467/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1468/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1469/// current subtarget.
1470///
1471/// The ShuffleKind distinguishes between big-endian operations with
1472/// two different inputs (0), either-endian operations with two identical
1473/// inputs (1), and little-endian operations with two different inputs (2).
1474/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1475bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1476 SelectionDAG &DAG) {
1477 const PPCSubtarget& Subtarget =
1478 static_cast<const PPCSubtarget&>(DAG.getSubtarget());
1479 if (!Subtarget.hasP8Vector())
1480 return false;
1481
1482 bool IsLE = DAG.getDataLayout().isLittleEndian();
1483 if (ShuffleKind == 0) {
1484 if (IsLE)
1485 return false;
1486 for (unsigned i = 0; i != 16; i += 4)
1487 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||
1488 !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
1489 !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
1490 !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
1491 return false;
1492 } else if (ShuffleKind == 2) {
1493 if (!IsLE)
1494 return false;
1495 for (unsigned i = 0; i != 16; i += 4)
1496 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1497 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
1498 !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
1499 !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
1500 return false;
1501 } else if (ShuffleKind == 1) {
1502 unsigned j = IsLE ? 0 : 4;
1503 for (unsigned i = 0; i != 8; i += 4)
1504 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1505 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1506 !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
1507 !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
1508 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1509 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
1510 !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1511 !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1512 return false;
1513 }
1514 return true;
1515}
1516
1517/// isVMerge - Common function, used to match vmrg* shuffles.
1518///
1519static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
1520 unsigned LHSStart, unsigned RHSStart) {
1521 if (N->getValueType(0) != MVT::v16i8)
1522 return false;
1523   assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
1524          "Unsupported merge size!");
1525
1526 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
1527 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
1528 if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
1529 LHSStart+j+i*UnitSize) ||
1530 !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
1531 RHSStart+j+i*UnitSize))
1532 return false;
1533 }
1534 return true;
1535}
1536
1537/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
1538/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
1539/// The ShuffleKind distinguishes between big-endian merges with two
1540/// different inputs (0), either-endian merges with two identical inputs (1),
1541/// and little-endian merges with two different inputs (2). For the latter,
1542/// the input operands are swapped (see PPCInstrAltivec.td).
1543bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1544 unsigned ShuffleKind, SelectionDAG &DAG) {
1545 if (DAG.getDataLayout().isLittleEndian()) {
1546 if (ShuffleKind == 1) // unary
1547 return isVMerge(N, UnitSize, 0, 0);
1548 else if (ShuffleKind == 2) // swapped
1549 return isVMerge(N, UnitSize, 0, 16);
1550 else
1551 return false;
1552 } else {
1553 if (ShuffleKind == 1) // unary
1554 return isVMerge(N, UnitSize, 8, 8);
1555 else if (ShuffleKind == 0) // normal
1556 return isVMerge(N, UnitSize, 8, 24);
1557 else
1558 return false;
1559 }
1560}
1561
1562/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
1563/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
1564/// The ShuffleKind distinguishes between big-endian merges with two
1565/// different inputs (0), either-endian merges with two identical inputs (1),
1566/// and little-endian merges with two different inputs (2). For the latter,
1567/// the input operands are swapped (see PPCInstrAltivec.td).
1568bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1569 unsigned ShuffleKind, SelectionDAG &DAG) {
1570 if (DAG.getDataLayout().isLittleEndian()) {
1571 if (ShuffleKind == 1) // unary
1572 return isVMerge(N, UnitSize, 8, 8);
1573 else if (ShuffleKind == 2) // swapped
1574 return isVMerge(N, UnitSize, 8, 24);
1575 else
1576 return false;
1577 } else {
1578 if (ShuffleKind == 1) // unary
1579 return isVMerge(N, UnitSize, 0, 0);
1580 else if (ShuffleKind == 0) // normal
1581 return isVMerge(N, UnitSize, 0, 16);
1582 else
1583 return false;
1584 }
1585}
1586
1587/**
1588 * Common function used to match vmrgew and vmrgow shuffles
1589 *
1590 * The indexOffset determines whether to look for even or odd words in
1591  * the shuffle mask. This is based on the endianness of the target
1592 * machine.
1593 * - Little Endian:
1594 * - Use offset of 0 to check for odd elements
1595 * - Use offset of 4 to check for even elements
1596 * - Big Endian:
1597 * - Use offset of 0 to check for even elements
1598 * - Use offset of 4 to check for odd elements
1599 * A detailed description of the vector element ordering for little endian and
1600 * big endian can be found at
1601 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
1602 * Targeting your applications - what little endian and big endian IBM XL C/C++
1603 * compiler differences mean to you
1604 *
1605 * The mask to the shuffle vector instruction specifies the indices of the
1606 * elements from the two input vectors to place in the result. The elements are
1607 * numbered in array-access order, starting with the first vector. These vectors
1608 * are always of type v16i8, thus each vector will contain 16 elements of size
1609 * 8. More info on the shuffle vector can be found in the
1610 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
1611 * Language Reference.
1612 *
1613 * The RHSStartValue indicates whether the same input vectors are used (unary)
1614 * or two different input vectors are used, based on the following:
1615 * - If the instruction uses the same vector for both inputs, the range of the
1616 * indices will be 0 to 15. In this case, the RHSStart value passed should
1617 * be 0.
1618 * - If the instruction has two different vectors then the range of the
1619 * indices will be 0 to 31. In this case, the RHSStart value passed should
1620 * be 16 (indices 0-15 specify elements in the first vector while indices 16
1621 * to 31 specify elements in the second vector).
1622 *
1623 * \param[in] N The shuffle vector SD Node to analyze
1624 * \param[in] IndexOffset Specifies whether to look for even or odd elements
1625 * \param[in] RHSStartValue Specifies the starting index for the righthand input
1626 * vector to the shuffle_vector instruction
1627 * \return true iff this shuffle vector represents an even or odd word merge
1628 */
1629static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
1630 unsigned RHSStartValue) {
1631 if (N->getValueType(0) != MVT::v16i8)
1632 return false;
1633
1634 for (unsigned i = 0; i < 2; ++i)
1635 for (unsigned j = 0; j < 4; ++j)
1636 if (!isConstantOrUndef(N->getMaskElt(i*4+j),
1637 i*RHSStartValue+j+IndexOffset) ||
1638 !isConstantOrUndef(N->getMaskElt(i*4+j+8),
1639 i*RHSStartValue+j+IndexOffset+8))
1640 return false;
1641 return true;
1642}
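A concrete instance of the check (worked out from the two loops above; the masks are illustrative only): with IndexOffset == 0 and RHSStartValue == 0 (the unary case) the only accepted mask is

    //   { 0, 1, 2, 3,  0, 1, 2, 3,  8, 9, 10, 11,  8, 9, 10, 11 }
    // and with RHSStartValue == 16 (two distinct inputs) it becomes
    //   { 0, 1, 2, 3, 16, 17, 18, 19,  8, 9, 10, 11, 24, 25, 26, 27 }
    // with undef (-1) permitted at any position.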
1643
1644/**
1645 * Determine if the specified shuffle mask is suitable for the vmrgew or
1646 * vmrgow instructions.
1647 *
1648 * \param[in] N The shuffle vector SD Node to analyze
1649 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
1650 * \param[in] ShuffleKind Identify the type of merge:
1651 * - 0 = big-endian merge with two different inputs;
1652 * - 1 = either-endian merge with two identical inputs;
1653 * - 2 = little-endian merge with two different inputs (inputs are swapped for
1654 * little-endian merges).
1655 * \param[in] DAG The current SelectionDAG
1656 * \return true iff this shuffle mask matches the requested even/odd
1657 *         vmrgew/vmrgow merge for the given ShuffleKind
1657 */
1658bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
1659 unsigned ShuffleKind, SelectionDAG &DAG) {
1660 if (DAG.getDataLayout().isLittleEndian()) {
1661 unsigned indexOffset = CheckEven ? 4 : 0;
1662 if (ShuffleKind == 1) // Unary
1663 return isVMerge(N, indexOffset, 0);
1664 else if (ShuffleKind == 2) // swapped
1665 return isVMerge(N, indexOffset, 16);
1666 else
1667 return false;
1668 }
1669 else {
1670 unsigned indexOffset = CheckEven ? 0 : 4;
1671 if (ShuffleKind == 1) // Unary
1672 return isVMerge(N, indexOffset, 0);
1673 else if (ShuffleKind == 0) // Normal
1674 return isVMerge(N, indexOffset, 16);
1675 else
1676 return false;
1677 }
1678 return false;
1679}
1680
1681/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
1682/// amount, otherwise return -1.
1683/// The ShuffleKind distinguishes between big-endian operations with two
1684/// different inputs (0), either-endian operations with two identical inputs
1685/// (1), and little-endian operations with two different inputs (2). For the
1686/// latter, the input operands are swapped (see PPCInstrAltivec.td).
1687int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
1688 SelectionDAG &DAG) {
1689 if (N->getValueType(0) != MVT::v16i8)
1690 return -1;
1691
1692 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
1693
1694 // Find the first non-undef value in the shuffle mask.
1695 unsigned i;
1696 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
1697 /*search*/;
1698
1699 if (i == 16) return -1; // all undef.
1700
1701 // Otherwise, check to see if the rest of the elements are consecutively
1702 // numbered from this value.
1703 unsigned ShiftAmt = SVOp->getMaskElt(i);
1704 if (ShiftAmt < i) return -1;
1705
1706 ShiftAmt -= i;
1707 bool isLE = DAG.getDataLayout().isLittleEndian();
1708
1709 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
1710 // Check the rest of the elements to see if they are consecutive.
1711 for (++i; i != 16; ++i)
1712 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
1713 return -1;
1714 } else if (ShuffleKind == 1) {
1715 // Check the rest of the elements to see if they are consecutive.
1716 for (++i; i != 16; ++i)
1717 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
1718 return -1;
1719 } else
1720 return -1;
1721
1722 if (isLE)
1723 ShiftAmt = 16 - ShiftAmt;
1724
1725 return ShiftAmt;
1726}
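Worked example (values chosen for illustration, derived from the code above):

    //   BE, ShuffleKind 0, mask = { 3, 4, 5, ..., 18 }  -> returns 3
    //   LE, ShuffleKind 2, mask = { 3, 4, 5, ..., 18 }  -> returns 16 - 3 = 13
    //   mask of all undef (-1)                          -> returns -1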
1727
1728/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
1729/// specifies a splat of a single element that is suitable for input to
1730/// VSPLTB/VSPLTH/VSPLTW.
1731bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
1732   assert(N->getValueType(0) == MVT::v16i8 &&
1733          (EltSize == 1 || EltSize == 2 || EltSize == 4));
1734
1735 // The consecutive indices need to specify an element, not part of two
1736 // different elements. So abandon ship early if this isn't the case.
1737 if (N->getMaskElt(0) % EltSize != 0)
1738 return false;
1739
1740 // This is a splat operation if each element of the permute is the same, and
1741 // if the value doesn't reference the second vector.
1742 unsigned ElementBase = N->getMaskElt(0);
1743
1744 // FIXME: Handle UNDEF elements too!
1745 if (ElementBase >= 16)
1746 return false;
1747
1748 // Check that the indices are consecutive, in the case of a multi-byte element
1749 // splatted with a v16i8 mask.
1750 for (unsigned i = 1; i != EltSize; ++i)
1751 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
1752 return false;
1753
1754 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
1755 if (N->getMaskElt(i) < 0) continue;
1756 for (unsigned j = 0; j != EltSize; ++j)
1757 if (N->getMaskElt(i+j) != N->getMaskElt(j))
1758 return false;
1759 }
1760 return true;
1761}
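For example (illustrative masks, with EltSize == 4, i.e. a VSPLTW candidate):

    //   { 8, 9, 10, 11,  8, 9, 10, 11,  8, 9, 10, 11,  8, 9, 10, 11 }  -> true
    //   { 8, 9, 10, 11,  8, 9, 10, 11,  8, 9, 10, 11, 12, 13, 14, 15 } -> false
    // The first group must be fully defined, start on an EltSize boundary and
    // index into the first vector; later groups are only compared against it
    // when their leading byte is not undef (-1).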
1762
1763/// Check that the mask is shuffling N byte elements. Within each N byte
1764/// element of the mask, the indices could be either in increasing or
1765/// decreasing order as long as they are consecutive.
1766/// \param[in] N the shuffle vector SD Node to analyze
1767/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
1768/// Word/DoubleWord/QuadWord).
1769/// \param[in] StepLen the delta indices number among the N byte element, if
1770/// the mask is in increasing/decreasing order then it is 1/-1.
1771/// \return true iff the mask is shuffling N byte elements.
1772static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
1773 int StepLen) {
1774   assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
1775          "Unexpected element width.");
1776   assert((StepLen == 1 || StepLen == -1) && "Unexpected element width.");
1777
1778 unsigned NumOfElem = 16 / Width;
1779 unsigned MaskVal[16]; // Width is never greater than 16
1780 for (unsigned i = 0; i < NumOfElem; ++i) {
1781 MaskVal[0] = N->getMaskElt(i * Width);
1782 if ((StepLen == 1) && (MaskVal[0] % Width)) {
1783 return false;
1784 } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
1785 return false;
1786 }
1787
1788 for (unsigned int j = 1; j < Width; ++j) {
1789 MaskVal[j] = N->getMaskElt(i * Width + j);
1790 if (MaskVal[j] != MaskVal[j-1] + StepLen) {
1791 return false;
1792 }
1793 }
1794 }
1795
1796 return true;
1797}
1798
1799bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
1800 unsigned &InsertAtByte, bool &Swap, bool IsLE) {
1801 if (!isNByteElemShuffleMask(N, 4, 1))
1802 return false;
1803
1804 // Now we look at mask elements 0,4,8,12
1805 unsigned M0 = N->getMaskElt(0) / 4;
1806 unsigned M1 = N->getMaskElt(4) / 4;
1807 unsigned M2 = N->getMaskElt(8) / 4;
1808 unsigned M3 = N->getMaskElt(12) / 4;
1809 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
1810 unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
1811
1812 // Below, let H and L be arbitrary elements of the shuffle mask
1813 // where H is in the range [4,7] and L is in the range [0,3].
1814 // H, 1, 2, 3 or L, 5, 6, 7
1815 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
1816 (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
1817 ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
1818 InsertAtByte = IsLE ? 12 : 0;
1819 Swap = M0 < 4;
1820 return true;
1821 }
1822 // 0, H, 2, 3 or 4, L, 6, 7
1823 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
1824 (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
1825 ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
1826 InsertAtByte = IsLE ? 8 : 4;
1827 Swap = M1 < 4;
1828 return true;
1829 }
1830 // 0, 1, H, 3 or 4, 5, L, 7
1831 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
1832 (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
1833 ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
1834 InsertAtByte = IsLE ? 4 : 8;
1835 Swap = M2 < 4;
1836 return true;
1837 }
1838 // 0, 1, 2, H or 4, 5, 6, L
1839 if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
1840 (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
1841 ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
1842 InsertAtByte = IsLE ? 0 : 12;
1843 Swap = M3 < 4;
1844 return true;
1845 }
1846
1847 // If both vector operands for the shuffle are the same vector, the mask will
1848 // contain only elements from the first one and the second one will be undef.
1849 if (N->getOperand(1).isUndef()) {
1850 ShiftElts = 0;
1851 Swap = true;
1852 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
1853 if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
1854 InsertAtByte = IsLE ? 12 : 0;
1855 return true;
1856 }
1857 if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
1858 InsertAtByte = IsLE ? 8 : 4;
1859 return true;
1860 }
1861 if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
1862 InsertAtByte = IsLE ? 4 : 8;
1863 return true;
1864 }
1865 if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
1866 InsertAtByte = IsLE ? 0 : 12;
1867 return true;
1868 }
1869 }
1870
1871 return false;
1872}
1873
1874bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
1875 bool &Swap, bool IsLE) {
1876   assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
7
Within the expansion of the macro 'assert':
1877 // Ensure each byte index of the word is consecutive.
1878 if (!isNByteElemShuffleMask(N, 4, 1))
8
Assuming the condition is false
9
Taking false branch
1879 return false;
1880
1881 // Now we look at mask elements 0,4,8,12, which are the beginning of words.
1882 unsigned M0 = N->getMaskElt(0) / 4;
1883 unsigned M1 = N->getMaskElt(4) / 4;
1884 unsigned M2 = N->getMaskElt(8) / 4;
1885 unsigned M3 = N->getMaskElt(12) / 4;
1886
1887 // If both vector operands for the shuffle are the same vector, the mask will
1888 // contain only elements from the first one and the second one will be undef.
1889 if (N->getOperand(1).isUndef()) {
10
Taking false branch
1890     assert(M0 < 4 && "Indexing into an undef vector?");
1891 if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
1892 return false;
1893
1894 ShiftElts = IsLE ? (4 - M0) % 4 : M0;
1895 Swap = false;
1896 return true;
1897 }
1898
1899 // Ensure each word index of the ShuffleVector Mask is consecutive.
1900 if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
11
Assuming the condition is false
12
Assuming the condition is false
13
Assuming the condition is false
14
Taking false branch
1901 return false;
1902
1903 if (IsLE) {
15
Assuming 'IsLE' is not equal to 0
16
Taking true branch
1904 if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
17
Assuming 'M0' is not equal to 0
18
Assuming 'M0' is not equal to 7
19
Assuming 'M0' is not equal to 6
20
Assuming 'M0' is not equal to 5
21
Taking false branch
1905 // Input vectors don't need to be swapped if the leading element
1906 // of the result is one of the 3 left elements of the second vector
1907 // (or if there is no shift to be done at all).
1908 Swap = false;
1909 ShiftElts = (8 - M0) % 8;
1910 } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
22
Assuming 'M0' is not equal to 4
23
Assuming 'M0' is not equal to 3
24
Assuming 'M0' is not equal to 2
25
Assuming 'M0' is not equal to 1
26
Taking false branch
1911 // Input vectors need to be swapped if the leading element
1912 // of the result is one of the 3 left elements of the first vector
1913 // (or if we're shifting by 4 - thereby simply swapping the vectors).
1914 Swap = true;
1915 ShiftElts = (4 - M0) % 4;
1916 }
1917
1918 return true;
27
Returning without writing to 'ShiftElts'
1919 } else { // BE
1920 if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
1921 // Input vectors don't need to be swapped if the leading element
1922 // of the result is one of the 4 elements of the first vector.
1923 Swap = false;
1924 ShiftElts = M0;
1925 } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
1926 // Input vectors need to be swapped if the leading element
1927 // of the result is one of the 4 elements of the right vector.
1928 Swap = true;
1929 ShiftElts = M0 - 4;
1930 }
1931
1932 return true;
1933 }
1934}
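The path annotated above (steps 7 through 27) shows the little-endian arm returning true without ever writing to 'ShiftElts' when M0 matches neither of the two tested ranges. One possible shape of a fix, shown only as a sketch and not as the upstream patch, is to make the fall-through explicit:

      if (IsLE) {
        if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
          Swap = false;
          ShiftElts = (8 - M0) % 8;
        } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
          Swap = true;
          ShiftElts = (4 - M0) % 4;
        } else {
          // M0 is outside the 0-7 range the two branches cover; refuse the
          // match rather than return true with ShiftElts left unset.
          return false;
        }
        return true;
      }
      // The big-endian arm would gain the same trailing 'else return false;'.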
1935
1936bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
1937   assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
1938
1939 if (!isNByteElemShuffleMask(N, Width, -1))
1940 return false;
1941
1942 for (int i = 0; i < 16; i += Width)
1943 if (N->getMaskElt(i) != i + Width - 1)
1944 return false;
1945
1946 return true;
1947}
1948
1949bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
1950 return isXXBRShuffleMaskHelper(N, 2);
1951}
1952
1953bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
1954 return isXXBRShuffleMaskHelper(N, 4);
1955}
1956
1957bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
1958 return isXXBRShuffleMaskHelper(N, 8);
1959}
1960
1961bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
1962 return isXXBRShuffleMaskHelper(N, 16);
1963}
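As an example (derived from the helper above, shown for illustration): the byte-reverse-in-words form accepted by isXXBRWShuffleMask (Width == 4, StepLen == -1) is exactly

    //   { 3, 2, 1, 0,  7, 6, 5, 4,  11, 10, 9, 8,  15, 14, 13, 12 }
    // i.e. each 4-byte group starts at its highest byte index (i + Width - 1)
    // and decreases by one within the group.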
1964
1965/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
1966/// if the inputs to the instruction should be swapped and set \p DM to the
1967/// value for the immediate.
1968/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
1969/// AND element 0 of the result comes from the first input (LE) or second input
1970/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
1971/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
1972/// mask.
1973bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
1974 bool &Swap, bool IsLE) {
1975   assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
1976
1977 // Ensure each byte index of the double word is consecutive.
1978 if (!isNByteElemShuffleMask(N, 8, 1))
1979 return false;
1980
1981 unsigned M0 = N->getMaskElt(0) / 8;
1982 unsigned M1 = N->getMaskElt(8) / 8;
1983   assert(((M0 | M1) < 4) && "A mask element out of bounds?");
1984
1985 // If both vector operands for the shuffle are the same vector, the mask will
1986 // contain only elements from the first one and the second one will be undef.
1987 if (N->getOperand(1).isUndef()) {
1988 if ((M0 | M1) < 2) {
1989 DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
1990 Swap = false;
1991 return true;
1992 } else
1993 return false;
1994 }
1995
1996 if (IsLE) {
1997 if (M0 > 1 && M1 < 2) {
1998 Swap = false;
1999 } else if (M0 < 2 && M1 > 1) {
2000 M0 = (M0 + 2) % 4;
2001 M1 = (M1 + 2) % 4;
2002 Swap = true;
2003 } else
2004 return false;
2005
2006 // Note: if control flow comes here that means Swap is already set above
2007 DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2008 return true;
2009 } else { // BE
2010 if (M0 < 2 && M1 > 1) {
2011 Swap = false;
2012 } else if (M0 > 1 && M1 < 2) {
2013 M0 = (M0 + 2) % 4;
2014 M1 = (M1 + 2) % 4;
2015 Swap = true;
2016 } else
2017 return false;
2018
2019 // Note: if control flow comes here that means Swap is already set above
2020 DM = (M0 << 1) + (M1 & 1);
2021 return true;
2022 }
2023}
2024
2025
2026/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
2027/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
2028unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
2029 SelectionDAG &DAG) {
2030 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2031   assert(isSplatShuffleMask(SVOp, EltSize));
2032 if (DAG.getDataLayout().isLittleEndian())
2033 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2034 else
2035 return SVOp->getMaskElt(0) / EltSize;
2036}
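A small worked example (values chosen for illustration): with EltSize == 4 and a splat mask whose first element is 8 (word 2 of the input),

    //   BE:  8 / 4                    == 2
    //   LE:  (16 / 4) - 1 - (8 / 4)   == 1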
2037
2038/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2039/// by using a vspltis[bhw] instruction of the specified element size, return
2040/// the constant being splatted. The ByteSize field indicates the number of
2041/// bytes of each element [124] -> [bhw].
2042SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2043 SDValue OpVal(nullptr, 0);
2044
2045 // If ByteSize of the splat is bigger than the element size of the
2046 // build_vector, then we have a case where we are checking for a splat where
2047 // multiple elements of the buildvector are folded together into a single
2048 // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
2049 unsigned EltSize = 16/N->getNumOperands();
2050 if (EltSize < ByteSize) {
2051 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2052 SDValue UniquedVals[4];
2053     assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2054
2055 // See if all of the elements in the buildvector agree across.
2056 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2057 if (N->getOperand(i).isUndef()) continue;
2058 // If the element isn't a constant, bail fully out.
2059 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2060
2061 if (!UniquedVals[i&(Multiple-1)].getNode())
2062 UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2063 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2064 return SDValue(); // no match.
2065 }
2066
2067 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2068 // either constant or undef values that are identical for each chunk. See
2069 // if these chunks can form into a larger vspltis*.
2070
2071 // Check to see if all of the leading entries are either 0 or -1. If
2072 // neither, then this won't fit into the immediate field.
2073 bool LeadingZero = true;
2074 bool LeadingOnes = true;
2075 for (unsigned i = 0; i != Multiple-1; ++i) {
2076 if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2077
2078 LeadingZero &= isNullConstant(UniquedVals[i]);
2079 LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2080 }
2081 // Finally, check the least significant entry.
2082 if (LeadingZero) {
2083 if (!UniquedVals[Multiple-1].getNode())
2084 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
2085 int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
2086 if (Val < 16) // 0,0,0,4 -> vspltisw(4)
2087 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2088 }
2089 if (LeadingOnes) {
2090 if (!UniquedVals[Multiple-1].getNode())
2091 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2092       int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2093 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2094 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2095 }
2096
2097 return SDValue();
2098 }
2099
2100 // Check to see if this buildvec has a single non-undef value in its elements.
2101 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2102 if (N->getOperand(i).isUndef()) continue;
2103 if (!OpVal.getNode())
2104 OpVal = N->getOperand(i);
2105 else if (OpVal != N->getOperand(i))
2106 return SDValue();
2107 }
2108
2109 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
2110
2111 unsigned ValSizeInBytes = EltSize;
2112 uint64_t Value = 0;
2113 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2114 Value = CN->getZExtValue();
2115 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2116     assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2117 Value = FloatToBits(CN->getValueAPF().convertToFloat());
2118 }
2119
2120 // If the splat value is larger than the element value, then we can never do
2121 // this splat. The only case that we could fit the replicated bits into our
2122 // immediate field for would be zero, and we prefer to use vxor for it.
2123 if (ValSizeInBytes < ByteSize) return SDValue();
2124
2125 // If the element value is larger than the splat value, check if it consists
2126 // of a repeated bit pattern of size ByteSize.
2127 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2128 return SDValue();
2129
2130 // Properly sign extend the value.
2131 int MaskVal = SignExtend32(Value, ByteSize * 8);
2132
2133 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2134 if (MaskVal == 0) return SDValue();
2135
2136 // Finally, if this value fits in a 5 bit sext field, return it
2137 if (SignExtend32<5>(MaskVal) == MaskVal)
2138 return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2139 return SDValue();
2140}
2141
2142/// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift
2143/// amount, otherwise return -1.
2144int PPC::isQVALIGNIShuffleMask(SDNode *N) {
2145 EVT VT = N->getValueType(0);
2146 if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1)
2147 return -1;
2148
2149 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2150
2151 // Find the first non-undef value in the shuffle mask.
2152 unsigned i;
2153 for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i)
2154 /*search*/;
2155
2156 if (i == 4) return -1; // all undef.
2157
2158 // Otherwise, check to see if the rest of the elements are consecutively
2159 // numbered from this value.
2160 unsigned ShiftAmt = SVOp->getMaskElt(i);
2161 if (ShiftAmt < i) return -1;
2162 ShiftAmt -= i;
2163
2164 // Check the rest of the elements to see if they are consecutive.
2165 for (++i; i != 4; ++i)
2166 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2167 return -1;
2168
2169 return ShiftAmt;
2170}
2171
2172//===----------------------------------------------------------------------===//
2173// Addressing Mode Selection
2174//===----------------------------------------------------------------------===//
2175
2176/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2177/// or 64-bit immediate, and if the value can be accurately represented as a
2178/// sign extension from a 16-bit value. If so, this returns true and the
2179/// immediate.
2180bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2181 if (!isa<ConstantSDNode>(N))
2182 return false;
2183
2184 Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue();
2185 if (N->getValueType(0) == MVT::i32)
2186 return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
2187 else
2188 return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2189}
2190bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2191 return isIntS16Immediate(Op.getNode(), Imm);
2192}
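For example (illustrative constants, reasoned from the casts above):

    //   i32 constant 0x00007FFF  ->  true,  Imm == 32767
    //   i32 constant 0x00008000  ->  false  (Imm sign-extends to -32768, but the
    //                                        32-bit value is +32768)
    //   i32 constant 0xFFFF8000  ->  true,  Imm == -32768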
2193
2194 /// SelectAddressRegReg - Given the specified address, check to see if it
2195/// can be represented as an indexed [r+r] operation. Returns false if it
2196/// can be more efficiently represented with [r+imm].
2197bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
2198 SDValue &Index,
2199 SelectionDAG &DAG) const {
2200 int16_t imm = 0;
2201 if (N.getOpcode() == ISD::ADD) {
2202 if (isIntS16Immediate(N.getOperand(1), imm))
2203 return false; // r+i
2204 if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2205 return false; // r+i
2206
2207 Base = N.getOperand(0);
2208 Index = N.getOperand(1);
2209 return true;
2210 } else if (N.getOpcode() == ISD::OR) {
2211 if (isIntS16Immediate(N.getOperand(1), imm))
2212 return false; // r+i can fold it if we can.
2213
2214 // If this is an or of disjoint bitfields, we can codegen this as an add
2215 // (for better address arithmetic) if the LHS and RHS of the OR are provably
2216 // disjoint.
2217 KnownBits LHSKnown, RHSKnown;
2218 DAG.computeKnownBits(N.getOperand(0), LHSKnown);
2219
2220 if (LHSKnown.Zero.getBoolValue()) {
2221 DAG.computeKnownBits(N.getOperand(1), RHSKnown);
2222 // If all of the bits are known zero on the LHS or RHS, the add won't
2223 // carry.
2224 if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2225 Base = N.getOperand(0);
2226 Index = N.getOperand(1);
2227 return true;
2228 }
2229 }
2230 }
2231
2232 return false;
2233}
2234
2235// If we happen to be doing an i64 load or store into a stack slot that has
2236// less than a 4-byte alignment, then the frame-index elimination may need to
2237// use an indexed load or store instruction (because the offset may not be a
2238// multiple of 4). The extra register needed to hold the offset comes from the
2239// register scavenger, and it is possible that the scavenger will need to use
2240// an emergency spill slot. As a result, we need to make sure that a spill slot
2241// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2242// stack slot.
2243static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2244 // FIXME: This does not handle the LWA case.
2245 if (VT != MVT::i64)
2246 return;
2247
2248 // NOTE: We'll exclude negative FIs here, which come from argument
2249 // lowering, because there are no known test cases triggering this problem
2250 // using packed structures (or similar). We can remove this exclusion if
2251 // we find such a test case. The reason why this is so test-case driven is
2252 // because this entire 'fixup' is only to prevent crashes (from the
2253 // register scavenger) on not-really-valid inputs. For example, if we have:
2254 // %a = alloca i1
2255 // %b = bitcast i1* %a to i64*
2256   //   store i64 0, i64* %b
2257 // then the store should really be marked as 'align 1', but is not. If it
2258 // were marked as 'align 1' then the indexed form would have been
2259 // instruction-selected initially, and the problem this 'fixup' is preventing
2260 // won't happen regardless.
2261 if (FrameIdx < 0)
2262 return;
2263
2264 MachineFunction &MF = DAG.getMachineFunction();
2265 MachineFrameInfo &MFI = MF.getFrameInfo();
2266
2267 unsigned Align = MFI.getObjectAlignment(FrameIdx);
2268 if (Align >= 4)
2269 return;
2270
2271 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2272 FuncInfo->setHasNonRISpills();
2273}
2274
2275/// Returns true if the address N can be represented by a base register plus
2276/// a signed 16-bit displacement [r+imm], and if it is not better
2277/// represented as reg+reg. If \p Alignment is non-zero, only accept
2278/// displacements that are multiples of that value.
2279bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
2280 SDValue &Base,
2281 SelectionDAG &DAG,
2282 unsigned Alignment) const {
2283 // FIXME dl should come from parent load or store, not from address
2284 SDLoc dl(N);
2285 // If this can be more profitably realized as r+r, fail.
2286 if (SelectAddressRegReg(N, Disp, Base, DAG))
2287 return false;
2288
2289 if (N.getOpcode() == ISD::ADD) {
2290 int16_t imm = 0;
2291 if (isIntS16Immediate(N.getOperand(1), imm) &&
2292 (!Alignment || (imm % Alignment) == 0)) {
2293 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2294 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2295 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2296 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2297 } else {
2298 Base = N.getOperand(0);
2299 }
2300 return true; // [r+i]
2301 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2302 // Match LOAD (ADD (X, Lo(G))).
2303       assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue() &&
2304              "Cannot handle constant offsets yet!");
2305 Disp = N.getOperand(1).getOperand(0); // The global address.
2306       assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2307              Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2308              Disp.getOpcode() == ISD::TargetConstantPool ||
2309              Disp.getOpcode() == ISD::TargetJumpTable);
2310 Base = N.getOperand(0);
2311 return true; // [&g+r]
2312 }
2313 } else if (N.getOpcode() == ISD::OR) {
2314 int16_t imm = 0;
2315 if (isIntS16Immediate(N.getOperand(1), imm) &&
2316 (!Alignment || (imm % Alignment) == 0)) {
2317 // If this is an or of disjoint bitfields, we can codegen this as an add
2318 // (for better address arithmetic) if the LHS and RHS of the OR are
2319 // provably disjoint.
2320 KnownBits LHSKnown;
2321 DAG.computeKnownBits(N.getOperand(0), LHSKnown);
2322
2323 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2324 // If all of the bits are known zero on the LHS or RHS, the add won't
2325 // carry.
2326 if (FrameIndexSDNode *FI =
2327 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2328 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2329 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2330 } else {
2331 Base = N.getOperand(0);
2332 }
2333 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2334 return true;
2335 }
2336 }
2337 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2338 // Loading from a constant address.
2339
2340 // If this address fits entirely in a 16-bit sext immediate field, codegen
2341 // this as "d, 0"
2342 int16_t Imm;
2343 if (isIntS16Immediate(CN, Imm) && (!Alignment || (Imm % Alignment) == 0)) {
2344 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2345 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2346 CN->getValueType(0));
2347 return true;
2348 }
2349
2350 // Handle 32-bit sext immediates with LIS + addr mode.
2351 if ((CN->getValueType(0) == MVT::i32 ||
2352 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2353 (!Alignment || (CN->getZExtValue() % Alignment) == 0)) {
2354 int Addr = (int)CN->getZExtValue();
2355
2356 // Otherwise, break this down into an LIS + disp.
2357 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2358
2359 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2360 MVT::i32);
2361 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2362 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2363 return true;
2364 }
2365 }
2366
2367 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2368 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2369 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2370 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2371 } else
2372 Base = N;
2373 return true; // [r+0]
2374}
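Worked example for the LIS + displacement split above (an illustrative address, not one from the report):

    //   Addr = 0x12348000
    //   Disp = (short)Addr          = -32768 (0x8000)
    //   Base = (Addr - Disp) >> 16  = 0x1235          (materialized with LIS)
    //   (0x1235 << 16) + (-32768)   = 0x12348000      (the original address)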
2375
2376 /// SelectAddressRegRegOnly - Given the specified address, force it to be
2377/// represented as an indexed [r+r] operation.
2378bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2379 SDValue &Index,
2380 SelectionDAG &DAG) const {
2381 // Check to see if we can easily represent this as an [r+r] address. This
2382 // will fail if it thinks that the address is more profitably represented as
2383 // reg+imm, e.g. where imm = 0.
2384 if (SelectAddressRegReg(N, Base, Index, DAG))
2385 return true;
2386
2387 // If the address is the result of an add, we will utilize the fact that the
2388 // address calculation includes an implicit add. However, we can reduce
2389 // register pressure if we do not materialize a constant just for use as the
2390 // index register. We only get rid of the add if it is not an add of a
2391 // value and a 16-bit signed constant and both have a single use.
2392 int16_t imm = 0;
2393 if (N.getOpcode() == ISD::ADD &&
2394 (!isIntS16Immediate(N.getOperand(1), imm) ||
2395 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2396 Base = N.getOperand(0);
2397 Index = N.getOperand(1);
2398 return true;
2399 }
2400
2401 // Otherwise, do it the hard way, using R0 as the base register.
2402 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2403 N.getValueType());
2404 Index = N;
2405 return true;
2406}
2407
2408 /// getPreIndexedAddressParts - returns true, and sets the base pointer,
2409 /// offset pointer, and addressing mode by reference, if the node's address
2410 /// can be legally represented as a pre-indexed load / store address.
2411bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
2412 SDValue &Offset,
2413 ISD::MemIndexedMode &AM,
2414 SelectionDAG &DAG) const {
2415 if (DisablePPCPreinc) return false;
2416
2417 bool isLoad = true;
2418 SDValue Ptr;
2419 EVT VT;
2420 unsigned Alignment;
2421 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2422 Ptr = LD->getBasePtr();
2423 VT = LD->getMemoryVT();
2424 Alignment = LD->getAlignment();
2425 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
2426 Ptr = ST->getBasePtr();
2427 VT = ST->getMemoryVT();
2428 Alignment = ST->getAlignment();
2429 isLoad = false;
2430 } else
2431 return false;
2432
2433 // PowerPC doesn't have preinc load/store instructions for vectors (except
2434 // for QPX, which does have preinc r+r forms).
2435 if (VT.isVector()) {
2436 if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) {
2437 return false;
2438 } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) {
2439 AM = ISD::PRE_INC;
2440 return true;
2441 }
2442 }
2443
2444 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
2445 // Common code will reject creating a pre-inc form if the base pointer
2446 // is a frame index, or if N is a store and the base pointer is either
2447 // the same as or a predecessor of the value being stored. Check for
2448 // those situations here, and try with swapped Base/Offset instead.
2449 bool Swap = false;
2450
2451 if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
2452 Swap = true;
2453 else if (!isLoad) {
2454 SDValue Val = cast<StoreSDNode>(N)->getValue();
2455 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
2456 Swap = true;
2457 }
2458
2459 if (Swap)
2460 std::swap(Base, Offset);
2461
2462 AM = ISD::PRE_INC;
2463 return true;
2464 }
2465
2466 // LDU/STU can only handle immediates that are a multiple of 4.
2467 if (VT != MVT::i64) {
2468 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 0))
2469 return false;
2470 } else {
2471 // LDU/STU need an address with at least 4-byte alignment.
2472 if (Alignment < 4)
2473 return false;
2474
2475 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 4))
2476 return false;
2477 }
2478
2479 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2480 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
2481 // sext i32 to i64 when addr mode is r+i.
2482 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
2483 LD->getExtensionType() == ISD::SEXTLOAD &&
2484 isa<ConstantSDNode>(Offset))
2485 return false;
2486 }
2487
2488 AM = ISD::PRE_INC;
2489 return true;
2490}
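// For illustration: the pre-increment forms this hook enables are the
// "update" instructions, e.g. (assumed operands)
//   lwzu r3, 8(r4)       ; r3 = *(r4 + 8), then r4 += 8
//   stwu r3, -16(r1)     ; *(r1 - 16) = r3, then r1 -= 16
// The 64-bit ldu/stdu are DS-form and encode the displacement in multiples of
// 4, which is why the MVT::i64 path above also requires 4-byte alignment.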
2491
2492//===----------------------------------------------------------------------===//
2493// LowerOperation implementation
2494//===----------------------------------------------------------------------===//
2495
2496/// Set HiOpFlags and LoOpFlags to the target MO flags used when referencing a
2497/// label, accounting for the PIC relocation model and non-lazy-pointer stubs.
2498static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
2499 unsigned &HiOpFlags, unsigned &LoOpFlags,
2500 const GlobalValue *GV = nullptr) {
2501 HiOpFlags = PPCII::MO_HA;
2502 LoOpFlags = PPCII::MO_LO;
2503
2504 // Don't use the pic base if not in PIC relocation model.
2505 if (IsPIC) {
2506 HiOpFlags |= PPCII::MO_PIC_FLAG;
2507 LoOpFlags |= PPCII::MO_PIC_FLAG;
2508 }
2509
2510 // If this is a reference to a global value that requires a non-lazy-ptr, make
2511 // sure that instruction lowering adds it.
2512 if (GV && Subtarget.hasLazyResolverStub(GV)) {
2513 HiOpFlags |= PPCII::MO_NLP_FLAG;
2514 LoOpFlags |= PPCII::MO_NLP_FLAG;
2515
2516 if (GV->hasHiddenVisibility()) {
2517 HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
2518 LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
2519 }
2520 }
2521}
2522
2523static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
2524 SelectionDAG &DAG) {
2525 SDLoc DL(HiPart);
2526 EVT PtrVT = HiPart.getValueType();
2527 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
2528
2529 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
2530 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
2531
2532 // With PIC, the first instruction is actually "GR+hi(&G)".
2533 if (isPIC)
2534 Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
2535 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
2536
2537 // Generate non-pic code that has direct accesses to the constant pool.
2538 // The address of the global is just (hi(&g)+lo(&g)).
2539 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
2540}
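// For illustration: the Hi/Lo pair built here typically becomes a two
// instruction sequence such as
//   lis  r3, sym@ha
//   addi r3, r3, sym@l
// and in PIC mode the high half is added to the PIC base register instead of
// being materialized with lis.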
2541
2542static void setUsesTOCBasePtr(MachineFunction &MF) {
2543 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2544 FuncInfo->setUsesTOCBasePtr();
2545}
2546
2547static void setUsesTOCBasePtr(SelectionDAG &DAG) {
2548 setUsesTOCBasePtr(DAG.getMachineFunction());
2549}
2550
2551static SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, bool Is64Bit,
2552 SDValue GA) {
2553 EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
2554 SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) :
2555 DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
2556
2557 SDValue Ops[] = { GA, Reg };
2558 return DAG.getMemIntrinsicNode(
2559 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
2560 MachinePointerInfo::getGOT(DAG.getMachineFunction()), 0,
2561 MachineMemOperand::MOLoad);
2562}
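// For illustration: a TOC_ENTRY node is a load relative to the TOC pointer in
// r2 (X2 above). With the small code model this is roughly
//   ld r3, .LC0@toc(r2)
// while the medium/large models split it into addis r3, r2, sym@toc@ha
// followed by ld r3, sym@toc@l(r3). Modelling it as a memory intrinsic gives
// the load a MachineMemOperand on the GOT.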
2563
2564SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
2565 SelectionDAG &DAG) const {
2566 EVT PtrVT = Op.getValueType();
2567 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2568 const Constant *C = CP->getConstVal();
2569
2570 // 64-bit SVR4 ABI code is always position-independent.
2571 // The actual address of the GlobalValue is stored in the TOC.
2572 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2573 setUsesTOCBasePtr(DAG);
2574 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
2575 return getTOCEntry(DAG, SDLoc(CP), true, GA);
2576 }
2577
2578 unsigned MOHiFlag, MOLoFlag;
2579 bool IsPIC = isPositionIndependent();
2580 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2581
2582 if (IsPIC && Subtarget.isSVR4ABI()) {
2583 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
2584 PPCII::MO_PIC_FLAG);
2585 return getTOCEntry(DAG, SDLoc(CP), false, GA);
2586 }
2587
2588 SDValue CPIHi =
2589 DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
2590 SDValue CPILo =
2591 DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
2592 return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
2593}
2594
2595// For 64-bit PowerPC, prefer the more compact relative encodings.
2596// This trades 32 bits per jump table entry for one or two instructions
2597// on the jump site.
2598unsigned PPCTargetLowering::getJumpTableEncoding() const {
2599 if (isJumpTableRelative())
2600 return MachineJumpTableInfo::EK_LabelDifference32;
2601
2602 return TargetLowering::getJumpTableEncoding();
2603}
2604
2605bool PPCTargetLowering::isJumpTableRelative() const {
2606 if (Subtarget.isPPC64())
2607 return true;
2608 return TargetLowering::isJumpTableRelative();
2609}
2610
2611SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
2612 SelectionDAG &DAG) const {
2613 if (!Subtarget.isPPC64())
2614 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2615
2616 switch (getTargetMachine().getCodeModel()) {
2617 case CodeModel::Small:
2618 case CodeModel::Medium:
2619 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2620 default:
2621 return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
2622 getPointerTy(DAG.getDataLayout()));
2623 }
2624}
2625
2626const MCExpr *
2627PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
2628 unsigned JTI,
2629 MCContext &Ctx) const {
2630 if (!Subtarget.isPPC64())
2631 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2632
2633 switch (getTargetMachine().getCodeModel()) {
2634 case CodeModel::Small:
2635 case CodeModel::Medium:
2636 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2637 default:
2638 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2639 }
2640}
2641
2642SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
2643 EVT PtrVT = Op.getValueType();
2644 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2645
2646 // 64-bit SVR4 ABI code is always position-independent.
2647 // The actual address of the GlobalValue is stored in the TOC.
2648 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2649 setUsesTOCBasePtr(DAG);
2650 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
2651 return getTOCEntry(DAG, SDLoc(JT), true, GA);
2652 }
2653
2654 unsigned MOHiFlag, MOLoFlag;
2655 bool IsPIC = isPositionIndependent();
2656 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2657
2658 if (IsPIC && Subtarget.isSVR4ABI()) {
2659 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
2660 PPCII::MO_PIC_FLAG);
2661 return getTOCEntry(DAG, SDLoc(GA), false, GA);
2662 }
2663
2664 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
2665 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
2666 return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
2667}
2668
2669SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
2670 SelectionDAG &DAG) const {
2671 EVT PtrVT = Op.getValueType();
2672 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
2673 const BlockAddress *BA = BASDN->getBlockAddress();
2674
2675 // 64-bit SVR4 ABI code is always position-independent.
2676 // The actual BlockAddress is stored in the TOC.
2677 if (Subtarget.isSVR4ABI() && isPositionIndependent()) {
2678 if (Subtarget.isPPC64())
2679 setUsesTOCBasePtr(DAG);
2680 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
2681 return getTOCEntry(DAG, SDLoc(BASDN), Subtarget.isPPC64(), GA);
2682 }
2683
2684 unsigned MOHiFlag, MOLoFlag;
2685 bool IsPIC = isPositionIndependent();
2686 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2687 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
2688 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
2689 return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
2690}
2691
2692SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
2693 SelectionDAG &DAG) const {
2694 // FIXME: TLS addresses currently use medium model code sequences,
2695 // which is the most useful form. Eventually support for small and
2696 // large models could be added if users need it, at the cost of
2697 // additional complexity.
2698 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
2699 if (DAG.getTarget().useEmulatedTLS())
2700 return LowerToTLSEmulatedModel(GA, DAG);
2701
2702 SDLoc dl(GA);
2703 const GlobalValue *GV = GA->getGlobal();
2704 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2705 bool is64bit = Subtarget.isPPC64();
2706 const Module *M = DAG.getMachineFunction().getFunction().getParent();
2707 PICLevel::Level picLevel = M->getPICLevel();
2708
2709 TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
2710
2711 if (Model == TLSModel::LocalExec) {
2712 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2713 PPCII::MO_TPREL_HA);
2714 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2715 PPCII::MO_TPREL_LO);
2716 SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
2717 : DAG.getRegister(PPC::R2, MVT::i32);
2718
2719 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
2720 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
2721 }
2722
2723 if (Model == TLSModel::InitialExec) {
2724 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2725 SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2726 PPCII::MO_TLS);
2727 SDValue GOTPtr;
2728 if (is64bit) {
2729 setUsesTOCBasePtr(DAG);
2730 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2731 GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
2732 PtrVT, GOTReg, TGA);
2733 } else
2734 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
2735 SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
2736 PtrVT, TGA, GOTPtr);
2737 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
2738 }
2739
2740 if (Model == TLSModel::GeneralDynamic) {
2741 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2742 SDValue GOTPtr;
2743 if (is64bit) {
2744 setUsesTOCBasePtr(DAG);
2745 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2746 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
2747 GOTReg, TGA);
2748 } else {
2749 if (picLevel == PICLevel::SmallPIC)
2750 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
2751 else
2752 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
2753 }
2754 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
2755 GOTPtr, TGA, TGA);
2756 }
2757
2758 if (Model == TLSModel::LocalDynamic) {
2759 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2760 SDValue GOTPtr;
2761 if (is64bit) {
2762 setUsesTOCBasePtr(DAG);
2763 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2764 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
2765 GOTReg, TGA);
2766 } else {
2767 if (picLevel == PICLevel::SmallPIC)
2768 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
2769 else
2770 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
2771 }
2772 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
2773 PtrVT, GOTPtr, TGA, TGA);
2774 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
2775 PtrVT, TLSAddr, TGA);
2776 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
2777 }
2778
2779 llvm_unreachable("Unknown TLS model!")::llvm::llvm_unreachable_internal("Unknown TLS model!", "/build/llvm-toolchain-snapshot-7~svn338205/lib/Target/PowerPC/PPCISelLowering.cpp"
, 2779)
;
2780}
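// For illustration: the local-exec path above corresponds to a sequence like
//   addis r3, r13, var@tprel@ha    ; r13 is the 64-bit thread pointer
//   addi  r3, r3, var@tprel@l
// Initial-exec instead loads the thread-pointer offset from the GOT/TOC and
// adds it to the thread pointer (ADD_TLS), while the general- and
// local-dynamic models end up calling __tls_get_addr through the
// ADDI_TLSGD_L_ADDR / ADDI_TLSLD_L_ADDR pseudo sequences.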
2781
2782SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
2783 SelectionDAG &DAG) const {
2784 EVT PtrVT = Op.getValueType();
2785 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
2786 SDLoc DL(GSDN);
2787 const GlobalValue *GV = GSDN->getGlobal();
2788
2789 // 64-bit SVR4 ABI code is always position-independent.
2790 // The actual address of the GlobalValue is stored in the TOC.
2791 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2792 setUsesTOCBasePtr(DAG);
2793 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
2794 return getTOCEntry(DAG, DL, true, GA);
2795 }
2796
2797 unsigned MOHiFlag, MOLoFlag;
2798 bool IsPIC = isPositionIndependent();
2799 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
2800
2801 if (IsPIC && Subtarget.isSVR4ABI()) {
2802 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
2803 GSDN->getOffset(),
2804 PPCII::MO_PIC_FLAG);
2805 return getTOCEntry(DAG, DL, false, GA);
2806 }
2807
2808 SDValue GAHi =
2809 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
2810 SDValue GALo =
2811 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
2812
2813 SDValue Ptr = LowerLabelRef(GAHi, GALo, IsPIC, DAG);
2814
2815 // If the global reference is actually to a non-lazy-pointer, we have to do an
2816 // extra load to get the address of the global.
2817 if (MOHiFlag & PPCII::MO_NLP_FLAG)
2818 Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
2819 return Ptr;
2820}
2821
2822SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
2823 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
2824 SDLoc dl(Op);
2825
2826 if (Op.getValueType() == MVT::v2i64) {
2827 // When the operands themselves are v2i64 values, we need to do something
2828 // special because VSX has no underlying comparison operations for these.
2829 if (Op.getOperand(0).getValueType() == MVT::v2i64) {
2830 // Equality can be handled by casting to the legal type for Altivec
2831 // comparisons, everything else needs to be expanded.
2832 if (CC == ISD::SETEQ || CC == ISD::SETNE) {
2833 return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
2834 DAG.getSetCC(dl, MVT::v4i32,
2835 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
2836 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
2837 CC));
2838 }
2839
2840 return SDValue();
2841 }
2842
2843 // We handle most of these in the usual way.
2844 return Op;
2845 }
2846
2847 // If we're comparing for equality to zero, expose the fact that this is
2848 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
2849 // fold the new nodes.
2850 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
2851 return V;
2852
2853 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
2854 // Leave comparisons against 0 and -1 alone for now, since they're usually
2855 // optimized. FIXME: revisit this when we can custom lower all setcc
2856 // optimizations.
2857 if (C->isAllOnesValue() || C->isNullValue())
2858 return SDValue();
2859 }
2860
2861 // If we have an integer seteq/setne, turn it into a compare against zero
2862 // by xor'ing the rhs with the lhs, which is faster than setting a
2863 // condition register, reading it back out, and masking the correct bit. The
2864 // normal approach here uses sub to do this instead of xor. Using xor exposes
2865 // the result to other bit-twiddling opportunities.
2866 EVT LHSVT = Op.getOperand(0).getValueType();
2867 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
2868 EVT VT = Op.getValueType();
2869 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
2870 Op.getOperand(1));
2871 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
2872 }
2873 return SDValue();
2874}
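// For illustration: the final transform turns an integer equality such as
//   setcc eq, a, b    into    setcc eq, (xor a, b), 0
// so the comparison is against zero; together with lowerCmpEqZeroToCtlzSrl,
// (a == 0) can then be computed branchlessly as roughly (cntlzw(a) >> 5)
// instead of reading a bit back out of a condition register.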
2875
2876SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
2877 SDNode *Node = Op.getNode();
2878 EVT VT = Node->getValueType(0);
2879 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2880 SDValue InChain = Node->getOperand(0);
2881 SDValue VAListPtr = Node->getOperand(1);
2882 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
2883 SDLoc dl(Node);
2884
2885 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
2886
2887 // gpr_index
2888 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
2889 VAListPtr, MachinePointerInfo(SV), MVT::i8);
2890 InChain = GprIndex.getValue(1);
2891
2892 if (VT == MVT::i64) {
2893 // Check if GprIndex is even
2894 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
2895 DAG.getConstant(1, dl, MVT::i32));
2896 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
2897 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
2898 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
2899 DAG.getConstant(1, dl, MVT::i32));
2900 // Align GprIndex to be even if it isn't
2901 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
2902 GprIndex);
2903 }
2904
2905 // fpr index is 1 byte after gpr
2906 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
2907 DAG.getConstant(1, dl, MVT::i32));
2908
2909 // fpr
2910 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
2911 FprPtr, MachinePointerInfo(SV), MVT::i8);
2912 InChain = FprIndex.getValue(1);
2913
2914 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
2915 DAG.getConstant(8, dl, MVT::i32));
2916
2917 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
2918 DAG.getConstant(4, dl, MVT::i32));
2919
2920 // areas
2921 SDValue OverflowArea =
2922 DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
2923 InChain = OverflowArea.getValue(1);
2924
2925 SDValue RegSaveArea =
2926 DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
2927 InChain = RegSaveArea.getValue(1);
2928
2929 // select overflow_area if index > 8
2930 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
2931 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
2932
2933 // adjustment constant gpr_index * 4/8
2934 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
2935 VT.isInteger() ? GprIndex : FprIndex,
2936 DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
2937 MVT::i32));
2938
2939 // OurReg = RegSaveArea + RegConstant
2940 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
2941 RegConstant);
2942
2943 // Floating types are 32 bytes into RegSaveArea
2944 if (VT.isFloatingPoint())
2945 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
2946 DAG.getConstant(32, dl, MVT::i32));
2947
2948 // increase {f,g}pr_index by 1 (or 2 if VT is i64)
2949 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
2950 VT.isInteger() ? GprIndex : FprIndex,
2951 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
2952 MVT::i32));
2953
2954 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
2955 VT.isInteger() ? VAListPtr : FprPtr,
2956 MachinePointerInfo(SV), MVT::i8);
2957
2958 // determine if we should load from reg_save_area or overflow_area
2959 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
2960
2961 // increase overflow_area by 4/8 if gpr/fpr > 8
2962 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
2963 DAG.getConstant(VT.isInteger() ? 4 : 8,
2964 dl, MVT::i32));
2965
2966 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
2967 OverflowAreaPlusN);
2968
2969 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
2970 MachinePointerInfo(), MVT::i32);
2971
2972 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
2973}
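// For illustration, the lowering above behaves roughly like this pseudo-C:
//   idx  = integer ? gpr : fpr;                  // rounded up to even for i64
//   addr = (idx < 8) ? reg_save_area + idx * (4 or 8) (+ 32 for FP)
//                    : overflow_arg_area;
//   store idx + 1 (or + 2 for i64) back into the va_list;
//   if (idx >= 8) overflow_arg_area += 4 or 8;
//   return *addr;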
2974
2975SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
2976 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
2977
2978 // We have to copy the entire va_list struct:
2979 // 2*sizeof(char) + 2 bytes of alignment padding + 2*sizeof(char*) = 12 bytes
2980 return DAG.getMemcpy(Op.getOperand(0), Op,
2981 Op.getOperand(1), Op.getOperand(2),
2982 DAG.getConstant(12, SDLoc(Op), MVT::i32), 8, false, true,
2983 false, MachinePointerInfo(), MachinePointerInfo());
2984}
2985
2986SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
2987 SelectionDAG &DAG) const {
2988 return Op.getOperand(0);
2989}
2990
2991SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
2992 SelectionDAG &DAG) const {
2993 SDValue Chain = Op.getOperand(0);
2994 SDValue Trmp = Op.getOperand(1); // trampoline
2995 SDValue FPtr = Op.getOperand(2); // nested function
2996 SDValue Nest = Op.getOperand(3); // 'nest' parameter value
2997 SDLoc dl(Op);
2998
2999 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3000 bool isPPC64 = (PtrVT == MVT::i64);
3001 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3002
3003 TargetLowering::ArgListTy Args;
3004 TargetLowering::ArgListEntry Entry;
3005
3006 Entry.Ty = IntPtrTy;
3007 Entry.Node = Trmp; Args.push_back(Entry);
3008
3009 // TrampSize == (isPPC64 ? 48 : 40);
3010 Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
3011 isPPC64 ? MVT::i64 : MVT::i32);
3012 Args.push_back(Entry);
3013
3014 Entry.Node = FPtr; Args.push_back(Entry);
3015 Entry.Node = Nest; Args.push_back(Entry);
3016
3017 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
3018 TargetLowering::CallLoweringInfo CLI(DAG);
3019 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3020 CallingConv::C, Type::getVoidTy(*DAG.getContext()),
3021 DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
3022
3023 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3024 return CallResult.second;
3025}
3026
3027SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3028 MachineFunction &MF = DAG.getMachineFunction();
3029 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3030 EVT PtrVT = getPointerTy(MF.getDataLayout());
3031
3032 SDLoc dl(Op);
3033
3034 if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
3035 // vastart just stores the address of the VarArgsFrameIndex slot into the
3036 // memory location argument.
3037 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3038 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3039 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3040 MachinePointerInfo(SV));
3041 }
3042
3043 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
3044 // We suppose the given va_list is already allocated.
3045 //
3046 // typedef struct {
3047 // char gpr; /* index into the array of 8 GPRs
3048 // * stored in the register save area
3049 // * gpr=0 corresponds to r3,
3050 // * gpr=1 to r4, etc.
3051 // */
3052 // char fpr; /* index into the array of 8 FPRs
3053 // * stored in the register save area
3054 // * fpr=0 corresponds to f1,
3055 // * fpr=1 to f2, etc.
3056 // */
3057 // char *overflow_arg_area;
3058 // /* location on stack that holds
3059 // * the next overflow argument
3060 // */
3061 // char *reg_save_area;
3062 // /* where r3:r10 and f1:f8 (if saved)
3063 // * are stored
3064 // */
3065 // } va_list[1];
3066
3067 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
3068 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
3069 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
3070 PtrVT);
3071 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3072 PtrVT);
3073
3074 uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
3075 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
3076
3077 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
3078 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
3079
3080 uint64_t FPROffset = 1;
3081 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
3082
3083 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3084
3085 // Store first byte : number of int regs
3086 SDValue firstStore =
3087 DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
3088 MachinePointerInfo(SV), MVT::i8);
3089 uint64_t nextOffset = FPROffset;
3090 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
3091 ConstFPROffset);
3092
3093 // Store second byte : number of float regs
3094 SDValue secondStore =
3095 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
3096 MachinePointerInfo(SV, nextOffset), MVT::i8);
3097 nextOffset += StackOffset;
3098 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
3099
3100 // Store second word : arguments given on stack
3101 SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
3102 MachinePointerInfo(SV, nextOffset));
3103 nextOffset += FrameOffset;
3104 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
3105
3106 // Store third word : arguments given in registers
3107 return DAG.getStore(thirdStore, dl, FR, nextPtr,
3108 MachinePointerInfo(SV, nextOffset));
3109}
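// For illustration: the four stores above fill the va_list fields at byte
// offsets 0 (gpr), 1 (fpr), 4 (overflow_arg_area) and 8 (reg_save_area),
// which is exactly the 12-byte struct that LowerVACOPY copies wholesale.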
3110
3111#include "PPCGenCallingConv.inc"
3112
3113// Function whose sole purpose is to kill compiler warnings
3114// stemming from unused functions included from PPCGenCallingConv.inc.
3115CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const {
3116 return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS;
3117}
3118
3119bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
3120 CCValAssign::LocInfo &LocInfo,
3121 ISD::ArgFlagsTy &ArgFlags,
3122 CCState &State) {
3123 return true;
3124}
3125
3126bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
3127 MVT &LocVT,
3128 CCValAssign::LocInfo &LocInfo,
3129 ISD::ArgFlagsTy &ArgFlags,
3130 CCState &State) {
3131 static const MCPhysReg ArgRegs[] = {
3132 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
3133 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
3134 };
3135 const unsigned NumArgRegs = array_lengthof(ArgRegs);
3136
3137 unsigned RegNum = State.getFirstUnallocated(ArgRegs);
3138
3139 // Skip one register if the first unallocated register has an even register
3140 // number and there are still argument registers available which have not been
3141 // allocated yet. RegNum is actually an index into ArgRegs, which means we
3142 // need to skip a register if RegNum is odd.
3143 if (RegNum != NumArgRegs && RegNum % 2 == 1) {
3144 State.AllocateReg(ArgRegs[RegNum]);
3145 }
3146
3147 // Always return false here, as this function only makes sure that the first
3148 // unallocated register has an odd register number and does not actually
3149 // allocate a register for the current argument.
3150 return false;
3151}
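// For illustration: if r3 already holds an earlier argument (RegNum == 1),
// this hook allocates r4 as padding so that a following i64 is split across
// the r5/r6 pair; 64-bit values therefore always start at r3, r5, r7 or r9.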
3152
3153bool
3154llvm::CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(unsigned &ValNo, MVT &ValVT,
3155 MVT &LocVT,
3156 CCValAssign::LocInfo &LocInfo,
3157 ISD::ArgFlagsTy &ArgFlags,
3158 CCState &State) {
3159 static const MCPhysReg ArgRegs[] = {
3160 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
3161 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
3162 };
3163 const unsigned NumArgRegs = array_lengthof(ArgRegs);
3164
3165 unsigned RegNum = State.getFirstUnallocated(ArgRegs);
3166 int RegsLeft = NumArgRegs - RegNum;
3167
3168 // Skip if there are not enough registers left for the long double type (4 GPRs
3169 // in soft-float mode) and put the long double argument on the stack.
3170 if (RegNum != NumArgRegs && RegsLeft < 4) {
3171 for (int i = 0; i < RegsLeft; i++) {
3172 State.AllocateReg(ArgRegs[RegNum + i]);
3173 }
3174 }
3175
3176 return false;
3177}
3178
3179bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
3180 MVT &LocVT,
3181 CCValAssign::LocInfo &LocInfo,
3182 ISD::ArgFlagsTy &ArgFlags,
3183 CCState &State) {
3184 static const MCPhysReg ArgRegs[] = {
3185 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
3186 PPC::F8
3187 };
3188
3189 const unsigned NumArgRegs = array_lengthof(ArgRegs);
3190
3191 unsigned RegNum = State.getFirstUnallocated(ArgRegs);
3192
3193 // If there is only one Floating-point register left we need to put both f64
3194 // values of a split ppc_fp128 value on the stack.
3195 if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
3196 State.AllocateReg(ArgRegs[RegNum]);
3197 }
3198
3199 // Always return false here, as this function only makes sure that the two f64
3200 // values a ppc_fp128 value is split into are both passed in registers or both
3201 // passed on the stack and does not actually allocate a register for the
3202 // current argument.
3203 return false;
3204}
3205
3206/// FPR - The set of FP registers that should be allocated for arguments,
3207/// on Darwin.
3208static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
3209 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
3210 PPC::F11, PPC::F12, PPC::F13};
3211
3212/// QFPR - The set of QPX registers that should be allocated for arguments.
3213static const MCPhysReg QFPR[] = {
3214 PPC::QF1, PPC::QF2, PPC::QF3, PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7,
3215 PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13};
3216
3217/// CalculateStackSlotSize - Calculates the size reserved for this argument on
3218/// the stack.
3219static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
3220 unsigned PtrByteSize) {
3221 unsigned ArgSize = ArgVT.getStoreSize();
3222 if (Flags.isByVal())
3223 ArgSize = Flags.getByValSize();
3224
3225 // Round up to multiples of the pointer size, except for array members,
3226 // which are always packed.
3227 if (!Flags.isInConsecutiveRegs())
3228 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3229
3230 return ArgSize;
3231}
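// For illustration: with PtrByteSize == 8, a 13-byte byval argument reserves
// ((13 + 7) / 8) * 8 == 16 bytes, while a member of a homogeneous aggregate
// marked isInConsecutiveRegs() keeps its exact store size (packed).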
3232
3233/// CalculateStackSlotAlignment - Calculates the alignment of this argument
3234/// on the stack.
3235static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
3236 ISD::ArgFlagsTy Flags,
3237 unsigned PtrByteSize) {
3238 unsigned Align = PtrByteSize;
3239
3240 // Altivec parameters are padded to a 16 byte boundary.
3241 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3242 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3243 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3244 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3245 Align = 16;
3246 // QPX vector types stored in double-precision are padded to a 32 byte
3247 // boundary.
3248 else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1)
3249 Align = 32;
3250
3251 // ByVal parameters are aligned as requested.
3252 if (Flags.isByVal()) {
3253 unsigned BVAlign = Flags.getByValAlign();
3254 if (BVAlign > PtrByteSize) {
3255 if (BVAlign % PtrByteSize != 0)
3256 llvm_unreachable(
3257 "ByVal alignment is not a multiple of the pointer size");
3258
3259 Align = BVAlign;
3260 }
3261 }
3262
3263 // Array members are always packed to their original alignment.
3264 if (Flags.isInConsecutiveRegs()) {
3265 // If the array member was split into multiple registers, the first
3266 // needs to be aligned to the size of the full type. (Except for
3267 // ppcf128, which is only aligned as its f64 components.)
3268 if (Flags.isSplit() && OrigVT != MVT::ppcf128)
3269 Align = OrigVT.getStoreSize();
3270 else
3271 Align = ArgVT.getStoreSize();
3272 }
3273
3274 return Align;
3275}
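// For illustration: an Altivec v4i32 argument reports an alignment of 16, a
// QPX v4f64 reports 32, and a byval aggregate requesting 32-byte alignment
// (a multiple of PtrByteSize) also reports 32.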
3276
3277/// CalculateStackSlotUsed - Return whether this argument will use its
3278/// stack slot (instead of being passed in registers). ArgOffset,
3279/// AvailableFPRs, and AvailableVRs must hold the current argument
3280/// position, and will be updated to account for this argument.
3281static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
3282 ISD::ArgFlagsTy Flags,
3283 unsigned PtrByteSize,
3284 unsigned LinkageSize,
3285 unsigned ParamAreaSize,
3286 unsigned &ArgOffset,
3287 unsigned &AvailableFPRs,
3288 unsigned &AvailableVRs, bool HasQPX) {
3289 bool UseMemory = false;
3290
3291 // Respect alignment of argument on the stack.
3292 unsigned Align =
3293 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
3294 ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
3295 // If there's no space left in the argument save area, we must
3296 // use memory (this check also catches zero-sized arguments).
3297 if (ArgOffset >= LinkageSize + ParamAreaSize)
3298 UseMemory = true;
3299
3300 // Allocate argument on the stack.
3301 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
3302 if (Flags.isInConsecutiveRegsLast())
3303 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3304 // If we overran the argument save area, we must use memory
3305 // (this check catches arguments passed partially in memory)
3306 if (ArgOffset > LinkageSize + ParamAreaSize)
3307 UseMemory = true;
3308
3309 // However, if the argument is actually passed in an FPR or a VR,
3310 // we don't use memory after all.
3311 if (!Flags.isByVal()) {
3312 if (ArgVT == MVT::f32 || ArgVT == MVT::f64 ||
3313 // QPX registers overlap with the scalar FP registers.
3314 (HasQPX && (ArgVT == MVT::v4f32 ||
3315 ArgVT == MVT::v4f64 ||
3316 ArgVT == MVT::v4i1)))
3317 if (AvailableFPRs > 0) {
3318 --AvailableFPRs;
3319 return false;
3320 }
3321 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3322 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3323 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3324 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3325 if (AvailableVRs > 0) {
3326 --AvailableVRs;
3327 return false;
3328 }
3329 }
3330
3331 return UseMemory;
3332}
3333
3334/// EnsureStackAlignment - Round stack frame size up from NumBytes to
3335/// ensure minimum alignment required for target.
3336static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
3337 unsigned NumBytes) {
3338 unsigned TargetAlign = Lowering->getStackAlignment();
3339 unsigned AlignMask = TargetAlign - 1;
3340 NumBytes = (NumBytes + AlignMask) & ~AlignMask;
3341 return NumBytes;
3342}
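// For illustration: with a 16-byte target stack alignment, NumBytes == 100
// rounds up to (100 + 15) & ~15 == 112.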
3343
3344SDValue PPCTargetLowering::LowerFormalArguments(
3345 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3346 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3347 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3348 if (Subtarget.isSVR4ABI()) {
3349 if (Subtarget.isPPC64())
3350 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins,
3351 dl, DAG, InVals);
3352 else
3353 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins,
3354 dl, DAG, InVals);
3355 } else {
3356 return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
3357 dl, DAG, InVals);
3358 }
3359}
3360
3361SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
3362 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3363 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3364 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3365
3366 // 32-bit SVR4 ABI Stack Frame Layout:
3367 // +-----------------------------------+
3368 // +--> | Back chain |
3369 // | +-----------------------------------+
3370 // | | Floating-point register save area |
3371 // | +-----------------------------------+
3372 // | | General register save area |
3373 // | +-----------------------------------+
3374 // | | CR save word |
3375 // | +-----------------------------------+
3376 // | | VRSAVE save word |
3377 // | +-----------------------------------+
3378 // | | Alignment padding |
3379 // | +-----------------------------------+
3380 // | | Vector register save area |
3381 // | +-----------------------------------+
3382 // | | Local variable space |
3383 // | +-----------------------------------+
3384 // | | Parameter list area |
3385 // | +-----------------------------------+
3386 // | | LR save word |
3387 // | +-----------------------------------+
3388 // SP--> +--- | Back chain |
3389 // +-----------------------------------+
3390 //
3391 // Specifications:
3392 // System V Application Binary Interface PowerPC Processor Supplement
3393 // AltiVec Technology Programming Interface Manual
3394
3395 MachineFunction &MF = DAG.getMachineFunction();
3396 MachineFrameInfo &MFI = MF.getFrameInfo();
3397 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3398
3399 EVT PtrVT = getPointerTy(MF.getDataLayout());
3400 // Potential tail calls could cause overwriting of argument stack slots.
3401 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3402 (CallConv == CallingConv::Fast));
3403 unsigned PtrByteSize = 4;
3404
3405 // Assign locations to all of the incoming arguments.
3406 SmallVector<CCValAssign, 16> ArgLocs;
3407 PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3408 *DAG.getContext());
3409
3410 // Reserve space for the linkage area on the stack.
3411 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3412 CCInfo.AllocateStack(LinkageSize, PtrByteSize);
3413 if (useSoftFloat() || hasSPE())
3414 CCInfo.PreAnalyzeFormalArguments(Ins);
3415
3416 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
3417 CCInfo.clearWasPPCF128();
3418
3419 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3420 CCValAssign &VA = ArgLocs[i];
3421
3422 // Arguments stored in registers.
3423 if (VA.isRegLoc()) {
3424 const TargetRegisterClass *RC;
3425 EVT ValVT = VA.getValVT();
3426
3427 switch (ValVT.getSimpleVT().SimpleTy) {
3428 default:
3429 llvm_unreachable("ValVT not supported by formal arguments Lowering")::llvm::llvm_unreachable_internal("ValVT not supported by formal arguments Lowering"
, "/build/llvm-toolchain-snapshot-7~svn338205/lib/Target/PowerPC/PPCISelLowering.cpp"
, 3429)
;
3430 case MVT::i1:
3431 case MVT::i32:
3432 RC = &PPC::GPRCRegClass;
3433 break;
3434 case MVT::f32:
3435 if (Subtarget.hasP8Vector())
3436 RC = &PPC::VSSRCRegClass;
3437 else if (Subtarget.hasSPE())
3438 RC = &PPC::SPE4RCRegClass;
3439 else
3440 RC = &PPC::F4RCRegClass;
3441 break;
3442 case MVT::f64:
3443 if (Subtarget.hasVSX())
3444 RC = &PPC::VSFRCRegClass;
3445 else if (Subtarget.hasSPE())
3446 RC = &PPC::SPERCRegClass;
3447 else
3448 RC = &PPC::F8RCRegClass;
3449 break;
3450 case MVT::v16i8:
3451 case MVT::v8i16:
3452 case MVT::v4i32:
3453 RC = &PPC::VRRCRegClass;
3454 break;
3455 case MVT::v4f32:
3456 RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass;
3457 break;
3458 case MVT::v2f64:
3459 case MVT::v2i64:
3460 RC = &PPC::VRRCRegClass;
3461 break;
3462 case MVT::v4f64:
3463 RC = &PPC::QFRCRegClass;
3464 break;
3465 case MVT::v4i1:
3466 RC = &PPC::QBRCRegClass;
3467 break;
3468 }
3469
3470 // Transform the arguments stored in physical registers into virtual ones.
3471 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3472 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
3473 ValVT == MVT::i1 ? MVT::i32 : ValVT);
3474
3475 if (ValVT == MVT::i1)
3476 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
3477
3478 InVals.push_back(ArgValue);
3479 } else {
3480 // Argument stored in memory.
3481 assert(VA.isMemLoc());
3482
3483 unsigned ArgSize = VA.getLocVT().getStoreSize();
3484 int FI = MFI.CreateFixedObject(ArgSize, VA.getLocMemOffset(),
3485 isImmutable);
3486
3487 // Create load nodes to retrieve arguments from the stack.
3488 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3489 InVals.push_back(
3490 DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
3491 }
3492 }
3493
3494 // Assign locations to all of the incoming aggregate by value arguments.
3495 // Aggregates passed by value are stored in the local variable space of the
3496 // caller's stack frame, right above the parameter list area.
3497 SmallVector<CCValAssign, 16> ByValArgLocs;
3498 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
3499 ByValArgLocs, *DAG.getContext());
3500
3501 // Reserve stack space for the allocations in CCInfo.
3502 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
3503
3504 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
3505
3506 // Area that is at least reserved in the caller of this function.
3507 unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
3508 MinReservedArea = std::max(MinReservedArea, LinkageSize);
3509
3510 // Set the size that is at least reserved in caller of this function. Tail
3511 // call optimized function's reserved stack space needs to be aligned so that
3512 // taking the difference between two stack areas will result in an aligned
3513 // stack.
3514 MinReservedArea =
3515 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
3516 FuncInfo->setMinReservedArea(MinReservedArea);
3517
3518 SmallVector<SDValue, 8> MemOps;
3519
3520 // If the function takes variable number of arguments, make a frame index for
3521 // the start of the first vararg value... for expansion of llvm.va_start.
3522 if (isVarArg) {
3523 static const MCPhysReg GPArgRegs[] = {
3524 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
3525 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
3526 };
3527 const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
3528
3529 static const MCPhysReg FPArgRegs[] = {
3530 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
3531 PPC::F8
3532 };
3533 unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
3534
3535 if (useSoftFloat() || hasSPE())
3536 NumFPArgRegs = 0;
3537
3538 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
3539 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
3540
3541 // Make room for NumGPArgRegs and NumFPArgRegs.
3542 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
3543 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
3544
3545 FuncInfo->setVarArgsStackOffset(
3546 MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
3547 CCInfo.getNextStackOffset(), true));
3548
3549 FuncInfo->setVarArgsFrameIndex(MFI.CreateStackObject(Depth, 8, false));
3550 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3551
3552 // The fixed integer arguments of a variadic function are stored to the
3553 // VarArgsFrameIndex on the stack so that they may be loaded by
3554 // dereferencing the result of va_next.
3555 for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
3556 // Get an existing live-in vreg, or add a new one.
3557 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
3558 if (!VReg)
3559 VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
3560
3561 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3562 SDValue Store =
3563 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3564 MemOps.push_back(Store);
3565 // Increment the address by four for the next argument to store
3566 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
3567 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3568 }
3569
3570 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
3571 // is set.
3572 // The double arguments are stored to the VarArgsFrameIndex
3573 // on the stack.
3574 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
3575 // Get an existing live-in vreg, or add a new one.
3576 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
3577 if (!VReg)
3578 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
3579
3580 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
3581 SDValue Store =
3582 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3583 MemOps.push_back(Store);
3584 // Increment the address by eight for the next argument to store
3585 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
3586 PtrVT);
3587 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3588 }
3589 }
3590
3591 if (!MemOps.empty())
3592 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3593
3594 return Chain;
3595}
3596
3597// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3598// value to MVT::i64 and then truncate to the correct register size.
3599SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
3600 EVT ObjectVT, SelectionDAG &DAG,
3601 SDValue ArgVal,
3602 const SDLoc &dl) const {
3603 if (Flags.isSExt())
3604 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
3605 DAG.getValueType(ObjectVT));
3606 else if (Flags.isZExt())
3607 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
3608 DAG.getValueType(ObjectVT));
3609
3610 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
3611}
3612
3613SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
3614 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3615 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3616 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3617 // TODO: add description of PPC stack frame format, or at least some docs.
3618 //
3619 bool isELFv2ABI = Subtarget.isELFv2ABI();
3620 bool isLittleEndian = Subtarget.isLittleEndian();
3621 MachineFunction &MF = DAG.getMachineFunction();
3622 MachineFrameInfo &MFI = MF.getFrameInfo();
3623 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3624
3625 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
3626 "fastcc not supported on varargs functions");
3627
3628 EVT PtrVT = getPointerTy(MF.getDataLayout());
3629 // Potential tail calls could cause overwriting of argument stack slots.
3630 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3631 (CallConv == CallingConv::Fast));
3632 unsigned PtrByteSize = 8;
3633 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3634
3635 static const MCPhysReg GPR[] = {
3636 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
3637 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
3638 };
3639 static const MCPhysReg VR[] = {
3640 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
3641 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
3642 };
3643
3644 const unsigned Num_GPR_Regs = array_lengthof(GPR);
3645 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
3646 const unsigned Num_VR_Regs = array_lengthof(VR);
3647 const unsigned Num_QFPR_Regs = Num_FPR_Regs;
3648
3649 // Do a first pass over the arguments to determine whether the ABI
3650 // guarantees that our caller has allocated the parameter save area
3651 // on its stack frame. In the ELFv1 ABI, this is always the case;
3652 // in the ELFv2 ABI, it is true if this is a vararg function or if
3653 // any parameter is located in a stack slot.
3654
3655 bool HasParameterArea = !isELFv2ABI || isVarArg;
3656 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
3657 unsigned NumBytes = LinkageSize;
3658 unsigned AvailableFPRs = Num_FPR_Regs;
3659 unsigned AvailableVRs = Num_VR_Regs;
3660 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
3661 if (Ins[i].Flags.isNest())
3662 continue;
3663
3664 if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
3665 PtrByteSize, LinkageSize, ParamAreaSize,
3666 NumBytes, AvailableFPRs, AvailableVRs,
3667 Subtarget.hasQPX()))
3668 HasParameterArea = true;
3669 }
3670
3671 // Add DAG nodes to load the arguments or copy them out of registers. On
3672 // entry to a function on PPC, the arguments start after the linkage area,
3673 // although the first ones are often in registers.
3674
3675 unsigned ArgOffset = LinkageSize;
3676 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
3677 unsigned &QFPR_idx = FPR_idx;
3678 SmallVector<SDValue, 8> MemOps;
3679 Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
3680 unsigned CurArgIdx = 0;
3681 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
3682 SDValue ArgVal;
3683 bool needsLoad = false;
3684 EVT ObjectVT = Ins[ArgNo].VT;
3685 EVT OrigVT = Ins[ArgNo].ArgVT;
3686 unsigned ObjSize = ObjectVT.getStoreSize();
3687 unsigned ArgSize = ObjSize;
3688 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
3689 if (Ins[ArgNo].isOrigArg()) {
3690 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
3691 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
3692 }
3693 // We re-align the argument offset for each argument, except when using the
3694 // fast calling convention, when we need to make sure we do that only when
3695 // we'll actually use a stack slot.
3696 unsigned CurArgOffset, Align;
3697 auto ComputeArgOffset = [&]() {
3698 /* Respect alignment of argument on the stack. */
3699 Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
3700 ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
3701 CurArgOffset = ArgOffset;
3702 };
3703
3704 if (CallConv != CallingConv::Fast) {
3705 ComputeArgOffset();
3706
3707 /* Compute GPR index associated with argument offset. */
3708 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
3709 GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
3710 }
3711
3712 // FIXME the codegen can be much improved in some cases.
3713 // We do not have to keep everything in memory.
3714 if (Flags.isByVal()) {
3715 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
3716
3717 if (CallConv == CallingConv::Fast)
3718 ComputeArgOffset();
3719
3720 // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of the pointer size.
3721 ObjSize = Flags.getByValSize();
3722 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3723 // Empty aggregate parameters do not take up registers. Examples:
3724 // struct { } a;
3725 // union { } b;
3726 // int c[0];
3727 // etc. However, we have to provide a place-holder in InVals, so
3728 // pretend we have an 8-byte item at the current address for that
3729 // purpose.
3730 if (!ObjSize) {
3731 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
3732 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3733 InVals.push_back(FIN);
3734 continue;
3735 }
3736
3737 // Create a stack object covering all stack doublewords occupied
3738 // by the argument. If the argument is (fully or partially) on
3739 // the stack, or if the argument is fully in registers but the
3740 // caller has allocated the parameter save anyway, we can refer
3741 // directly to the caller's stack frame. Otherwise, create a
3742 // local copy in our own frame.
3743 int FI;
3744 if (HasParameterArea ||
3745 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
3746 FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
3747 else
3748 FI = MFI.CreateStackObject(ArgSize, Align, false);
3749 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3750
3751 // Handle aggregates smaller than 8 bytes.
3752 if (ObjSize < PtrByteSize) {
3753 // The value of the object is its address, which differs from the
3754 // address of the enclosing doubleword on big-endian systems.
3755 SDValue Arg = FIN;
3756 if (!isLittleEndian) {
3757 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
3758 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
3759 }
3760 InVals.push_back(Arg);
3761
3762 if (GPR_idx != Num_GPR_Regs) {
3763 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3764 FuncInfo->addLiveInAttr(VReg, Flags);
3765 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3766 SDValue Store;
3767
3768 if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
3769 EVT ObjType = (ObjSize == 1 ? MVT::i8 :
3770 (ObjSize == 2 ? MVT::i16 : MVT::i32));
3771 Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
3772 MachinePointerInfo(&*FuncArg), ObjType);
3773 } else {
3774 // For sizes that don't fit a truncating store (3, 5, 6, 7),
3775 // store the whole register as-is to the parameter save area
3776 // slot.
3777 Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3778 MachinePointerInfo(&*FuncArg));
3779 }
3780
3781 MemOps.push_back(Store);
3782 }
3783 // Whether we copied from a register or not, advance the offset
3784 // into the parameter save area by a full doubleword.
3785 ArgOffset += PtrByteSize;
3786 continue;
3787 }
3788
3789 // The value of the object is its address, which is the address of
3790 // its first stack doubleword.
3791 InVals.push_back(FIN);
3792
3793 // Store whatever pieces of the object are in registers to memory.
3794 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
3795 if (GPR_idx == Num_GPR_Regs)
3796 break;
3797
3798 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3799 FuncInfo->addLiveInAttr(VReg, Flags);
3800 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3801 SDValue Addr = FIN;
3802 if (j) {
3803 SDValue Off = DAG.getConstant(j, dl, PtrVT);
3804 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
3805 }
3806 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
3807 MachinePointerInfo(&*FuncArg, j));
3808 MemOps.push_back(Store);
3809 ++GPR_idx;
3810 }
3811 ArgOffset += ArgSize;
3812 continue;
3813 }
3814
3815 switch (ObjectVT.getSimpleVT().SimpleTy) {
3816 default: llvm_unreachable("Unhandled argument type!");
3817 case MVT::i1:
3818 case MVT::i32:
3819 case MVT::i64:
3820 if (Flags.isNest()) {
3821 // The 'nest' parameter, if any, is passed in R11.
3822 unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
3823 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3824
3825 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
3826 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
3827
3828 break;
3829 }
3830
3831 // These can be scalar arguments or elements of an integer array type
3832 // passed directly. Clang may use those instead of "byval" aggregate
3833 // types to avoid forcing arguments to memory unnecessarily.
3834 if (GPR_idx != Num_GPR_Regs) {
3835 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3836 FuncInfo->addLiveInAttr(VReg, Flags);
3837 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3838
3839 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
3840 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3841 // value to MVT::i64 and then truncate to the correct register size.
3842 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
3843 } else {
3844 if (CallConv == CallingConv::Fast)
3845 ComputeArgOffset();
3846
3847 needsLoad = true;
3848 ArgSize = PtrByteSize;
3849 }
3850 if (CallConv != CallingConv::Fast || needsLoad)
3851 ArgOffset += 8;
3852 break;
3853
3854 case MVT::f32:
3855 case MVT::f64:
3856 // These can be scalar arguments or elements of a float array type
3857 // passed directly. The latter are used to implement ELFv2 homogenous
3858 // float aggregates.
3859 if (FPR_idx != Num_FPR_Regs) {
3860 unsigned VReg;
3861
3862 if (ObjectVT == MVT::f32)
3863 VReg = MF.addLiveIn(FPR[FPR_idx],
3864 Subtarget.hasP8Vector()
3865 ? &PPC::VSSRCRegClass
3866 : &PPC::F4RCRegClass);
3867 else
3868 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
3869 ? &PPC::VSFRCRegClass
3870 : &PPC::F8RCRegClass);
3871
3872 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3873 ++FPR_idx;
3874 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
3875 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
3876 // once we support fp <-> gpr moves.
3877
3878 // This can only ever happen in the presence of f32 array types,
3879 // since otherwise we never run out of FPRs before running out
3880 // of GPRs.
3881 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3882 FuncInfo->addLiveInAttr(VReg, Flags);
3883 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3884
3885 if (ObjectVT == MVT::f32) {
3886 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
3887 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
3888 DAG.getConstant(32, dl, MVT::i32));
3889 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
3890 }
3891
3892 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
3893 } else {
3894 if (CallConv == CallingConv::Fast)
3895 ComputeArgOffset();
3896
3897 needsLoad = true;
3898 }
3899
3900 // When passing an array of floats, the array occupies consecutive
3901 // space in the argument area; only round up to the next doubleword
3902 // at the end of the array. Otherwise, each float takes 8 bytes.
3903 if (CallConv != CallingConv::Fast || needsLoad) {
3904 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
3905 ArgOffset += ArgSize;
3906 if (Flags.isInConsecutiveRegsLast())
3907 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3908 }
3909 break;
3910 case MVT::v4f32:
3911 case MVT::v4i32:
3912 case MVT::v8i16:
3913 case MVT::v16i8:
3914 case MVT::v2f64:
3915 case MVT::v2i64:
3916 case MVT::v1i128:
3917 case MVT::f128:
3918 if (!Subtarget.hasQPX()) {
3919 // These can be scalar arguments or elements of a vector array type
3920 // passed directly. The latter are used to implement ELFv2 homogenous
3921 // vector aggregates.
3922 if (VR_idx != Num_VR_Regs) {
3923 unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
3924 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3925 ++VR_idx;
3926 } else {
3927 if (CallConv == CallingConv::Fast)
3928 ComputeArgOffset();
3929 needsLoad = true;
3930 }
3931 if (CallConv != CallingConv::Fast || needsLoad)
3932 ArgOffset += 16;
3933 break;
3934 } // not QPX
3935
3936     assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 &&
3937            "Invalid QPX parameter type");
3938 /* fall through */
3939
3940 case MVT::v4f64:
3941 case MVT::v4i1:
3942 // QPX vectors are treated like their scalar floating-point subregisters
3943 // (except that they're larger).
3944 unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 16 : 32;
3945 if (QFPR_idx != Num_QFPR_Regs) {
3946 const TargetRegisterClass *RC;
3947 switch (ObjectVT.getSimpleVT().SimpleTy) {
3948 case MVT::v4f64: RC = &PPC::QFRCRegClass; break;
3949 case MVT::v4f32: RC = &PPC::QSRCRegClass; break;
3950 default: RC = &PPC::QBRCRegClass; break;
3951 }
3952
3953 unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC);
3954 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3955 ++QFPR_idx;
3956 } else {
3957 if (CallConv == CallingConv::Fast)
3958 ComputeArgOffset();
3959 needsLoad = true;
3960 }
3961 if (CallConv != CallingConv::Fast || needsLoad)
3962 ArgOffset += Sz;
3963 break;
3964 }
3965
3966 // We need to load the argument to a virtual register if we determined
3967 // above that we ran out of physical registers of the appropriate type.
3968 if (needsLoad) {
3969 if (ObjSize < ArgSize && !isLittleEndian)
3970 CurArgOffset += ArgSize - ObjSize;
3971 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
3972 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3973 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
3974 }
3975
3976 InVals.push_back(ArgVal);
3977 }
3978
3979 // Area that is at least reserved in the caller of this function.
3980 unsigned MinReservedArea;
3981 if (HasParameterArea)
3982 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
3983 else
3984 MinReservedArea = LinkageSize;
3985
3986 // Set the size that is at least reserved in caller of this function. Tail
3987 // call optimized functions' reserved stack space needs to be aligned so that
3988 // taking the difference between two stack areas will result in an aligned
3989 // stack.
3990 MinReservedArea =
3991 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
3992 FuncInfo->setMinReservedArea(MinReservedArea);
3993
3994 // If the function takes variable number of arguments, make a frame index for
3995 // the start of the first vararg value... for expansion of llvm.va_start.
3996 if (isVarArg) {
3997 int Depth = ArgOffset;
3998
3999 FuncInfo->setVarArgsFrameIndex(
4000 MFI.CreateFixedObject(PtrByteSize, Depth, true));
4001 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4002
4003 // If this function is vararg, store any remaining integer argument regs
4004 // to their spots on the stack so that they may be loaded by dereferencing
4005 // the result of va_next.
4006 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4007 GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4008 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4009 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4010 SDValue Store =
4011 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4012 MemOps.push_back(Store);
4013       // Increment the address by PtrByteSize for the next argument to store.
4014 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4015 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4016 }
4017 }
4018
4019 if (!MemOps.empty())
4020 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4021
4022 return Chain;
4023}
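
The f32 handling above has to pick the correct half of a 64-bit GPR when a float from an ELFv2 homogeneous aggregate ends up in an integer register: on big-endian targets a float at an even 4-byte offset sits in the high half and is shifted down before the truncate and bitcast. The following standalone sketch mirrors that bit arithmetic on host integers; the helper name and the memcpy-based bitcast are illustrative only, since the code above builds SRL/TRUNCATE/BITCAST SelectionDAG nodes instead.

#include <cassert>
#include <cstdint>
#include <cstring>

// Extract an f32 that was passed in a 64-bit GPR, given the argument's
// offset within the parameter area and the target's endianness.
static float f32FromGPR(uint64_t Reg, unsigned ArgOffset, bool IsLittleEndian) {
  // The float sits in the high 32 bits when (ArgOffset % 8) matches the
  // endian-dependent test used above, so shift it down first.
  bool InHighHalf = (ArgOffset % 8) == (IsLittleEndian ? 4u : 0u);
  uint32_t Bits = InHighHalf ? (uint32_t)(Reg >> 32) : (uint32_t)Reg;
  float F;
  std::memcpy(&F, &Bits, sizeof(F)); // the BITCAST step
  return F;
}

int main() {
  uint32_t OnePattern;
  float One = 1.0f;
  std::memcpy(&OnePattern, &One, sizeof(One));
  // Big-endian: the first float of a pair occupies the high 32 bits.
  uint64_t Reg = (uint64_t)OnePattern << 32;
  assert(f32FromGPR(Reg, /*ArgOffset=*/0, /*IsLittleEndian=*/false) == 1.0f);
  return 0;
}
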
4024
4025SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
4026 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4027 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4028 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4029 // TODO: add description of PPC stack frame format, or at least some docs.
4030 //
4031 MachineFunction &MF = DAG.getMachineFunction();
4032 MachineFrameInfo &MFI = MF.getFrameInfo();
4033 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4034
4035 EVT PtrVT = getPointerTy(MF.getDataLayout());
4036 bool isPPC64 = PtrVT == MVT::i64;
4037 // Potential tail calls could cause overwriting of argument stack slots.
4038 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4039 (CallConv == CallingConv::Fast));
4040 unsigned PtrByteSize = isPPC64 ? 8 : 4;
4041 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4042 unsigned ArgOffset = LinkageSize;
4043 // Area that is at least reserved in caller of this function.
4044 unsigned MinReservedArea = ArgOffset;
4045
4046 static const MCPhysReg GPR_32[] = { // 32-bit registers.
4047 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4048 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4049 };
4050 static const MCPhysReg GPR_64[] = { // 64-bit registers.
4051 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4052 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4053 };
4054 static const MCPhysReg VR[] = {
4055 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4056 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4057 };
4058
4059 const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
4060 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4061 const unsigned Num_VR_Regs = array_lengthof( VR);
4062
4063 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4064
4065 const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
4066
4067 // In 32-bit non-varargs functions, the stack space for vectors is after the
4068 // stack space for non-vectors. We do not use this space unless we have
4069 // too many vectors to fit in registers, something that only occurs in
4070 // constructed examples:), but we have to walk the arglist to figure
4071 // that out...for the pathological case, compute VecArgOffset as the
4072 // start of the vector parameter area. Computing VecArgOffset is the
4073 // entire point of the following loop.
4074 unsigned VecArgOffset = ArgOffset;
4075 if (!isVarArg && !isPPC64) {
4076 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
4077 ++ArgNo) {
4078 EVT ObjectVT = Ins[ArgNo].VT;
4079 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4080
4081 if (Flags.isByVal()) {
4082         // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of regs.
4083 unsigned ObjSize = Flags.getByValSize();
4084 unsigned ArgSize =
4085 ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4086 VecArgOffset += ArgSize;
4087 continue;
4088 }
4089
4090 switch(ObjectVT.getSimpleVT().SimpleTy) {
4091       default: llvm_unreachable("Unhandled argument type!");
4092 case MVT::i1:
4093 case MVT::i32:
4094 case MVT::f32:
4095 VecArgOffset += 4;
4096 break;
4097 case MVT::i64: // PPC64
4098 case MVT::f64:
4099 // FIXME: We are guaranteed to be !isPPC64 at this point.
4100 // Does MVT::i64 apply?
4101 VecArgOffset += 8;
4102 break;
4103 case MVT::v4f32:
4104 case MVT::v4i32:
4105 case MVT::v8i16:
4106 case MVT::v16i8:
4107 // Nothing to do, we're only looking at Nonvector args here.
4108 break;
4109 }
4110 }
4111 }
4112 // We've found where the vector parameter area in memory is. Skip the
4113 // first 12 parameters; these don't use that memory.
4114 VecArgOffset = ((VecArgOffset+15)/16)*16;
4115 VecArgOffset += 12*16;
4116
4117 // Add DAG nodes to load the arguments or copy them out of registers. On
4118 // entry to a function on PPC, the arguments start after the linkage area,
4119 // although the first ones are often in registers.
4120
4121 SmallVector<SDValue, 8> MemOps;
4122 unsigned nAltivecParamsAtEnd = 0;
4123 Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4124 unsigned CurArgIdx = 0;
4125 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4126 SDValue ArgVal;
4127 bool needsLoad = false;
4128 EVT ObjectVT = Ins[ArgNo].VT;
4129 unsigned ObjSize = ObjectVT.getSizeInBits()/8;
4130 unsigned ArgSize = ObjSize;
4131 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4132 if (Ins[ArgNo].isOrigArg()) {
4133 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4134 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4135 }
4136 unsigned CurArgOffset = ArgOffset;
4137
4138 // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
4139 if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
4140 ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
4141 if (isVarArg || isPPC64) {
4142 MinReservedArea = ((MinReservedArea+15)/16)*16;
4143 MinReservedArea += CalculateStackSlotSize(ObjectVT,
4144 Flags,
4145 PtrByteSize);
4146 } else nAltivecParamsAtEnd++;
4147 } else
4148 // Calculate min reserved area.
4149 MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
4150 Flags,
4151 PtrByteSize);
4152
4153 // FIXME the codegen can be much improved in some cases.
4154 // We do not have to keep everything in memory.
4155 if (Flags.isByVal()) {
4156       assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4157
4158       // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of registers.
4159 ObjSize = Flags.getByValSize();
4160 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4161 // Objects of size 1 and 2 are right justified, everything else is
4162 // left justified. This means the memory address is adjusted forwards.
4163 if (ObjSize==1 || ObjSize==2) {
4164 CurArgOffset = CurArgOffset + (4 - ObjSize);
4165 }
4166 // The value of the object is its address.
4167 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, false, true);
4168 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4169 InVals.push_back(FIN);
4170 if (ObjSize==1 || ObjSize==2) {
4171 if (GPR_idx != Num_GPR_Regs) {
4172 unsigned VReg;
4173 if (isPPC64)
4174 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4175 else
4176 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4177 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4178 EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
4179 SDValue Store =
4180 DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
4181 MachinePointerInfo(&*FuncArg), ObjType);
4182 MemOps.push_back(Store);
4183 ++GPR_idx;
4184 }
4185
4186 ArgOffset += PtrByteSize;
4187
4188 continue;
4189 }
4190 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4191 // Store whatever pieces of the object are in registers
4192 // to memory. ArgOffset will be the address of the beginning
4193 // of the object.
4194 if (GPR_idx != Num_GPR_Regs) {
4195 unsigned VReg;
4196 if (isPPC64)
4197 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4198 else
4199 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4200 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4201 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4202 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4203 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4204 MachinePointerInfo(&*FuncArg, j));
4205 MemOps.push_back(Store);
4206 ++GPR_idx;
4207 ArgOffset += PtrByteSize;
4208 } else {
4209 ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
4210 break;
4211 }
4212 }
4213 continue;
4214 }
4215
4216 switch (ObjectVT.getSimpleVT().SimpleTy) {
4217     default: llvm_unreachable("Unhandled argument type!");
4218 case MVT::i1:
4219 case MVT::i32:
4220 if (!isPPC64) {
4221 if (GPR_idx != Num_GPR_Regs) {
4222 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4223 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
4224
4225 if (ObjectVT == MVT::i1)
4226 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);
4227
4228 ++GPR_idx;
4229 } else {
4230 needsLoad = true;
4231 ArgSize = PtrByteSize;
4232 }
4233 // All int arguments reserve stack space in the Darwin ABI.
4234 ArgOffset += PtrByteSize;
4235 break;
4236 }
4237       LLVM_FALLTHROUGH;
4238 case MVT::i64: // PPC64
4239 if (GPR_idx != Num_GPR_Regs) {
4240 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4241 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4242
4243 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4244 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4245 // value to MVT::i64 and then truncate to the correct register size.
4246 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4247
4248 ++GPR_idx;
4249 } else {
4250 needsLoad = true;
4251 ArgSize = PtrByteSize;
4252 }
4253 // All int arguments reserve stack space in the Darwin ABI.
4254 ArgOffset += 8;
4255 break;
4256
4257 case MVT::f32:
4258 case MVT::f64:
4259 // Every 4 bytes of argument space consumes one of the GPRs available for
4260 // argument passing.
4261 if (GPR_idx != Num_GPR_Regs) {
4262 ++GPR_idx;
4263 if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
4264 ++GPR_idx;
4265 }
4266 if (FPR_idx != Num_FPR_Regs) {
4267 unsigned VReg;
4268
4269 if (ObjectVT == MVT::f32)
4270 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
4271 else
4272 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
4273
4274 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4275 ++FPR_idx;
4276 } else {
4277 needsLoad = true;
4278 }
4279
4280 // All FP arguments reserve stack space in the Darwin ABI.
4281 ArgOffset += isPPC64 ? 8 : ObjSize;
4282 break;
4283 case MVT::v4f32:
4284 case MVT::v4i32:
4285 case MVT::v8i16:
4286 case MVT::v16i8:
4287 // Note that vector arguments in registers don't reserve stack space,
4288 // except in varargs functions.
4289 if (VR_idx != Num_VR_Regs) {
4290 unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4291 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4292 if (isVarArg) {
4293 while ((ArgOffset % 16) != 0) {
4294 ArgOffset += PtrByteSize;
4295 if (GPR_idx != Num_GPR_Regs)
4296 GPR_idx++;
4297 }
4298 ArgOffset += 16;
4299 GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
4300 }
4301 ++VR_idx;
4302 } else {
4303 if (!isVarArg && !isPPC64) {
4304 // Vectors go after all the nonvectors.
4305 CurArgOffset = VecArgOffset;
4306 VecArgOffset += 16;
4307 } else {
4308 // Vectors are aligned.
4309 ArgOffset = ((ArgOffset+15)/16)*16;
4310 CurArgOffset = ArgOffset;
4311 ArgOffset += 16;
4312 }
4313 needsLoad = true;
4314 }
4315 break;
4316 }
4317
4318 // We need to load the argument to a virtual register if we determined above
4319 // that we ran out of physical registers of the appropriate type.
4320 if (needsLoad) {
4321 int FI = MFI.CreateFixedObject(ObjSize,
4322 CurArgOffset + (ArgSize - ObjSize),
4323 isImmutable);
4324 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4325 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4326 }
4327
4328 InVals.push_back(ArgVal);
4329 }
4330
4331 // Allow for Altivec parameters at the end, if needed.
4332 if (nAltivecParamsAtEnd) {
4333 MinReservedArea = ((MinReservedArea+15)/16)*16;
4334 MinReservedArea += 16*nAltivecParamsAtEnd;
4335 }
4336
4337 // Area that is at least reserved in the caller of this function.
4338 MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);
4339
4340 // Set the size that is at least reserved in caller of this function. Tail
4341 // call optimized functions' reserved stack space needs to be aligned so that
4342 // taking the difference between two stack areas will result in an aligned
4343 // stack.
4344 MinReservedArea =
4345 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4346 FuncInfo->setMinReservedArea(MinReservedArea);
4347
4348 // If the function takes variable number of arguments, make a frame index for
4349 // the start of the first vararg value... for expansion of llvm.va_start.
4350 if (isVarArg) {
4351 int Depth = ArgOffset;
4352
4353 FuncInfo->setVarArgsFrameIndex(
4354 MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
4355 Depth, true));
4356 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4357
4358 // If this function is vararg, store any remaining integer argument regs
4359 // to their spots on the stack so that they may be loaded by dereferencing
4360 // the result of va_next.
4361 for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
4362 unsigned VReg;
4363
4364 if (isPPC64)
4365 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4366 else
4367 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4368
4369 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4370 SDValue Store =
4371 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4372 MemOps.push_back(Store);
4373       // Increment the address by PtrByteSize for the next argument to store.
4374 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4375 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4376 }
4377 }
4378
4379 if (!MemOps.empty())
4380 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4381
4382 return Chain;
4383}
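
Both argument lowerings round sizes and offsets upward: byval aggregates to a whole number of pointer-sized registers, and Altivec vector arguments to the next 16-byte boundary. A minimal sketch of that arithmetic, with an illustrative helper that is not part of the LLVM API:

#include <cassert>

// Round Value up to the next multiple of Align (Align > 0), as done for
// byval ArgSize and for the 16-byte alignment of vector arguments above.
static unsigned roundUpTo(unsigned Value, unsigned Align) {
  return ((Value + Align - 1) / Align) * Align;
}

int main() {
  // A byval aggregate of 13 bytes needs two 64-bit registers on PPC64.
  assert(roundUpTo(13, 8) == 16);
  // A vector argument following 20 bytes of integer arguments starts at 32.
  assert(roundUpTo(20, 16) == 32);
  return 0;
}
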
4384
4385/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4386/// adjusted to accommodate the arguments for the tailcall.
4387static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4388 unsigned ParamSize) {
4389
4390 if (!isTailCall) return 0;
4391
4392 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4393 unsigned CallerMinReservedArea = FI->getMinReservedArea();
4394 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4395 // Remember only if the new adjustment is bigger.
4396 if (SPDiff < FI->getTailCallSPDelta())
4397 FI->setTailCallSPDelta(SPDiff);
4398
4399 return SPDiff;
4400}
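
As a concrete illustration of the computation above (the numbers are made up): a caller whose minimum reserved area is 112 bytes that tail-calls a function needing 144 bytes of argument space gets SPDiff = 112 - 144 = -32, so the stack must be grown by 32 bytes before the branch, and the smallest (most negative) value seen so far is remembered in TailCallSPDelta.

#include <cassert>

// Mirrors CalculateTailCallSPDiff's arithmetic; illustrative only.
static int spDiffFor(unsigned CallerMinReservedArea, unsigned ParamSize) {
  return (int)CallerMinReservedArea - (int)ParamSize;
}

int main() {
  assert(spDiffFor(112, 144) == -32); // stack must grow 32 bytes
  assert(spDiffFor(144, 112) == 32);  // callee needs less than the caller reserved
  return 0;
}
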
4401
4402static bool isFunctionGlobalAddress(SDValue Callee);
4403
4404static bool
4405callsShareTOCBase(const Function *Caller, SDValue Callee,
4406 const TargetMachine &TM) {
4407 // If !G, Callee can be an external symbol.
4408 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4409 if (!G)
4410 return false;
4411
4412 // The medium and large code models are expected to provide a sufficiently
4413   // large TOC to satisfy all data addressing needs of a module with a
4414   // single TOC. Since each module will be addressed with a single TOC, we
4415   // only need to check that caller and callee don't cross DSO boundaries.
4416 if (CodeModel::Medium == TM.getCodeModel() ||
4417 CodeModel::Large == TM.getCodeModel())
4418 return TM.shouldAssumeDSOLocal(*Caller->getParent(), G->getGlobal());
4419
4420 // Otherwise we need to ensure callee and caller are in the same section,
4421 // since the linker may allocate multiple TOCs, and we don't know which
4422 // sections will belong to the same TOC base.
4423
4424 const GlobalValue *GV = G->getGlobal();
4425 if (!GV->isStrongDefinitionForLinker())
4426 return false;
4427
4428 // Any explicitly-specified sections and section prefixes must also match.
4429 // Also, if we're using -ffunction-sections, then each function is always in
4430 // a different section (the same is true for COMDAT functions).
4431 if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
4432 GV->getSection() != Caller->getSection())
4433 return false;
4434 if (const auto *F = dyn_cast<Function>(GV)) {
4435 if (F->getSectionPrefix() != Caller->getSectionPrefix())
4436 return false;
4437 }
4438
4439 // If the callee might be interposed, then we can't assume the ultimate call
4440 // target will be in the same section. Even in cases where we can assume that
4441 // interposition won't happen, in any case where the linker might insert a
4442 // stub to allow for interposition, we must generate code as though
4443 // interposition might occur. To understand why this matters, consider a
4444 // situation where: a -> b -> c where the arrows indicate calls. b and c are
4445 // in the same section, but a is in a different module (i.e. has a different
4446 // TOC base pointer). If the linker allows for interposition between b and c,
4447 // then it will generate a stub for the call edge between b and c which will
4448 // save the TOC pointer into the designated stack slot allocated by b. If we
4449 // return true here, and therefore allow a tail call between b and c, that
4450 // stack slot won't exist and the b -> c stub will end up saving b's TOC base
4451 // pointer into the stack slot allocated by a (where the a -> b stub saved
4452 // a's TOC base pointer). If we're not considering a tail call, but rather,
4453 // whether a nop is needed after the call instruction in b, because the linker
4454 // will insert a stub, it might complain about a missing nop if we omit it
4455 // (although many don't complain in this case).
4456 if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
4457 return false;
4458
4459 return true;
4460}
4461
4462static bool
4463needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4464 const SmallVectorImpl<ISD::OutputArg> &Outs) {
4465   assert(Subtarget.isSVR4ABI() && Subtarget.isPPC64());
4466
4467 const unsigned PtrByteSize = 8;
4468 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4469
4470 static const MCPhysReg GPR[] = {
4471 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4472 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4473 };
4474 static const MCPhysReg VR[] = {
4475 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4476 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4477 };
4478
4479 const unsigned NumGPRs = array_lengthof(GPR);
4480 const unsigned NumFPRs = 13;
4481 const unsigned NumVRs = array_lengthof(VR);
4482 const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4483
4484 unsigned NumBytes = LinkageSize;
4485 unsigned AvailableFPRs = NumFPRs;
4486 unsigned AvailableVRs = NumVRs;
4487
4488 for (const ISD::OutputArg& Param : Outs) {
4489 if (Param.Flags.isNest()) continue;
4490
4491 if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags,
4492 PtrByteSize, LinkageSize, ParamAreaSize,
4493 NumBytes, AvailableFPRs, AvailableVRs,
4494 Subtarget.hasQPX()))
4495 return true;
4496 }
4497 return false;
4498}
4499
4500static bool
4501hasSameArgumentList(const Function *CallerFn, ImmutableCallSite CS) {
4502 if (CS.arg_size() != CallerFn->arg_size())
4503 return false;
4504
4505 ImmutableCallSite::arg_iterator CalleeArgIter = CS.arg_begin();
4506 ImmutableCallSite::arg_iterator CalleeArgEnd = CS.arg_end();
4507 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4508
4509 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4510 const Value* CalleeArg = *CalleeArgIter;
4511 const Value* CallerArg = &(*CallerArgIter);
4512 if (CalleeArg == CallerArg)
4513 continue;
4514
4515 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4516 // tail call @callee([4 x i64] undef, [4 x i64] %b)
4517 // }
4518 // 1st argument of callee is undef and has the same type as caller.
4519 if (CalleeArg->getType() == CallerArg->getType() &&
4520 isa<UndefValue>(CalleeArg))
4521 continue;
4522
4523 return false;
4524 }
4525
4526 return true;
4527}
4528
4529 // Returns true if TCO is possible between the caller's and callee's
4530 // calling conventions.
4531static bool
4532areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
4533 CallingConv::ID CalleeCC) {
4534 // Tail calls are possible with fastcc and ccc.
4535 auto isTailCallableCC = [] (CallingConv::ID CC){
4536 return CC == CallingConv::C || CC == CallingConv::Fast;
4537 };
4538 if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
4539 return false;
4540
4541 // We can safely tail call both fastcc and ccc callees from a c calling
4542 // convention caller. If the caller is fastcc, we may have less stack space
4543 // than a non-fastcc caller with the same signature so disable tail-calls in
4544 // that case.
4545 return CallerCC == CallingConv::C || CallerCC == CalleeCC;
4546}
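
The predicate above admits a tail call only when both conventions are C or Fast and the caller is either C or matches the callee. A standalone sketch that enumerates the interesting combinations (the enum and helper are illustrative, not LLVM types):

#include <cassert>

enum class CC { C, Fast, Cold };

// Mirrors areCallingConvEligibleForTCO_64SVR4 for illustration.
static bool eligible(CC Caller, CC Callee) {
  auto tailCallable = [](CC X) { return X == CC::C || X == CC::Fast; };
  if (!tailCallable(Caller) || !tailCallable(Callee))
    return false;
  return Caller == CC::C || Caller == Callee;
}

int main() {
  assert(eligible(CC::C, CC::C));
  assert(eligible(CC::C, CC::Fast));   // a C caller may tail-call fastcc
  assert(!eligible(CC::Fast, CC::C));  // fastcc caller may have less stack space
  assert(eligible(CC::Fast, CC::Fast));
  assert(!eligible(CC::Cold, CC::C));
  return 0;
}
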
4547
4548bool
4549PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
4550 SDValue Callee,
4551 CallingConv::ID CalleeCC,
4552 ImmutableCallSite CS,
4553 bool isVarArg,
4554 const SmallVectorImpl<ISD::OutputArg> &Outs,
4555 const SmallVectorImpl<ISD::InputArg> &Ins,
4556 SelectionDAG& DAG) const {
4557 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
4558
4559 if (DisableSCO && !TailCallOpt) return false;
4560
4561 // Variadic argument functions are not supported.
4562 if (isVarArg) return false;
4563
4564 auto &Caller = DAG.getMachineFunction().getFunction();
4565 // Check that the calling conventions are compatible for tco.
4566 if (!areCallingConvEligibleForTCO_64SVR4(Caller.getCallingConv(), CalleeCC))
4567 return false;
4568
4569   // A caller that contains any byval parameter is not supported.
4570 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
4571 return false;
4572
4573   // A callee that contains any byval parameter is not supported either.
4574   // Note: This is a quick workaround, because in some cases, e.g.
4575 // caller's stack size > callee's stack size, we are still able to apply
4576 // sibling call optimization. For example, gcc is able to do SCO for caller1
4577 // in the following example, but not for caller2.
4578 // struct test {
4579 // long int a;
4580 // char ary[56];
4581 // } gTest;
4582 // __attribute__((noinline)) int callee(struct test v, struct test *b) {
4583 // b->a = v.a;
4584 // return 0;
4585 // }
4586 // void caller1(struct test a, struct test c, struct test *b) {
4587 // callee(gTest, b); }
4588 // void caller2(struct test *b) { callee(gTest, b); }
4589 if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
4590 return false;
4591
4592 // If callee and caller use different calling conventions, we cannot pass
4593 // parameters on stack since offsets for the parameter area may be different.
4594 if (Caller.getCallingConv() != CalleeCC &&
4595 needStackSlotPassParameters(Subtarget, Outs))
4596 return false;
4597
4598   // No TCO/SCO on indirect calls, because the caller has to restore its TOC.
4599 if (!isFunctionGlobalAddress(Callee) &&
4600 !isa<ExternalSymbolSDNode>(Callee))
4601 return false;
4602
4603 // If the caller and callee potentially have different TOC bases then we
4604 // cannot tail call since we need to restore the TOC pointer after the call.
4605 // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
4606 if (!callsShareTOCBase(&Caller, Callee, getTargetMachine()))
4607 return false;
4608
4609 // TCO allows altering callee ABI, so we don't have to check further.
4610 if (CalleeCC == CallingConv::Fast && TailCallOpt)
4611 return true;
4612
4613 if (DisableSCO) return false;
4614
4615   // If the callee uses the same argument list as the caller, we can apply
4616   // SCO in this case. Otherwise, we need to check whether the callee needs
4617   // stack slots for passing arguments.
4618 if (!hasSameArgumentList(&Caller, CS) &&
4619 needStackSlotPassParameters(Subtarget, Outs)) {
4620 return false;
4621 }
4622
4623 return true;
4624}
4625
4626/// IsEligibleForTailCallOptimization - Check whether the call is eligible
4627/// for tail call optimization. Targets which want to do tail call
4628/// optimization should implement this function.
4629bool
4630PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
4631 CallingConv::ID CalleeCC,
4632 bool isVarArg,
4633 const SmallVectorImpl<ISD::InputArg> &Ins,
4634 SelectionDAG& DAG) const {
4635 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
4636 return false;
4637
4638 // Variable argument functions are not supported.
4639 if (isVarArg)
4640 return false;
4641
4642 MachineFunction &MF = DAG.getMachineFunction();
4643 CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
4644 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
4645 // Functions containing by val parameters are not supported.
4646 for (unsigned i = 0; i != Ins.size(); i++) {
4647 ISD::ArgFlagsTy Flags = Ins[i].Flags;
4648 if (Flags.isByVal()) return false;
4649 }
4650
4651 // Non-PIC/GOT tail calls are supported.
4652 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
4653 return true;
4654
4655 // At the moment we can only do local tail calls (in same module, hidden
4656 // or protected) if we are generating PIC.
4657 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
4658 return G->getGlobal()->hasHiddenVisibility()
4659 || G->getGlobal()->hasProtectedVisibility();
4660 }
4661
4662 return false;
4663}
4664
4665/// isBLACompatibleAddress - Return the immediate to use if the specified
4666/// 32-bit value is representable in the immediate field of a BxA instruction.
4667static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
4668 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
4669 if (!C) return nullptr;
4670
4671 int Addr = C->getZExtValue();
4672 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
4673 SignExtend32<26>(Addr) != Addr)
4674 return nullptr; // Top 6 bits have to be sext of immediate.
4675
4676 return DAG
4677 .getConstant(
4678 (int)C->getZExtValue() >> 2, SDLoc(Op),
4679 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
4680 .getNode();
4681}
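
The helper above accepts a constant call target only if its low two bits are clear and the value sign-extends from 26 bits; what gets encoded is the address shifted right by two. A self-contained sketch of the same test on plain integers (illustrative names, not the SelectionDAG code):

#include <cassert>
#include <cstdint>
#include <optional>

// Sign-extend the low 26 bits of Addr, analogous to SignExtend32<26>.
static int32_t signExtend26(int64_t Addr) {
  int64_t Ext = Addr & 0x3FFFFFF;
  if (Ext & 0x2000000)
    Ext -= (1LL << 26);
  return (int32_t)Ext;
}

// Return the >>2 immediate if Addr fits the absolute BLA/BA form.
static std::optional<int32_t> blaImmediate(int32_t Addr) {
  if ((Addr & 3) != 0 ||            // low 2 bits are implicitly zero
      signExtend26(Addr) != Addr)   // top bits must be a sign extension
    return std::nullopt;
  return Addr >> 2;
}

int main() {
  assert(blaImmediate(0x100).value() == 0x40); // aligned and fits in 26 bits
  assert(!blaImmediate(0x102));                // low bits set
  assert(!blaImmediate(0x4000000));            // needs 27 bits
  return 0;
}
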
4682
4683namespace {
4684
4685struct TailCallArgumentInfo {
4686 SDValue Arg;
4687 SDValue FrameIdxOp;
4688 int FrameIdx = 0;
4689
4690 TailCallArgumentInfo() = default;
4691};
4692
4693} // end anonymous namespace
4694
4695/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
4696static void StoreTailCallArgumentsToStackSlot(
4697 SelectionDAG &DAG, SDValue Chain,
4698 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
4699 SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
4700 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
4701 SDValue Arg = TailCallArgs[i].Arg;
4702 SDValue FIN = TailCallArgs[i].FrameIdxOp;
4703 int FI = TailCallArgs[i].FrameIdx;
4704 // Store relative to framepointer.
4705 MemOpChains.push_back(DAG.getStore(
4706 Chain, dl, Arg, FIN,
4707 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
4708 }
4709}
4710
4711/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
4712/// the appropriate stack slot for the tail call optimized function call.
4713static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
4714 SDValue OldRetAddr, SDValue OldFP,
4715 int SPDiff, const SDLoc &dl) {
4716 if (SPDiff) {
4717 // Calculate the new stack slot for the return address.
4718 MachineFunction &MF = DAG.getMachineFunction();
4719 const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
4720 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
4721 bool isPPC64 = Subtarget.isPPC64();
4722 int SlotSize = isPPC64 ? 8 : 4;
4723 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
4724 int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
4725 NewRetAddrLoc, true);
4726 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4727 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
4728 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
4729 MachinePointerInfo::getFixedStack(MF, NewRetAddr));
4730
4731 // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
4732 // slot as the FP is never overwritten.
4733 if (Subtarget.isDarwinABI()) {
4734 int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset();
4735 int NewFPIdx = MF.getFrameInfo().CreateFixedObject(SlotSize, NewFPLoc,
4736 true);
4737 SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
4738 Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
4739 MachinePointerInfo::getFixedStack(
4740 DAG.getMachineFunction(), NewFPIdx));
4741 }
4742 }
4743 return Chain;
4744}
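
As a worked example of the slot computation above (illustrative numbers only): with SPDiff = -32 and a return-address save offset of 16, the return address is re-stored at offset -32 + 16 = -16 relative to the adjusted frame.

#include <cassert>

// Mirrors the NewRetAddrLoc computation above; the numbers are illustrative.
int main() {
  int SPDiff = -32;
  int ReturnSaveOffset = 16;
  int NewRetAddrLoc = SPDiff + ReturnSaveOffset;
  assert(NewRetAddrLoc == -16); // slot sits below the adjusted frame top
  return 0;
}
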
4745
4746/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
4747/// the position of the argument.
4748static void
4749CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
4750 SDValue Arg, int SPDiff, unsigned ArgOffset,
4751 SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
4752 int Offset = ArgOffset + SPDiff;
4753 uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
4754 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
4755 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4756 SDValue FIN = DAG.getFrameIndex(FI, VT);
4757 TailCallArgumentInfo Info;
4758 Info.Arg = Arg;
4759 Info.FrameIdxOp = FIN;
4760 Info.FrameIdx = FI;
4761 TailCallArguments.push_back(Info);
4762}
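
The helper above records each argument's final home: its original offset shifted by SPDiff, with the slot size rounded up from bits to whole bytes. A minimal sketch of that arithmetic (the struct and function names are illustrative):

#include <cassert>

struct ArgSlot { int Offset; unsigned SizeInBytes; };

// Mirrors CalculateTailCallArgDest's offset/size computation.
static ArgSlot tailCallSlot(unsigned ArgOffset, int SPDiff,
                            unsigned ValueSizeInBits) {
  return { (int)ArgOffset + SPDiff, (ValueSizeInBits + 7) / 8 };
}

int main() {
  ArgSlot S = tailCallSlot(/*ArgOffset=*/96, /*SPDiff=*/-32, /*Bits=*/64);
  assert(S.Offset == 64 && S.SizeInBytes == 8);
  return 0;
}
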
4763
4764/// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
4765/// stack slot. Returns the chain as result and the loaded frame pointers in
4766/// LROpOut/FPOpout. Used when tail calling.
4767SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
4768 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
4769 SDValue &FPOpOut, const SDLoc &dl) const {
4770 if (SPDiff) {
4771 // Load the LR and FP stack slot for later adjusting.
4772 EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
4773 LROpOut = getReturnAddrFrameIndex(DAG);
4774 LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
4775 Chain = SDValue(LROpOut.getNode(), 1);
4776
4777 // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
4778 // slot as the FP is never overwritten.
4779 if (Subtarget.isDarwinABI()) {
4780 FPOpOut = getFramePointerFrameIndex(DAG);
4781 FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo());
4782 Chain = SDValue(FPOpOut.getNode(), 1);
4783 }
4784 }
4785 return Chain;
4786}
4787
4788/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
4789/// by "Src" to address "Dst" of size "Size". Alignment information is
4790/// specified by the specific parameter attribute. The copy will be passed as
4791/// a byval function parameter.
4792/// Sometimes what we are copying is the end of a larger object, the part that
4793/// does not fit in registers.
4794static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
4795 SDValue Chain, ISD::ArgFlagsTy Flags,
4796 SelectionDAG &DAG, const SDLoc &dl) {
4797 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
4798 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
4799 false, false, false, MachinePointerInfo(),
4800 MachinePointerInfo());
4801}
4802
4803/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
4804/// tail calls.
4805static void LowerMemOpCallTo(
4806 SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
4807 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
4808 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
4809 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
4810 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4811 if (!isTailCall) {
4812 if (isVector) {
4813 SDValue StackPtr;
4814 if (isPPC64)
4815 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
4816 else
4817 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
4818 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
4819 DAG.getConstant(ArgOffset, dl, PtrVT));
4820 }
4821 MemOpChains.push_back(
4822 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
4823 // Calculate and remember argument location.
4824 } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
4825 TailCallArguments);
4826}
4827
4828static void
4829PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
4830 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
4831 SDValue FPOp,
4832 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
4833 // Emit a sequence of copyto/copyfrom virtual registers for arguments that
4834 // might overwrite each other in case of tail call optimization.
4835 SmallVector<SDValue, 8> MemOpChains2;
4836 // Do not flag preceding copytoreg stuff together with the following stuff.
4837 InFlag = SDValue();
4838 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
4839 MemOpChains2, dl);
4840 if (!MemOpChains2.empty())
4841 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
4842
4843 // Store the return address to the appropriate stack slot.
4844 Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
4845
4846 // Emit callseq_end just before tailcall node.
4847 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
4848 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
4849 InFlag = Chain.getValue(1);
4850}
4851
4852// Is this global address that of a function that can be called by name? (as
4853// opposed to something that must hold a descriptor for an indirect call).
4854static bool isFunctionGlobalAddress(SDValue Callee) {
4855 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
4856 if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
4857 Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
4858 return false;
4859
4860 return G->getGlobal()->getValueType()->isFunctionTy();
4861 }
4862
4863 return false;
4864}
4865
4866static unsigned
4867PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
4868 SDValue CallSeqStart, const SDLoc &dl, int SPDiff, bool isTailCall,
4869 bool isPatchPoint, bool hasNest,
4870 SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass,
4871 SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
4872 ImmutableCallSite CS, const PPCSubtarget &Subtarget) {
4873 bool isPPC64 = Subtarget.isPPC64();
4874 bool isSVR4ABI = Subtarget.isSVR4ABI();
4875 bool isELFv2ABI = Subtarget.isELFv2ABI();
4876
4877 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4878 NodeTys.push_back(MVT::Other); // Returns a chain
4879 NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use.
4880
4881 unsigned CallOpc = PPCISD::CALL;
4882
4883 bool needIndirectCall = true;
4884 if (!isSVR4ABI || !isPPC64)
4885 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
4886 // If this is an absolute destination address, use the munged value.
4887 Callee = SDValue(Dest, 0);
4888 needIndirectCall = false;
4889 }
4890
4891 // PC-relative references to external symbols should go through $stub, unless
4892 // we're building with the leopard linker or later, which automatically
4893 // synthesizes these stubs.
4894 const TargetMachine &TM = DAG.getTarget();
4895 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
4896 const GlobalValue *GV = nullptr;
4897 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee))
4898 GV = G->getGlobal();
4899 bool Local = TM.shouldAssumeDSOLocal(*Mod, GV);
4900 bool UsePlt = !Local && Subtarget.isTargetELF() && !isPPC64;
4901
4902 if (isFunctionGlobalAddress(Callee)) {
4903 GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
4904 // A call to a TLS address is actually an indirect call to a
4905 // thread-specific pointer.
4906 unsigned OpFlags = 0;
4907 if (UsePlt)
4908 OpFlags = PPCII::MO_PLT;
4909
4910 // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
4911 // every direct call is) turn it into a TargetGlobalAddress /
4912 // TargetExternalSymbol node so that legalize doesn't hack it.
4913 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
4914 Callee.getValueType(), 0, OpFlags);
4915 needIndirectCall = false;
4916 }
4917
4918 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
4919 unsigned char OpFlags = 0;
4920
4921 if (UsePlt)
4922 OpFlags = PPCII::MO_PLT;
4923
4924 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
4925 OpFlags);
4926 needIndirectCall = false;
4927 }
4928
4929 if (isPatchPoint) {
4930 // We'll form an invalid direct call when lowering a patchpoint; the full
4931 // sequence for an indirect call is complicated, and many of the
4932 // instructions introduced might have side effects (and, thus, can't be
4933 // removed later). The call itself will be removed as soon as the
4934 // argument/return lowering is complete, so the fact that it has the wrong
4935 // kind of operands should not really matter.
4936 needIndirectCall = false;
4937 }
4938
4939 if (needIndirectCall) {
4940 // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair
4941 // to do the call, we can't use PPCISD::CALL.
4942 SDValue MTCTROps[] = {Chain, Callee, InFlag};
4943
4944 if (isSVR4ABI && isPPC64 && !isELFv2ABI) {
4945 // Function pointers in the 64-bit SVR4 ABI do not point to the function
4946 // entry point, but to the function descriptor (the function entry point
4947 // address is part of the function descriptor though).
4948 // The function descriptor is a three doubleword structure with the
4949 // following fields: function entry point, TOC base address and
4950 // environment pointer.
4951 // Thus for a call through a function pointer, the following actions need
4952 // to be performed:
4953 // 1. Save the TOC of the caller in the TOC save area of its stack
4954 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
4955 // 2. Load the address of the function entry point from the function
4956 // descriptor.
4957 // 3. Load the TOC of the callee from the function descriptor into r2.
4958 // 4. Load the environment pointer from the function descriptor into
4959 // r11.
4960 // 5. Branch to the function entry point address.
4961 // 6. On return of the callee, the TOC of the caller needs to be
4962 // restored (this is done in FinishCall()).
4963 //
4964 // The loads are scheduled at the beginning of the call sequence, and the
4965 // register copies are flagged together to ensure that no other
4966 // operations can be scheduled in between. E.g. without flagging the
4967 // copies together, a TOC access in the caller could be scheduled between
4968 // the assignment of the callee TOC and the branch to the callee, which
4969 // results in the TOC access going through the TOC of the callee instead
4970 // of going through the TOC of the caller, which leads to incorrect code.
4971
4972 // Load the address of the function entry point from the function
4973 // descriptor.
4974 SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1);
4975 if (LDChain.getValueType() == MVT::Glue)
4976 LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2);
4977
4978 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
4979 ? (MachineMemOperand::MODereferenceable |
4980 MachineMemOperand::MOInvariant)
4981 : MachineMemOperand::MONone;
4982
4983 MachinePointerInfo MPI(CS ? CS.getCalledValue() : nullptr);
4984 SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI,
4985 /* Alignment = */ 8, MMOFlags);
4986
4987 // Load environment pointer into r11.
4988 SDValue PtrOff = DAG.getIntPtrConstant(16, dl);
4989 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
4990 SDValue LoadEnvPtr =
4991 DAG.getLoad(MVT::i64, dl, LDChain, AddPtr, MPI.getWithOffset(16),
4992 /* Alignment = */ 8, MMOFlags);
4993
4994 SDValue TOCOff = DAG.getIntPtrConstant(8, dl);
4995 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
4996 SDValue TOCPtr =
4997 DAG.getLoad(MVT::i64, dl, LDChain, AddTOC, MPI.getWithOffset(8),
4998 /* Alignment = */ 8, MMOFlags);
4999
5000 setUsesTOCBasePtr(DAG);
5001 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr,
5002 InFlag);
5003 Chain = TOCVal.getValue(0);
5004 InFlag = TOCVal.getValue(1);
5005
5006 // If the function call has an explicit 'nest' parameter, it takes the
5007 // place of the environment pointer.
5008 if (!hasNest) {
5009 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
5010 InFlag);
5011
5012 Chain = EnvVal.getValue(0);
5013 InFlag = EnvVal.getValue(1);
5014 }
5015
5016 MTCTROps[0] = Chain;
5017 MTCTROps[1] = LoadFuncPtr;
5018 MTCTROps[2] = InFlag;
5019 }
5020
5021 Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys,
5022 makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
5023 InFlag = Chain.getValue(1);
5024
5025 NodeTys.clear();
5026 NodeTys.push_back(MVT::Other);
5027 NodeTys.push_back(MVT::Glue);
5028 Ops.push_back(Chain);
5029 CallOpc = PPCISD::BCTRL;
5030 Callee.setNode(nullptr);
5031 // Add use of X11 (holding environment pointer)
5032 if (isSVR4ABI && isPPC64 && !isELFv2ABI && !hasNest)
5033 Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
5034 // Add CTR register as callee so a bctr can be emitted later.
5035 if (isTailCall)
5036 Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
5037 }
5038
5039 // If this is a direct call, pass the chain and the callee.
5040 if (Callee.getNode()) {
5041 Ops.push_back(Chain);
5042 Ops.push_back(Callee);
5043 }
5044 // If this is a tail call add stack pointer delta.
5045 if (isTailCall)
5046 Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5047
5048 // Add argument registers to the end of the list so that they are known live
5049 // into the call.
5050 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
5051 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
5052 RegsToPass[i].second.getValueType()));
5053
5054 // All calls, in both the ELF V1 and V2 ABIs, need the TOC register live
5055 // into the call.
5056 if (isSVR4ABI && isPPC64 && !isPatchPoint) {
5057 setUsesTOCBasePtr(DAG);
5058 Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
5059 }
5060
5061 return CallOpc;
5062}
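
The ELFv1 indirect-call sequence in PrepareCall loads three doublewords from the callee's function descriptor: the entry point at offset 0 (moved to CTR), the TOC base at offset 8 (copied into r2), and the environment pointer at offset 16 (copied into r11 unless a 'nest' parameter takes its place). A plain-C++ view of that layout, given here only to make the +0/+8/+16 offsets concrete; the struct is illustrative, not an LLVM type:

#include <cstddef>
#include <cstdint>

// 64-bit ELFv1 (SVR4) function descriptor layout assumed by PrepareCall.
struct FunctionDescriptor {
  uint64_t EntryPoint;  // offset 0: loaded and moved to CTR
  uint64_t TOCBase;     // offset 8: loaded into r2 for the callee
  uint64_t Environment; // offset 16: loaded into r11 (unless 'nest' is used)
};

static_assert(sizeof(FunctionDescriptor) == 24,
              "three doublewords, matching the +0/+8/+16 loads above");
static_assert(offsetof(FunctionDescriptor, TOCBase) == 8, "TOC at +8");
static_assert(offsetof(FunctionDescriptor, Environment) == 16, "env ptr at +16");
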
5063
5064SDValue PPCTargetLowering::LowerCallResult(
5065 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
5066 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5067 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5068 SmallVector<CCValAssign, 16> RVLocs;
5069 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5070 *DAG.getContext());
5071
5072 CCRetInfo.AnalyzeCallResult(
5073 Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5074 ? RetCC_PPC_Cold
5075 : RetCC_PPC);
5076
5077 // Copy all of the result registers out of their specified physreg.
5078 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5079 CCValAssign &VA = RVLocs[i];
5080     assert(VA.isRegLoc() && "Can only return in registers!");
5081
5082 SDValue Val = DAG.getCopyFromReg(Chain, dl,
5083 VA.getLocReg(), VA.getLocVT(), InFlag);
5084 Chain = Val.getValue(1);
5085 InFlag = Val.getValue(2);
5086
5087 switch (VA.getLocInfo()) {
5088     default: llvm_unreachable("Unknown loc info!");
5089 case CCValAssign::Full: break;
5090 case CCValAssign::AExt:
5091 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5092 break;
5093 case CCValAssign::ZExt:
5094 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5095 DAG.getValueType(VA.getValVT()));
5096 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5097 break;
5098 case CCValAssign::SExt:
5099 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5100 DAG.getValueType(VA.getValVT()));
5101 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5102 break;
5103 }
5104
5105 InVals.push_back(Val);
5106 }
5107
5108 return Chain;
5109}
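
For promoted return values, the loop above re-narrows the physical-register copy: an AExt result is simply truncated, while ZExt and SExt results first receive an AssertZext or AssertSext node recording that the upper bits are already correct before the truncate. For example, an i32 returned in X3 under ZExt arrives as an i64 whose top 32 bits are known to be zero, and the DAG keeps that fact through the truncation to i32.
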
5110
5111SDValue PPCTargetLowering::FinishCall(
5112 CallingConv::ID CallConv, const SDLoc &dl, bool isTailCall, bool isVarArg,
5113 bool isPatchPoint, bool hasNest, SelectionDAG &DAG,
5114 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue InFlag,
5115 SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5116 unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5117 SmallVectorImpl<SDValue> &InVals, ImmutableCallSite CS) const {
5118 std::vector<EVT> NodeTys;
5119 SmallVector<SDValue, 8> Ops;
5120 unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl,
5121 SPDiff, isTailCall, isPatchPoint, hasNest,
5122 RegsToPass, Ops, NodeTys, CS, Subtarget);
5123
5124 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5125 if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
5126 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5127
5128 // When performing tail call optimization the callee pops its arguments off
5129 // the stack. Account for this here so these bytes can be pushed back on in
5130 // PPCFrameLowering::eliminateCallFramePseudoInstr.
5131 int BytesCalleePops =
5132 (CallConv == CallingConv::Fast &&
5133 getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
5134
5135 // Add a register mask operand representing the call-preserved registers.
5136 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5137 const uint32_t *Mask =
5138 TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
5139   assert(Mask && "Missing call preserved mask for calling convention");
5140 Ops.push_back(DAG.getRegisterMask(Mask));
5141
5142 if (InFlag.getNode())
5143 Ops.push_back(InFlag);
5144
5145 // Emit tail call.
5146 if (isTailCall) {
5147     assert(((Callee.getOpcode() == ISD::Register &&
5148              cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5149             Callee.getOpcode() == ISD::TargetExternalSymbol ||
5150             Callee.getOpcode() == ISD::TargetGlobalAddress ||
5151             isa<ConstantSDNode>(Callee)) &&
5152            "Expecting an global address, external symbol, absolute value or register");
5154 DAG.getMachineFunction().getFrameInfo().setHasTailCall();
5155 return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
5156 }
5157
5158 // Add a NOP immediately after the branch instruction when using the 64-bit
5159 // SVR4 ABI. At link time, if caller and callee are in a different module and
5160 // thus have a different TOC, the call will be replaced with a call to a stub
5161 // function which saves the current TOC, loads the TOC of the callee and
5162 // branches to the callee. The NOP will be replaced with a load instruction
5163 // which restores the TOC of the caller from the TOC save slot of the current
5164 // stack frame. If caller and callee belong to the same module (and have the
5165 // same TOC), the NOP will remain unchanged.
5166
5167 MachineFunction &MF = DAG.getMachineFunction();
5168 if (!isTailCall && Subtarget.isSVR4ABI()&& Subtarget.isPPC64() &&
5169 !isPatchPoint) {
5170 if (CallOpc == PPCISD::BCTRL) {
5171 // This is a call through a function pointer.
5172 // Restore the caller TOC from the save area into R2.
5173 // See PrepareCall() for more information about calls through function
5174 // pointers in the 64-bit SVR4 ABI.
5175 // We are using a target-specific load with r2 hard coded, because the
5176 // result of a target-independent load would never go directly into r2,
5177 // since r2 is a reserved register (which prevents the register allocator
5178 // from allocating it), resulting in an additional register being
5179 // allocated and an unnecessary move instruction being generated.
5180 CallOpc = PPCISD::BCTRL_LOAD_TOC;
5181
5182 EVT PtrVT = getPointerTy(DAG.getDataLayout());
5183 SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
5184 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5185 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5186 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
5187
5188 // The address needs to go after the chain input but before the flag (or
5189 // any other variadic arguments).
5190 Ops.insert(std::next(Ops.begin()), AddTOC);
5191 } else if (CallOpc == PPCISD::CALL &&
5192 !callsShareTOCBase(&MF.getFunction(), Callee, DAG.getTarget())) {
5193 // Otherwise insert NOP for non-local calls.
5194 CallOpc = PPCISD::CALL_NOP;
5195 }
5196 }
5197
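The BCTRL_LOAD_TOC path above reloads r2 from the TOC save slot inside the caller's linkage area. The following standalone sketch only illustrates the address arithmetic that the AddTOC node sets up; the 40/24 offsets are quoted from the 64-bit ELFv1/ELFv2 ABIs as background and are obtained in this file via getTOCSaveOffset(), not hard coded.

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Hypothetical value of the stack pointer (r1) at the call site.
      uint64_t StackPtrR1 = 0x00007fffffff0000ULL;
      // Assumed TOC save slot offset: 40 bytes under ELFv1 (24 under ELFv2).
      unsigned TOCSaveOffset = 40;
      // This is the address the post-call "ld r2, TOCSaveOffset(r1)" reads from.
      printf("restore r2 from 0x%llx\n",
             (unsigned long long)(StackPtrR1 + TOCSaveOffset));
      return 0;
    }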
5198 Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
5199 InFlag = Chain.getValue(1);
5200
5201 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
5202 DAG.getIntPtrConstant(BytesCalleePops, dl, true),
5203 InFlag, dl);
5204 if (!Ins.empty())
5205 InFlag = Chain.getValue(1);
5206
5207 return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
5208 Ins, dl, DAG, InVals);
5209}
5210
5211SDValue
5212PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5213 SmallVectorImpl<SDValue> &InVals) const {
5214 SelectionDAG &DAG = CLI.DAG;
5215 SDLoc &dl = CLI.DL;
5216 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5217 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5218 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
5219 SDValue Chain = CLI.Chain;
5220 SDValue Callee = CLI.Callee;
5221 bool &isTailCall = CLI.IsTailCall;
5222 CallingConv::ID CallConv = CLI.CallConv;
5223 bool isVarArg = CLI.IsVarArg;
5224 bool isPatchPoint = CLI.IsPatchPoint;
5225 ImmutableCallSite CS = CLI.CS;
5226
5227 if (isTailCall) {
5228 if (Subtarget.useLongCalls() && !(CS && CS.isMustTailCall()))
5229 isTailCall = false;
5230 else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5231 isTailCall =
5232 IsEligibleForTailCallOptimization_64SVR4(Callee, CallConv, CS,
5233 isVarArg, Outs, Ins, DAG);
5234 else
5235 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
5236 Ins, DAG);
5237 if (isTailCall) {
5238 ++NumTailCalls;
5239 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5240 ++NumSiblingCalls;
5241
5242       assert(isa<GlobalAddressSDNode>(Callee) &&
5243              "Callee should be an llvm::Function object.");
5244       LLVM_DEBUG(
5245           const GlobalValue *GV =
5246               cast<GlobalAddressSDNode>(Callee)->getGlobal();
5247           const unsigned Width =
5248               80 - strlen("TCO caller: ") - strlen(", callee linkage: 0, 0");
5249           dbgs() << "TCO caller: "
5250                  << left_justify(DAG.getMachineFunction().getName(), Width)
5251                  << ", callee linkage: " << GV->getVisibility() << ", "
5252                  << GV->getLinkage() << "\n");
5253 }
5254 }
5255
5256 if (!isTailCall && CS && CS.isMustTailCall())
5257 report_fatal_error("failed to perform tail call elimination on a call "
5258 "site marked musttail");
5259
5260 // When long calls (i.e. indirect calls) are always used, calls are always
5261 // made via function pointer. If we have a function name, first translate it
5262 // into a pointer.
5263 if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5264 !isTailCall)
5265 Callee = LowerGlobalAddress(Callee, DAG);
5266
5267 if (Subtarget.isSVR4ABI()) {
5268 if (Subtarget.isPPC64())
5269 return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
5270 isTailCall, isPatchPoint, Outs, OutVals, Ins,
5271 dl, DAG, InVals, CS);
5272 else
5273 return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
5274 isTailCall, isPatchPoint, Outs, OutVals, Ins,
5275 dl, DAG, InVals, CS);
5276 }
5277
5278 return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
5279 isTailCall, isPatchPoint, Outs, OutVals, Ins,
5280 dl, DAG, InVals, CS);
5281}
5282
5283SDValue PPCTargetLowering::LowerCall_32SVR4(
5284 SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
5285 bool isTailCall, bool isPatchPoint,
5286 const SmallVectorImpl<ISD::OutputArg> &Outs,
5287 const SmallVectorImpl<SDValue> &OutVals,
5288 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5289 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5290 ImmutableCallSite CS) const {
5291 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5292 // of the 32-bit SVR4 ABI stack frame layout.
5293
5294   assert((CallConv == CallingConv::C ||
5295           CallConv == CallingConv::Cold ||
5296           CallConv == CallingConv::Fast) && "Unknown calling convention!");
5297
5298 unsigned PtrByteSize = 4;
5299
5300 MachineFunction &MF = DAG.getMachineFunction();
5301
5302   // Mark this function as potentially containing a tail call. As a consequence
5303   // the frame pointer will be used for dynamic stack allocation and for restoring
5304   // the caller's stack pointer in this function's epilogue. This is done because
5305   // the tail-called function might overwrite the value in this function's (MF)
5306   // stack pointer save slot 0(SP).
5307 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5308 CallConv == CallingConv::Fast)
5309 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5310
5311 // Count how many bytes are to be pushed on the stack, including the linkage
5312 // area, parameter list area and the part of the local variable space which
5313 // contains copies of aggregates which are passed by value.
5314
5315 // Assign locations to all of the outgoing arguments.
5316 SmallVector<CCValAssign, 16> ArgLocs;
5317 PPCCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
5318
5319 // Reserve space for the linkage area on the stack.
5320 CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
5321 PtrByteSize);
5322 if (useSoftFloat())
5323 CCInfo.PreAnalyzeCallOperands(Outs);
5324
5325 if (isVarArg) {
5326 // Handle fixed and variable vector arguments differently.
5327 // Fixed vector arguments go into registers as long as registers are
5328 // available. Variable vector arguments always go into memory.
5329 unsigned NumArgs = Outs.size();
5330
5331 for (unsigned i = 0; i != NumArgs; ++i) {
5332 MVT ArgVT = Outs[i].VT;
5333 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5334 bool Result;
5335
5336 if (Outs[i].IsFixed) {
5337 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
5338 CCInfo);
5339 } else {
5340 Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
5341 ArgFlags, CCInfo);
5342 }
5343
5344 if (Result) {
5345#ifndef NDEBUG
5346 errs() << "Call operand #" << i << " has unhandled type "
5347 << EVT(ArgVT).getEVTString() << "\n";
5348#endif
5349       llvm_unreachable(nullptr);
5350 }
5351 }
5352 } else {
5353 // All arguments are treated the same.
5354 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
5355 }
5356 CCInfo.clearWasPPCF128();
5357
5358 // Assign locations to all of the outgoing aggregate by value arguments.
5359 SmallVector<CCValAssign, 16> ByValArgLocs;
5360 CCState CCByValInfo(CallConv, isVarArg, MF, ByValArgLocs, *DAG.getContext());
5361
5362 // Reserve stack space for the allocations in CCInfo.
5363 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
5364
5365 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
5366
5367   // Size of the linkage area, parameter list area and the part of the local
5368   // variable space where copies of aggregates which are passed by value are
5369   // stored.
5370 unsigned NumBytes = CCByValInfo.getNextStackOffset();
5371
5372 // Calculate by how many bytes the stack has to be adjusted in case of tail
5373 // call optimization.
5374 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
5375
5376 // Adjust the stack pointer for the new arguments...
5377 // These operations are automatically eliminated by the prolog/epilog pass
5378 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5379 SDValue CallSeqStart = Chain;
5380
5381   // Load the return address and frame pointer so they can be moved somewhere else
5382 // later.
5383 SDValue LROp, FPOp;
5384 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5385
5386 // Set up a copy of the stack pointer for use loading and storing any
5387 // arguments that may not fit in the registers available for argument
5388 // passing.
5389 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5390
5391 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5392 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5393 SmallVector<SDValue, 8> MemOpChains;
5394
5395 bool seenFloatArg = false;
5396 // Walk the register/memloc assignments, inserting copies/loads.
5397 for (unsigned i = 0, j = 0, e = ArgLocs.size();
5398 i != e;
5399 ++i) {
5400 CCValAssign &VA = ArgLocs[i];
5401 SDValue Arg = OutVals[i];
5402 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5403
5404 if (Flags.isByVal()) {
5405 // Argument is an aggregate which is passed by value, thus we need to
5406 // create a copy of it in the local variable space of the current stack
5407 // frame (which is the stack frame of the caller) and pass the address of
5408 // this copy to the callee.
5409       assert((j < ByValArgLocs.size()) && "Index out of bounds!");
5410 CCValAssign &ByValVA = ByValArgLocs[j++];
5411       assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
5412
5413       // Memory reserved in the local variable space of the caller's stack frame.
5414 unsigned LocMemOffset = ByValVA.getLocMemOffset();
5415
5416 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5417 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5418 StackPtr, PtrOff);
5419
5420 // Create a copy of the argument in the local area of the current
5421 // stack frame.
5422 SDValue MemcpyCall =
5423 CreateCopyOfByValArgument(Arg, PtrOff,
5424 CallSeqStart.getNode()->getOperand(0),
5425 Flags, DAG, dl);
5426
5427 // This must go outside the CALLSEQ_START..END.
5428 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
5429 SDLoc(MemcpyCall));
5430 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5431 NewCallSeqStart.getNode());
5432 Chain = CallSeqStart = NewCallSeqStart;
5433
5434 // Pass the address of the aggregate copy on the stack either in a
5435 // physical register or in the parameter list area of the current stack
5436 // frame to the callee.
5437 Arg = PtrOff;
5438 }
5439
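As a plain-C++ illustration of what the byval handling above arranges, the caller builds a private copy of the aggregate in its own frame and only that copy's address ever reaches the callee. The names and the Payload type below are invented for the sketch; they are not part of this file.

    #include <cstdio>
    #include <cstring>

    struct Payload { char data[64]; };               // stand-in for a byval aggregate

    // What the lowered call actually receives: the address of a caller-made copy.
    void callee(const Payload *copyInCallerFrame) {
      printf("%c\n", copyInCallerFrame->data[0]);
    }

    void caller(const Payload &arg) {
      Payload localCopy;                              // reserved in the caller's stack frame
      std::memcpy(&localCopy, &arg, sizeof(Payload)); // the CreateCopyOfByValArgument step
      callee(&localCopy);                             // only the copy's address is passed
    }

    int main() {
      Payload p = {{'x'}};
      caller(p);
      return 0;
    }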
5440 if (VA.isRegLoc()) {
5441 if (Arg.getValueType() == MVT::i1)
5442 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg);
5443
5444 seenFloatArg |= VA.getLocVT().isFloatingPoint();
5445 // Put argument in a physical register.
5446 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
5447 } else {
5448 // Put argument in the parameter list area of the current stack frame.
5449         assert(VA.isMemLoc());
5450 unsigned LocMemOffset = VA.getLocMemOffset();
5451
5452 if (!isTailCall) {
5453 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5454 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5455 StackPtr, PtrOff);
5456
5457 MemOpChains.push_back(
5458 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5459 } else {
5460 // Calculate and remember argument location.
5461 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
5462 TailCallArguments);
5463 }
5464 }
5465 }
5466
5467 if (!MemOpChains.empty())
5468 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
5469
5470 // Build a sequence of copy-to-reg nodes chained together with token chain
5471 // and flag operands which copy the outgoing args into the appropriate regs.
5472 SDValue InFlag;
5473 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5474 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5475 RegsToPass[i].second, InFlag);
5476 InFlag = Chain.getValue(1);
5477 }
5478
5479 // Set CR bit 6 to true if this is a vararg call with floating args passed in
5480 // registers.
5481 if (isVarArg) {
5482 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
5483 SDValue Ops[] = { Chain, InFlag };
5484
5485 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
5486 dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));
5487
5488 InFlag = Chain.getValue(1);
5489 }
5490
5491 if (isTailCall)
5492 PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
5493 TailCallArguments);
5494
5495 return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
5496 /* unused except on PPC64 ELFv1 */ false, DAG,
5497 RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
5498 NumBytes, Ins, InVals, CS);
5499}
5500
5501// Copy an argument into memory, being careful to do this outside the
5502// call sequence for the call to which the argument belongs.
5503SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
5504 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
5505 SelectionDAG &DAG, const SDLoc &dl) const {
5506 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
5507 CallSeqStart.getNode()->getOperand(0),
5508 Flags, DAG, dl);
5509 // The MEMCPY must go outside the CALLSEQ_START..END.
5510 int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
5511 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
5512 SDLoc(MemcpyCall));
5513 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5514 NewCallSeqStart.getNode());
5515 return NewCallSeqStart;
5516}
5517
5518SDValue PPCTargetLowering::LowerCall_64SVR4(
5519 SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
5520 bool isTailCall, bool isPatchPoint,
5521 const SmallVectorImpl<ISD::OutputArg> &Outs,
5522 const SmallVectorImpl<SDValue> &OutVals,
5523 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5524 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5525 ImmutableCallSite CS) const {
5526 bool isELFv2ABI = Subtarget.isELFv2ABI();
5527 bool isLittleEndian = Subtarget.isLittleEndian();
5528 unsigned NumOps = Outs.size();
5529 bool hasNest = false;
5530 bool IsSibCall = false;
5531
5532 EVT PtrVT = getPointerTy(DAG.getDataLayout());
5533 unsigned PtrByteSize = 8;
5534
5535 MachineFunction &MF = DAG.getMachineFunction();
5536
5537 if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
5538 IsSibCall = true;
5539
5540   // Mark this function as potentially containing a tail call. As a consequence
5541   // the frame pointer will be used for dynamic stack allocation and for restoring
5542   // the caller's stack pointer in this function's epilogue. This is done because
5543   // the tail-called function might overwrite the value in this function's (MF)
5544   // stack pointer save slot 0(SP).
5545 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5546 CallConv == CallingConv::Fast)
5547 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5548
5549   assert(!(CallConv == CallingConv::Fast && isVarArg) &&
5550          "fastcc not supported on varargs functions");
5551
5552 // Count how many bytes are to be pushed on the stack, including the linkage
5553 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
5554 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
5555 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
5556 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
5557 unsigned NumBytes = LinkageSize;
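Purely as an illustration of the slot order named in the linkage-area comment a few lines up, the byte offsets below follow from that ordering with 8-byte slots; they are not constants defined in this file.

    // ELFv1 linkage area: [SP][CR][LR][2 x unused][TOC] = 6 doublewords = 48 bytes.
    enum ELFv1LinkageSlotOffset : unsigned {
      BackChain = 0,
      CRSave    = 8,
      LRSave    = 16,
      Reserved1 = 24,
      Reserved2 = 32,
      TOCSave   = 40,   // LinkageSize = 48
    };

    // ELFv2 linkage area: [SP][CR][LR][TOC] = 4 doublewords = 32 bytes.
    enum ELFv2LinkageSlotOffset : unsigned {
      BackChainV2 = 0,
      CRSaveV2    = 8,
      LRSaveV2    = 16,
      TOCSaveV2   = 24,   // LinkageSize = 32
    };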
5558 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
5559 unsigned &QFPR_idx = FPR_idx;
5560
5561 static const MCPhysReg GPR[] = {
5562 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
5563 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
5564 };
5565 static const MCPhysReg VR[] = {
5566 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
5567 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
5568 };
5569
5570 const unsigned NumGPRs = array_lengthof(GPR);
5571 const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
5572 const unsigned NumVRs = array_lengthof(VR);
5573 const unsigned NumQFPRs = NumFPRs;
5574
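array_lengthof above is LLVM's compile-time array-length helper. Shown only as an equivalent (this file builds with C++11 and does not use it), plain C++17 can take the same count with std::size:

    #include <cstdio>
    #include <iterator>

    int main() {
      static const unsigned GPR[] = {3, 4, 5, 6, 7, 8, 9, 10}; // stand-ins for PPC::X3..X10
      printf("NumGPRs = %zu\n", std::size(GPR));               // 8
      return 0;
    }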
5575 // On ELFv2, we can avoid allocating the parameter area if all the arguments
5576 // can be passed to the callee in registers.
5577 // For the fast calling convention, there is another check below.
5578 // Note: We should keep consistent with LowerFormalArguments_64SVR4()
5579 bool HasParameterArea = !isELFv2ABI || isVarArg || CallConv == CallingConv::Fast;
5580 if (!HasParameterArea) {
5581 unsigned ParamAreaSize = NumGPRs * PtrByteSize;
5582 unsigned AvailableFPRs = NumFPRs;
5583 unsigned AvailableVRs = NumVRs;
5584 unsigned NumBytesTmp = NumBytes;
5585 for (unsigned i = 0; i != NumOps; ++i) {
5586 if (Outs[i].Flags.isNest()) continue;
5587 if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
5588 PtrByteSize, LinkageSize, ParamAreaSize,
5589 NumBytesTmp, AvailableFPRs, AvailableVRs,
5590 Subtarget.hasQPX()))
5591 HasParameterArea = true;
5592 }
5593 }
5594
5595 // When using the fast calling convention, we don't provide backing for
5596 // arguments that will be in registers.
5597 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
5598
5599 // Avoid allocating parameter area for fastcc functions if all the arguments
5600 // can be passed in the registers.
5601 if (CallConv == CallingConv::Fast)
5602 HasParameterArea = false;
5603
5604 // Add up all the space actually used.
5605 for (unsigned i = 0; i != NumOps; ++i) {
5606 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5607 EVT ArgVT = Outs[i].VT;
5608 EVT OrigVT = Outs[i].ArgVT;
5609
5610 if (Flags.isNest())
5611 continue;
5612
5613 if (CallConv == CallingConv::Fast) {
5614 if (Flags.isByVal()) {
5615 NumGPRsUsed += (Flags.getByValSize()+7)/8;
5616 if (NumGPRsUsed > NumGPRs)
5617 HasParameterArea = true;
5618 } else {
5619 switch (ArgVT.getSimpleVT().SimpleTy) {
5620         default: llvm_unreachable("Unexpected ValueType for argument!");
5621 case MVT::i1:
5622 case MVT::i32:
5623 case MVT::i64:
5624 if (++NumGPRsUsed <= NumGPRs)
5625 continue;
5626 break;
5627 case MVT::v4i32:
5628 case MVT::v8i16:
5629 case MVT::v16i8:
5630 case MVT::v2f64:
5631 case MVT::v2i64:
5632 case MVT::v1i128:
5633 case MVT::f128:
5634 if (++NumVRsUsed <= NumVRs)
5635 continue;
5636 break;
5637 case MVT::v4f32:
5638 // When using QPX, this is handled like a FP register, otherwise, it
5639 // is an Altivec register.
5640 if (Subtarget.hasQPX()) {
5641 if (++NumFPRsUsed <= NumFPRs)
5642 continue;
5643 } else {
5644 if (++NumVRsUsed <= NumVRs)
5645 continue;
5646 }
5647 break;
5648 case MVT::f32:
5649 case MVT::f64:
5650 case MVT::v4f64: // QPX
5651 case MVT::v4i1: // QPX
5652 if (++NumFPRsUsed <= NumFPRs)
5653 continue;
5654 break;
5655 }
5656 HasParameterArea = true;
5657 }
5658 }
5659
5660 /* Respect alignment of argument on the stack. */
5661 unsigned Align =
5662 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
5663 NumBytes = ((NumBytes + Align - 1) / Align) * Align;
5664
5665 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
5666 if (Flags.isInConsecutiveRegsLast())
5667 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
5668 }
5669
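The rounding used throughout the accounting loop above is the usual round-up-to-alignment idiom. A tiny standalone example (the helper name is made up for the sketch):

    #include <cstdio>

    // Round Bytes up to the next multiple of Align (Align must be non-zero).
    static unsigned roundUpTo(unsigned Bytes, unsigned Align) {
      return ((Bytes + Align - 1) / Align) * Align;
    }

    int main() {
      printf("%u\n", roundUpTo(40, 16)); // a 16-byte-aligned argument at offset 40 starts at 48
      printf("%u\n", roundUpTo(48, 8));  // already aligned: stays 48
      return 0;
    }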
5670 unsigned NumBytesActuallyUsed = NumBytes;
5671
5672   // In the old ELFv1 ABI, the prolog code of the callee may store up to 8
5673   // GPR argument registers to the stack, allowing va_start to index over
5674   // them in memory if it is varargs.
5675 // Because we cannot tell if this is needed on the caller side, we have to
5676 // conservatively assume that it is needed. As such, make sure we have at
5677 // least enough stack space for the caller to store the 8 GPRs.
5678 // In the ELFv2 ABI, we allocate the parameter area iff a callee
5679 // really requires memory operands, e.g. a vararg function.
5680 if (HasParameterArea)
5681 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
5682 else
5683 NumBytes = LinkageSize;
5684
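A worked example of the minimum enforced above, using the ELFv1 numbers mentioned earlier (48-byte linkage area, 8-byte pointers); the concrete starting value is invented for illustration.

    #include <algorithm>
    #include <cstdio>

    int main() {
      const unsigned LinkageSize = 48, PtrByteSize = 8;
      unsigned NumBytes = 56; // suppose the arguments only needed one doubleword past the linkage area
      // Leave room for the callee to dump all 8 GPR argument registers (va_start support).
      NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
      printf("NumBytes = %u\n", NumBytes); // 112
      return 0;
    }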
5685 // Tail call needs the stack to be aligned.
5686 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5687 CallConv == CallingConv::Fast)
5688 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
5689
5690 int SPDiff = 0;
5691
5692 // Calculate by how many bytes the stack has to be adjusted in case of tail
5693 // call optimization.
5694 if (!IsSibCall)
5695 SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
5696
5697 // To protect arguments on the stack from being clobbered in a tail call,
5698 // force all the loads to happen before doing any other lowering.
5699 if (isTailCall)
5700 Chain = DAG.getStackArgumentTokenFactor(Chain);
5701
5702 // Adjust the stack pointer for the new arguments...
5703 // These operations are automatically eliminated by the prolog/epilog pass
5704 if (!IsSibCall)
5705 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5706 SDValue CallSeqStart = Chain;
5707
5708   // Load the return address and frame pointer so they can be moved somewhere else
5709 // later.
5710 SDValue LROp, FPOp;
5711 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5712
5713 // Set up a copy of the stack pointer for use loading and storing any
5714 // arguments that may not fit in the registers available for argument
5715 // passing.
5716 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5717
5718 // Figure out which arguments are going to go in registers, and which in
5719 // memory. Also, if this is a vararg function, floating point operations
5720 // must be stored to our stack, and loaded into integer regs as well, if
5721 // any integer regs are available for argument passing.
5722 unsigned ArgOffset = LinkageSize;
5723
5724 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5725 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5726
5727 SmallVector<SDValue, 8> MemOpChains;
5728 for (unsigned i = 0; i != NumOps; ++i) {
5729 SDValue Arg = OutVals[i];
5730 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5731 EVT ArgVT = Outs[i].VT;
5732 EVT OrigVT = Outs[i].ArgVT;
5733
5734 // PtrOff will be used to store the current argument to the stack if a
5735 // register cannot be found for it.
5736 SDValue PtrOff;
5737
5738 // We re-align the argument offset for each argument, except when using the
5739 // fast calling convention, when we need to make sure we do that only when
5740 // we'll actually use a stack slot.
5741 auto ComputePtrOff = [&]() {
5742 /* Respect alignment of argument on the stack. */
5743 unsigned Align =
5744 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
5745 ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
5746
5747 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
5748
5749 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
5750 };
5751
5752 if (CallConv != CallingConv::Fast) {
5753 ComputePtrOff();
5754
5755 /* Compute GPR index associated with argument offset. */
5756 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
5757 GPR_idx = std::min(GPR_idx, NumGPRs);
5758 }
5759
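For the non-fastcc path just above, the GPR index is recovered directly from the running argument offset. A small worked example with made-up values (ELFv1 linkage size assumed):

    #include <algorithm>
    #include <cstdio>

    int main() {
      const unsigned LinkageSize = 48, PtrByteSize = 8, NumGPRs = 8;
      unsigned ArgOffset = 64; // third parameter doubleword of the parameter save area
      unsigned GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; // = 2
      GPR_idx = std::min(GPR_idx, NumGPRs);                       // clamp once the GPRs run out
      printf("GPR index %u (X%u on this target)\n", GPR_idx, GPR_idx + 3); // X3 is the first argument GPR
      return 0;
    }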
5760 // Promote integers to 64-bit values.
5761 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
5762 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
5763 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
5764 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
5765 }
5766
5767 // FIXME memcpy is used way more than necessary. Correctness first.
5768 // Note: "by value" is code for passing a structure by value, not
5769 // basic types.
5770 if (Flags.isByVal()) {
5771 // Note: Size includes alignment padding, so
5772 // struct x { short a; char b; }
5773 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
5774 // These are the proper values we need for right-justifying the
5775 // aggregate in a parameter register.
5776 unsigned Size = Flags.getByValSize();
5777
5778 // An empty aggregate parameter takes up no storage and no
5779 // registers.
5780 if (Size == 0)
5781 continue;
5782
5783 if (CallConv == CallingConv::Fast)
5784 ComputePtrOff();
5785
5786 // All aggregates smaller than 8 bytes must be passed right-justified.
5787 if (Size==1 || Size==2 || Size==4) {
5788 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
5789 if (GPR_idx != NumGPRs) {
5790 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
5791 MachinePointerInfo(), VT);
5792 MemOpChains.push_back(Load.getValue(1));
5793 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5794
5795 ArgOffset += PtrByteSize;
5796 continue;
5797 }
5798 }
5799
5800 if (GPR_idx == NumGPRs && Size < 8) {
5801 SDValue AddPtr = PtrOff;
5802 if (!isLittleEndian) {
5803 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
5804 PtrOff.getValueType());
5805 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
5806 }
5807 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
5808 CallSeqStart,
5809 Flags, DAG, dl);
5810 ArgOffset += PtrByteSize;
5811 continue;
5812 }
5813 // Copy entire object into memory. There are cases where gcc-generated
5814 // code assumes it is there, even if it could be put entirely into
5815 // registers. (This is not what the doc says.)
5816
5817 // FIXME: The above statement is likely due to a misunderstanding of the
5818 // documents. All arguments must be copied into the parameter area BY
5819 // THE CALLEE in the event that the callee takes the address of any
5820 // formal argument. That has not yet been implemented. However, it is
5821 // reasonable to use the stack area as a staging area for the register
5822 // load.
5823
5824 // Skip this for small aggregates, as we will use the same slot for a
5825 // right-justified copy, below.
5826 if (Size >= 8)
5827 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
5828 CallSeqStart,
5829 Flags, DAG, dl);
5830
5831 // When a register is available, pass a small aggregate right-justified.
5832 if (Size < 8 && GPR_idx != NumGPRs) {
5833 // The easiest way to get this right-justified in a register
5834 // is to copy the structure into the rightmost portion of a
5835 // local variable slot, then load the whole slot into the
5836 // register.
5837 // FIXME: The memcpy seems to produce pretty awful code for
5838 // small aggregates, particularly for packed ones.
5839 // FIXME: It would be preferable to use the slot in the
5840 // parameter save area instead of a new local variable.
5841 SDValue AddPtr = PtrOff;
5842 if (!isLittleEndian) {
5843 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
5844 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
5845 }
5846 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
5847 CallSeqStart,
5848 Flags, DAG, dl);
5849
5850 // Load the slot into the register.
5851 SDValue Load =
5852 DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
5853 MemOpChains.push_back(Load.getValue(1));
5854 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5855
5856 // Done with this argument.
5857 ArgOffset += PtrByteSize;
5858 continue;
5859 }
5860
5861 // For aggregates larger than PtrByteSize, copy the pieces of the
5862 // object that fit into registers from the parameter save area.
5863 for (unsigned j=0; j<Size; j+=PtrByteSize) {
5864 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
5865 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
5866 if (GPR_idx != NumGPRs) {
5867 SDValue Load =
5868 DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
5869 MemOpChains.push_back(Load.getValue(1));
5870 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5871 ArgOffset += PtrByteSize;
5872 } else {
5873 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
5874 break;
5875 }
5876 }
5877 continue;
5878 }
5879
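A standalone sketch of the big-endian right-justification used for the small byval cases above: the aggregate is copied into the high-address end of its 8-byte slot so that loading the whole doubleword leaves it right-justified in the register. The values mirror the struct in the earlier comment; nothing here is taken from this file.

    #include <cstdio>

    int main() {
      const unsigned PtrByteSize = 8;
      unsigned Size = 4;            // e.g. struct x { short a; char b; } including padding
      bool IsLittleEndian = false;  // big-endian PPC64
      unsigned CopyOffset = IsLittleEndian ? 0 : PtrByteSize - Size;
      printf("copy %u bytes at slot offset %u\n", Size, CopyOffset); // "copy 4 bytes at slot offset 4"
      return 0;
    }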
5880 switch (Arg.getSimpleValueType().SimpleTy) {
5881     default: llvm_unreachable("Unexpected ValueType for argument!");
5882 case MVT::i1:
5883 case MVT::i32:
5884 case MVT::i64:
5885 if (Flags.isNest()) {
5886 // The 'nest' parameter, if any, is passed in R11.
5887 RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
5888 hasNest = true;
5889 break;
5890 }
5891
5892 // These can be scalar arguments or elements of an integer array type
5893 // passed directly. Clang may use those instead of "byval" aggregate
5894 // types to avoid forcing arguments to memory unnecessarily.
5895 if (GPR_idx != NumGPRs) {
5896 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
5897 } else {
5898 if (CallConv == CallingConv::Fast)
5899 ComputePtrOff();
5900
5901         assert(HasParameterArea &&
5902                "Parameter area must exist to pass an argument in memory.");
5903 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5904 true, isTailCall, false, MemOpChains,
5905 TailCallArguments, dl);
5906 if (CallConv == CallingConv::Fast)
5907 ArgOffset += PtrByteSize;
5908 }
5909 if (CallConv != CallingConv::Fast)
5910 ArgOffset += PtrByteSize;
5911 break;
5912 case MVT::f32:
5913 case MVT::f64: {
5914 // These can be scalar arguments or elements of a float array type
5915 // passed directly. The latter are used to implement ELFv2 homogenous
5916 // float aggregates.
5917
5918 // Named arguments go into FPRs first, and once they overflow, the
5919 // remaining arguments go into GPRs and then the parameter save area.
5920 // Unnamed arguments for vararg functions always go to GPRs and
5921 // then the parameter save area. For now, put all arguments to vararg
5922 // routines always in both locations (FPR *and* GPR or