Bug Summary

File: lib/Target/PowerPC/PPCISelLowering.cpp
Warning: line 8832, column 31
1st function call argument is an uninitialized value
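
For context, here is a minimal, hypothetical sketch of the pattern behind this warning class ("1st function call argument is an uninitialized value"). It is not the code at line 8832, and the names are illustrative only; the core checker reports the call site where a value that may never have been assigned on some path is passed as an argument.

  // Hypothetical example, not taken from PPCISelLowering.cpp:
  static void use(int) {}          // stand-in callee

  static void example(bool Cond) {
    int Val;                       // uninitialized on the path where Cond is false
    if (Cond)
      Val = 1;
    use(Val);                      // analyzer: 1st function call argument is an
  }                                // uninitialized value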

Annotated Source Code

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name PPCISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -mframe-pointer=none -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-10/lib/clang/10.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-10~svn372087/build-llvm/lib/Target/PowerPC -I /build/llvm-toolchain-snapshot-10~svn372087/lib/Target/PowerPC -I /build/llvm-toolchain-snapshot-10~svn372087/build-llvm/include -I /build/llvm-toolchain-snapshot-10~svn372087/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-10/lib/clang/10.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-10~svn372087/build-llvm/lib/Target/PowerPC -fdebug-prefix-map=/build/llvm-toolchain-snapshot-10~svn372087=. -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -stack-protector 2 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2019-09-17-145504-7198-1 -x c++ /build/llvm-toolchain-snapshot-10~svn372087/lib/Target/PowerPC/PPCISelLowering.cpp

/build/llvm-toolchain-snapshot-10~svn372087/lib/Target/PowerPC/PPCISelLowering.cpp

1//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the PPCISelLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCISelLowering.h"
14#include "MCTargetDesc/PPCPredicates.h"
15#include "PPC.h"
16#include "PPCCCState.h"
17#include "PPCCallingConv.h"
18#include "PPCFrameLowering.h"
19#include "PPCInstrInfo.h"
20#include "PPCMachineFunctionInfo.h"
21#include "PPCPerfectShuffle.h"
22#include "PPCRegisterInfo.h"
23#include "PPCSubtarget.h"
24#include "PPCTargetMachine.h"
25#include "llvm/ADT/APFloat.h"
26#include "llvm/ADT/APInt.h"
27#include "llvm/ADT/ArrayRef.h"
28#include "llvm/ADT/DenseMap.h"
29#include "llvm/ADT/None.h"
30#include "llvm/ADT/STLExtras.h"
31#include "llvm/ADT/SmallPtrSet.h"
32#include "llvm/ADT/SmallSet.h"
33#include "llvm/ADT/SmallVector.h"
34#include "llvm/ADT/Statistic.h"
35#include "llvm/ADT/StringRef.h"
36#include "llvm/ADT/StringSwitch.h"
37#include "llvm/CodeGen/CallingConvLower.h"
38#include "llvm/CodeGen/ISDOpcodes.h"
39#include "llvm/CodeGen/MachineBasicBlock.h"
40#include "llvm/CodeGen/MachineFrameInfo.h"
41#include "llvm/CodeGen/MachineFunction.h"
42#include "llvm/CodeGen/MachineInstr.h"
43#include "llvm/CodeGen/MachineInstrBuilder.h"
44#include "llvm/CodeGen/MachineJumpTableInfo.h"
45#include "llvm/CodeGen/MachineLoopInfo.h"
46#include "llvm/CodeGen/MachineMemOperand.h"
47#include "llvm/CodeGen/MachineModuleInfo.h"
48#include "llvm/CodeGen/MachineOperand.h"
49#include "llvm/CodeGen/MachineRegisterInfo.h"
50#include "llvm/CodeGen/RuntimeLibcalls.h"
51#include "llvm/CodeGen/SelectionDAG.h"
52#include "llvm/CodeGen/SelectionDAGNodes.h"
53#include "llvm/CodeGen/TargetInstrInfo.h"
54#include "llvm/CodeGen/TargetLowering.h"
55#include "llvm/CodeGen/TargetRegisterInfo.h"
56#include "llvm/CodeGen/ValueTypes.h"
57#include "llvm/IR/CallSite.h"
58#include "llvm/IR/CallingConv.h"
59#include "llvm/IR/Constant.h"
60#include "llvm/IR/Constants.h"
61#include "llvm/IR/DataLayout.h"
62#include "llvm/IR/DebugLoc.h"
63#include "llvm/IR/DerivedTypes.h"
64#include "llvm/IR/Function.h"
65#include "llvm/IR/GlobalValue.h"
66#include "llvm/IR/IRBuilder.h"
67#include "llvm/IR/Instructions.h"
68#include "llvm/IR/Intrinsics.h"
69#include "llvm/IR/Module.h"
70#include "llvm/IR/Type.h"
71#include "llvm/IR/Use.h"
72#include "llvm/IR/Value.h"
73#include "llvm/MC/MCContext.h"
74#include "llvm/MC/MCExpr.h"
75#include "llvm/MC/MCRegisterInfo.h"
76#include "llvm/MC/MCSymbolXCOFF.h"
77#include "llvm/Support/AtomicOrdering.h"
78#include "llvm/Support/BranchProbability.h"
79#include "llvm/Support/Casting.h"
80#include "llvm/Support/CodeGen.h"
81#include "llvm/Support/CommandLine.h"
82#include "llvm/Support/Compiler.h"
83#include "llvm/Support/Debug.h"
84#include "llvm/Support/ErrorHandling.h"
85#include "llvm/Support/Format.h"
86#include "llvm/Support/KnownBits.h"
87#include "llvm/Support/MachineValueType.h"
88#include "llvm/Support/MathExtras.h"
89#include "llvm/Support/raw_ostream.h"
90#include "llvm/Target/TargetMachine.h"
91#include "llvm/Target/TargetOptions.h"
92#include <algorithm>
93#include <cassert>
94#include <cstdint>
95#include <iterator>
96#include <list>
97#include <utility>
98#include <vector>
99
100using namespace llvm;
101
102#define DEBUG_TYPE "ppc-lowering"
103
104static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
105cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
106
107static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
108cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
109
110static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
111cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
112
113static cl::opt<bool> DisableSCO("disable-ppc-sco",
114cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
115
116static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
117cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
118
119static cl::opt<bool> EnableQuadPrecision("enable-ppc-quad-precision",
120cl::desc("enable quad precision float support on ppc"), cl::Hidden);
121
122STATISTIC(NumTailCalls, "Number of tail calls");
123STATISTIC(NumSiblingCalls, "Number of sibling calls");
124
125static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
126
127static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
128
129// FIXME: Remove this once the bug has been fixed!
130extern cl::opt<bool> ANDIGlueBug;
131
132PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
133 const PPCSubtarget &STI)
134 : TargetLowering(TM), Subtarget(STI) {
135 // Use _setjmp/_longjmp instead of setjmp/longjmp.
136 setUseUnderscoreSetJmp(true);
137 setUseUnderscoreLongJmp(true);
138
139 // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
140 // arguments are at least 4/8 bytes aligned.
141 bool isPPC64 = Subtarget.isPPC64();
142 setMinStackArgumentAlignment(isPPC64 ? llvm::Align(8) : llvm::Align(4));
143
144 // Set up the register classes.
145 addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
146 if (!useSoftFloat()) {
147 if (hasSPE()) {
148 addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
149 addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
150 } else {
151 addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
152 addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
153 }
154 }
155
156 // Match BITREVERSE to customized fast code sequence in the td file.
157 setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
158 setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
159
160 // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
161 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
162
163 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
164 for (MVT VT : MVT::integer_valuetypes()) {
165 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
166 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
167 }
168
169 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
170
171 // PowerPC has pre-inc loads and stores.
172 setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
173 setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
174 setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
175 setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
176 setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
177 setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
178 setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
179 setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
180 setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
181 setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
182 if (!Subtarget.hasSPE()) {
183 setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
184 setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
185 setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
186 setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
187 }
188
189 // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
190 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
191 for (MVT VT : ScalarIntVTs) {
192 setOperationAction(ISD::ADDC, VT, Legal);
193 setOperationAction(ISD::ADDE, VT, Legal);
194 setOperationAction(ISD::SUBC, VT, Legal);
195 setOperationAction(ISD::SUBE, VT, Legal);
196 }
197
198 if (Subtarget.useCRBits()) {
199 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
200
201 if (isPPC64 || Subtarget.hasFPCVT()) {
202 setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
203 AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
204 isPPC64 ? MVT::i64 : MVT::i32);
205 setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
206 AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
207 isPPC64 ? MVT::i64 : MVT::i32);
208 } else {
209 setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
210 setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
211 }
212
213 // PowerPC does not support direct load/store of condition registers.
214 setOperationAction(ISD::LOAD, MVT::i1, Custom);
215 setOperationAction(ISD::STORE, MVT::i1, Custom);
216
217 // FIXME: Remove this once the ANDI glue bug is fixed:
218 if (ANDIGlueBug)
219 setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
220
221 for (MVT VT : MVT::integer_valuetypes()) {
222 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
223 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
224 setTruncStoreAction(VT, MVT::i1, Expand);
225 }
226
227 addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
228 }
229
230 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
231 // PPC (the libcall is not available).
232 setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom);
233 setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom);
234
235 // We do not currently implement these libm ops for PowerPC.
236 setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
237 setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
238 setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
239 setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
240 setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
241 setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
242
243 // PowerPC has no SREM/UREM instructions unless we are on P9
244 // On P9 we may use a hardware instruction to compute the remainder.
245 // The instructions are not legalized directly because in the cases where the
246 // result of both the remainder and the division is required it is more
247 // efficient to compute the remainder from the result of the division rather
248 // than use the remainder instruction.
249 if (Subtarget.isISA3_0()) {
250 setOperationAction(ISD::SREM, MVT::i32, Custom);
251 setOperationAction(ISD::UREM, MVT::i32, Custom);
252 setOperationAction(ISD::SREM, MVT::i64, Custom);
253 setOperationAction(ISD::UREM, MVT::i64, Custom);
254 } else {
255 setOperationAction(ISD::SREM, MVT::i32, Expand);
256 setOperationAction(ISD::UREM, MVT::i32, Expand);
257 setOperationAction(ISD::SREM, MVT::i64, Expand);
258 setOperationAction(ISD::UREM, MVT::i64, Expand);
259 }
260
261 // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
262 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
263 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
264 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
265 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
266 setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
267 setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
268 setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
269 setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
270
271 // We don't support sin/cos/sqrt/fmod/pow
272 setOperationAction(ISD::FSIN , MVT::f64, Expand);
273 setOperationAction(ISD::FCOS , MVT::f64, Expand);
274 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
275 setOperationAction(ISD::FREM , MVT::f64, Expand);
276 setOperationAction(ISD::FPOW , MVT::f64, Expand);
277 setOperationAction(ISD::FSIN , MVT::f32, Expand);
278 setOperationAction(ISD::FCOS , MVT::f32, Expand);
279 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
280 setOperationAction(ISD::FREM , MVT::f32, Expand);
281 setOperationAction(ISD::FPOW , MVT::f32, Expand);
282 if (Subtarget.hasSPE()) {
283 setOperationAction(ISD::FMA , MVT::f64, Expand);
284 setOperationAction(ISD::FMA , MVT::f32, Expand);
285 } else {
286 setOperationAction(ISD::FMA , MVT::f64, Legal);
287 setOperationAction(ISD::FMA , MVT::f32, Legal);
288 }
289
290 setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
291
292 // If we're enabling GP optimizations, use hardware square root
293 if (!Subtarget.hasFSQRT() &&
294 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
295 Subtarget.hasFRE()))
296 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
297
298 if (!Subtarget.hasFSQRT() &&
299 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
300 Subtarget.hasFRES()))
301 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
302
303 if (Subtarget.hasFCPSGN()) {
304 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
305 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
306 } else {
307 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
308 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
309 }
310
311 if (Subtarget.hasFPRND()) {
312 setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
313 setOperationAction(ISD::FCEIL, MVT::f64, Legal);
314 setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
315 setOperationAction(ISD::FROUND, MVT::f64, Legal);
316
317 setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
318 setOperationAction(ISD::FCEIL, MVT::f32, Legal);
319 setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
320 setOperationAction(ISD::FROUND, MVT::f32, Legal);
321 }
322
323 // PowerPC does not have BSWAP, but we can use vector BSWAP instruction xxbrd
324 // to speed up scalar BSWAP64.
325 // CTPOP and CTTZ were introduced in P8 and P9, respectively.
326 setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
327 if (Subtarget.hasP9Vector())
328 setOperationAction(ISD::BSWAP, MVT::i64 , Custom);
329 else
330 setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
331 if (Subtarget.isISA3_0()) {
332 setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
333 setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
334 } else {
335 setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
336 setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
337 }
338
339 if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
340 setOperationAction(ISD::CTPOP, MVT::i32 , Legal);
341 setOperationAction(ISD::CTPOP, MVT::i64 , Legal);
342 } else {
343 setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
344 setOperationAction(ISD::CTPOP, MVT::i64 , Expand);
345 }
346
347 // PowerPC does not have ROTR
348 setOperationAction(ISD::ROTR, MVT::i32 , Expand);
349 setOperationAction(ISD::ROTR, MVT::i64 , Expand);
350
351 if (!Subtarget.useCRBits()) {
352 // PowerPC does not have Select
353 setOperationAction(ISD::SELECT, MVT::i32, Expand);
354 setOperationAction(ISD::SELECT, MVT::i64, Expand);
355 setOperationAction(ISD::SELECT, MVT::f32, Expand);
356 setOperationAction(ISD::SELECT, MVT::f64, Expand);
357 }
358
359 // PowerPC wants to turn select_cc of FP into fsel when possible.
360 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
361 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
362
363 // PowerPC wants to optimize integer setcc a bit
364 if (!Subtarget.useCRBits())
365 setOperationAction(ISD::SETCC, MVT::i32, Custom);
366
367 // PowerPC does not have BRCOND which requires SetCC
368 if (!Subtarget.useCRBits())
369 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
370
371 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
372
373 if (Subtarget.hasSPE()) {
374 // SPE has built-in conversions
375 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
376 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
377 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
378 } else {
379 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
380 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
381
382 // PowerPC does not have [U|S]INT_TO_FP
383 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
384 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
385 }
386
387 if (Subtarget.hasDirectMove() && isPPC64) {
388 setOperationAction(ISD::BITCAST, MVT::f32, Legal);
389 setOperationAction(ISD::BITCAST, MVT::i32, Legal);
390 setOperationAction(ISD::BITCAST, MVT::i64, Legal);
391 setOperationAction(ISD::BITCAST, MVT::f64, Legal);
392 } else {
393 setOperationAction(ISD::BITCAST, MVT::f32, Expand);
394 setOperationAction(ISD::BITCAST, MVT::i32, Expand);
395 setOperationAction(ISD::BITCAST, MVT::i64, Expand);
396 setOperationAction(ISD::BITCAST, MVT::f64, Expand);
397 }
398
399 // We cannot sextinreg(i1). Expand to shifts.
400 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
401
402 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
403 // SjLj exception handling but a light-weight setjmp/longjmp replacement to
404 // support continuation, user-level threading, and so on. As a result, no
405 // other SjLj exception interfaces are implemented; please don't build
406 // your own exception handling based on them.
407 // LLVM/Clang supports zero-cost DWARF exception handling.
408 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
409 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
410
411 // We want to legalize GlobalAddress and ConstantPool nodes into the
412 // appropriate instructions to materialize the address.
413 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
414 setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
415 setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
416 setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
417 setOperationAction(ISD::JumpTable, MVT::i32, Custom);
418 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
419 setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
420 setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
421 setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
422 setOperationAction(ISD::JumpTable, MVT::i64, Custom);
423
424 // TRAP is legal.
425 setOperationAction(ISD::TRAP, MVT::Other, Legal);
426
427 // TRAMPOLINE is custom lowered.
428 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
429 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
430
431 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
432 setOperationAction(ISD::VASTART , MVT::Other, Custom);
433
434 if (Subtarget.is64BitELFABI()) {
435 // VAARG always uses double-word chunks, so promote anything smaller.
436 setOperationAction(ISD::VAARG, MVT::i1, Promote);
437 AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
438 setOperationAction(ISD::VAARG, MVT::i8, Promote);
439 AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
440 setOperationAction(ISD::VAARG, MVT::i16, Promote);
441 AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
442 setOperationAction(ISD::VAARG, MVT::i32, Promote);
443 AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
444 setOperationAction(ISD::VAARG, MVT::Other, Expand);
445 } else if (Subtarget.is32BitELFABI()) {
446 // VAARG is custom lowered with the 32-bit SVR4 ABI.
447 setOperationAction(ISD::VAARG, MVT::Other, Custom);
448 setOperationAction(ISD::VAARG, MVT::i64, Custom);
449 } else
450 setOperationAction(ISD::VAARG, MVT::Other, Expand);
451
452 // VACOPY is custom lowered with the 32-bit SVR4 ABI.
453 if (Subtarget.is32BitELFABI())
454 setOperationAction(ISD::VACOPY , MVT::Other, Custom);
455 else
456 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
457
458 // Use the default implementation.
459 setOperationAction(ISD::VAEND , MVT::Other, Expand);
460 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
461 setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom);
462 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
463 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom);
464 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
465 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
466 setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
467 setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
468
469 // We want to custom lower some of our intrinsics.
470 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
471
472 // To handle counter-based loop conditions.
473 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
474
475 setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
476 setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
477 setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
478 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
479
480 // Comparisons that require checking two conditions.
481 if (Subtarget.hasSPE()) {
482 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
483 setCondCodeAction(ISD::SETO, MVT::f64, Expand);
484 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
485 setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
486 }
487 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
488 setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
489 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
490 setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
491 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
492 setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
493 setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
494 setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
495 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
496 setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
497 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
498 setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
499
500 if (Subtarget.has64BitSupport()) {
501 // They also have instructions for converting between i64 and fp.
502 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
503 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
504 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
505 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
506 // This is just the low 32 bits of a (signed) fp->i64 conversion.
507 // We cannot do this with Promote because i64 is not a legal type.
508 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
509
510 if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
511 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
512 } else {
513 // PowerPC does not have FP_TO_UINT on 32-bit implementations.
514 if (Subtarget.hasSPE())
515 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
516 else
517 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
518 }
519
520 // With the instructions enabled under FPCVT, we can do everything.
521 if (Subtarget.hasFPCVT()) {
522 if (Subtarget.has64BitSupport()) {
523 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
524 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
525 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
526 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
527 }
528
529 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
530 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
531 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
532 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
533 }
534
535 if (Subtarget.use64BitRegs()) {
536 // 64-bit PowerPC implementations can support i64 types directly
537 addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
538 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
539 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
540 // 64-bit PowerPC wants to expand i128 shifts itself.
541 setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
542 setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
543 setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
544 } else {
545 // 32-bit PowerPC wants to expand i64 shifts itself.
546 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
547 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
548 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
549 }
550
551 if (Subtarget.hasAltivec()) {
552 // First set operation action for all vector types to expand. Then we
553 // will selectively turn on ones that can be effectively codegen'd.
554 for (MVT VT : MVT::vector_valuetypes()) {
555 // add/sub are legal for all supported vector VT's.
556 setOperationAction(ISD::ADD, VT, Legal);
557 setOperationAction(ISD::SUB, VT, Legal);
558
559 // For v2i64, these are only valid with P8Vector. This is corrected after
560 // the loop.
561 if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
562 setOperationAction(ISD::SMAX, VT, Legal);
563 setOperationAction(ISD::SMIN, VT, Legal);
564 setOperationAction(ISD::UMAX, VT, Legal);
565 setOperationAction(ISD::UMIN, VT, Legal);
566 }
567 else {
568 setOperationAction(ISD::SMAX, VT, Expand);
569 setOperationAction(ISD::SMIN, VT, Expand);
570 setOperationAction(ISD::UMAX, VT, Expand);
571 setOperationAction(ISD::UMIN, VT, Expand);
572 }
573
574 if (Subtarget.hasVSX()) {
575 setOperationAction(ISD::FMAXNUM, VT, Legal);
576 setOperationAction(ISD::FMINNUM, VT, Legal);
577 }
578
579 // Vector instructions introduced in P8
580 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
581 setOperationAction(ISD::CTPOP, VT, Legal);
582 setOperationAction(ISD::CTLZ, VT, Legal);
583 }
584 else {
585 setOperationAction(ISD::CTPOP, VT, Expand);
586 setOperationAction(ISD::CTLZ, VT, Expand);
587 }
588
589 // Vector instructions introduced in P9
590 if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
591 setOperationAction(ISD::CTTZ, VT, Legal);
592 else
593 setOperationAction(ISD::CTTZ, VT, Expand);
594
595 // We promote all shuffles to v16i8.
596 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
597 AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
598
599 // We promote all non-typed operations to v4i32.
600 setOperationAction(ISD::AND , VT, Promote);
601 AddPromotedToType (ISD::AND , VT, MVT::v4i32);
602 setOperationAction(ISD::OR , VT, Promote);
603 AddPromotedToType (ISD::OR , VT, MVT::v4i32);
604 setOperationAction(ISD::XOR , VT, Promote);
605 AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
606 setOperationAction(ISD::LOAD , VT, Promote);
607 AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
608 setOperationAction(ISD::SELECT, VT, Promote);
609 AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
610 setOperationAction(ISD::VSELECT, VT, Legal);
611 setOperationAction(ISD::SELECT_CC, VT, Promote);
612 AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
613 setOperationAction(ISD::STORE, VT, Promote);
614 AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
615
616 // No other operations are legal.
617 setOperationAction(ISD::MUL , VT, Expand);
618 setOperationAction(ISD::SDIV, VT, Expand);
619 setOperationAction(ISD::SREM, VT, Expand);
620 setOperationAction(ISD::UDIV, VT, Expand);
621 setOperationAction(ISD::UREM, VT, Expand);
622 setOperationAction(ISD::FDIV, VT, Expand);
623 setOperationAction(ISD::FREM, VT, Expand);
624 setOperationAction(ISD::FNEG, VT, Expand);
625 setOperationAction(ISD::FSQRT, VT, Expand);
626 setOperationAction(ISD::FLOG, VT, Expand);
627 setOperationAction(ISD::FLOG10, VT, Expand);
628 setOperationAction(ISD::FLOG2, VT, Expand);
629 setOperationAction(ISD::FEXP, VT, Expand);
630 setOperationAction(ISD::FEXP2, VT, Expand);
631 setOperationAction(ISD::FSIN, VT, Expand);
632 setOperationAction(ISD::FCOS, VT, Expand);
633 setOperationAction(ISD::FABS, VT, Expand);
634 setOperationAction(ISD::FFLOOR, VT, Expand);
635 setOperationAction(ISD::FCEIL, VT, Expand);
636 setOperationAction(ISD::FTRUNC, VT, Expand);
637 setOperationAction(ISD::FRINT, VT, Expand);
638 setOperationAction(ISD::FNEARBYINT, VT, Expand);
639 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
640 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
641 setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
642 setOperationAction(ISD::MULHU, VT, Expand);
643 setOperationAction(ISD::MULHS, VT, Expand);
644 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
645 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
646 setOperationAction(ISD::UDIVREM, VT, Expand);
647 setOperationAction(ISD::SDIVREM, VT, Expand);
648 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
649 setOperationAction(ISD::FPOW, VT, Expand);
650 setOperationAction(ISD::BSWAP, VT, Expand);
651 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
652 setOperationAction(ISD::ROTL, VT, Expand);
653 setOperationAction(ISD::ROTR, VT, Expand);
654
655 for (MVT InnerVT : MVT::vector_valuetypes()) {
656 setTruncStoreAction(VT, InnerVT, Expand);
657 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
658 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
659 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
660 }
661 }
662 if (!Subtarget.hasP8Vector()) {
663 setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
664 setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
665 setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
666 setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
667 }
668
669 for (auto VT : {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8})
670 setOperationAction(ISD::ABS, VT, Custom);
671
672 // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
673 // with merges, splats, etc.
674 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
675
676 // Vector truncates to sub-word integer that fit in an Altivec/VSX register
677 // are cheap, so handle them before they get expanded to scalar.
678 setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
679 setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
680 setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
681 setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
682 setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
683
684 setOperationAction(ISD::AND , MVT::v4i32, Legal);
685 setOperationAction(ISD::OR , MVT::v4i32, Legal);
686 setOperationAction(ISD::XOR , MVT::v4i32, Legal);
687 setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
688 setOperationAction(ISD::SELECT, MVT::v4i32,
689 Subtarget.useCRBits() ? Legal : Expand);
690 setOperationAction(ISD::STORE , MVT::v4i32, Legal);
691 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
692 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
693 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
694 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
695 setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
696 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
697 setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
698 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
699
700 // Without hasP8Altivec set, v2i64 SMAX isn't available.
701 // But ABS custom lowering requires SMAX support.
702 if (!Subtarget.hasP8Altivec())
703 setOperationAction(ISD::ABS, MVT::v2i64, Expand);
704
705 addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
706 addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
707 addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
708 addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
709
710 setOperationAction(ISD::MUL, MVT::v4f32, Legal);
711 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
712
713 if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) {
714 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
715 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
716 }
717
718 if (Subtarget.hasP8Altivec())
719 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
720 else
721 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
722
723 setOperationAction(ISD::MUL, MVT::v8i16, Custom);
724 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
725
726 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
727 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
728
729 setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
730 setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
731 setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
732 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
733
734 // Altivec does not contain unordered floating-point compare instructions
735 setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
736 setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
737 setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
738 setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
739
740 if (Subtarget.hasVSX()) {
741 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
742 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
743 if (Subtarget.hasP8Vector()) {
744 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
745 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
746 }
747 if (Subtarget.hasDirectMove() && isPPC64) {
748 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
749 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
750 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
751 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
752 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
753 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
754 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
755 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
756 }
757 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
758
759 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
760 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
761 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
762 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
763 setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
764
765 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
766
767 setOperationAction(ISD::MUL, MVT::v2f64, Legal);
768 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
769
770 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
771 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
772
773 // Share the Altivec comparison restrictions.
774 setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
775 setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
776 setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
777 setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
778
779 setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
780 setOperationAction(ISD::STORE, MVT::v2f64, Legal);
781
782 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);
783
784 if (Subtarget.hasP8Vector())
785 addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
786
787 addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
788
789 addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
790 addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
791 addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
792
793 if (Subtarget.hasP8Altivec()) {
794 setOperationAction(ISD::SHL, MVT::v2i64, Legal);
795 setOperationAction(ISD::SRA, MVT::v2i64, Legal);
796 setOperationAction(ISD::SRL, MVT::v2i64, Legal);
797
798 // 128 bit shifts can be accomplished via 3 instructions for SHL and
799 // SRL, but not for SRA because of the instructions available:
800 // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth
801 // doing
802 setOperationAction(ISD::SHL, MVT::v1i128, Expand);
803 setOperationAction(ISD::SRL, MVT::v1i128, Expand);
804 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
805
806 setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
807 }
808 else {
809 setOperationAction(ISD::SHL, MVT::v2i64, Expand);
810 setOperationAction(ISD::SRA, MVT::v2i64, Expand);
811 setOperationAction(ISD::SRL, MVT::v2i64, Expand);
812
813 setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
814
815 // VSX v2i64 only supports non-arithmetic operations.
816 setOperationAction(ISD::ADD, MVT::v2i64, Expand);
817 setOperationAction(ISD::SUB, MVT::v2i64, Expand);
818 }
819
820 setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
821 AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
822 setOperationAction(ISD::STORE, MVT::v2i64, Promote);
823 AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
824
825 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);
826
827 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
828 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
829 setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
830 setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
831
832 // Custom handling for partial vectors of integers converted to
833 // floating point. We already have optimal handling for v2i32 through
834 // the DAG combine, so those aren't necessary.
835 setOperationAction(ISD::UINT_TO_FP, MVT::v2i8, Custom);
836 setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
837 setOperationAction(ISD::UINT_TO_FP, MVT::v2i16, Custom);
838 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
839 setOperationAction(ISD::SINT_TO_FP, MVT::v2i8, Custom);
840 setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Custom);
841 setOperationAction(ISD::SINT_TO_FP, MVT::v2i16, Custom);
842 setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
843
844 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
845 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
846 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
847 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
848 setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
849 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Legal);
850
851 if (Subtarget.hasDirectMove())
852 setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
853 setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
854
855 addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
856 }
857
858 if (Subtarget.hasP8Altivec()) {
859 addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
860 addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
861 }
862
863 if (Subtarget.hasP9Vector()) {
864 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
865 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
866
867 // 128 bit shifts can be accomplished via 3 instructions for SHL and
868 // SRL, but not for SRA because of the instructions available:
869 // VS{RL} and VS{RL}O.
870 setOperationAction(ISD::SHL, MVT::v1i128, Legal);
871 setOperationAction(ISD::SRL, MVT::v1i128, Legal);
872 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
873
874 if (EnableQuadPrecision) {
875 addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
876 setOperationAction(ISD::FADD, MVT::f128, Legal);
877 setOperationAction(ISD::FSUB, MVT::f128, Legal);
878 setOperationAction(ISD::FDIV, MVT::f128, Legal);
879 setOperationAction(ISD::FMUL, MVT::f128, Legal);
880 setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
881 // No extending loads to f128 on PPC.
882 for (MVT FPT : MVT::fp_valuetypes())
883 setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
884 setOperationAction(ISD::FMA, MVT::f128, Legal);
885 setCondCodeAction(ISD::SETULT, MVT::f128, Expand);
886 setCondCodeAction(ISD::SETUGT, MVT::f128, Expand);
887 setCondCodeAction(ISD::SETUEQ, MVT::f128, Expand);
888 setCondCodeAction(ISD::SETOGE, MVT::f128, Expand);
889 setCondCodeAction(ISD::SETOLE, MVT::f128, Expand);
890 setCondCodeAction(ISD::SETONE, MVT::f128, Expand);
891
892 setOperationAction(ISD::FTRUNC, MVT::f128, Legal);
893 setOperationAction(ISD::FRINT, MVT::f128, Legal);
894 setOperationAction(ISD::FFLOOR, MVT::f128, Legal);
895 setOperationAction(ISD::FCEIL, MVT::f128, Legal);
896 setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal);
897 setOperationAction(ISD::FROUND, MVT::f128, Legal);
898
899 setOperationAction(ISD::SELECT, MVT::f128, Expand);
900 setOperationAction(ISD::FP_ROUND, MVT::f64, Legal);
901 setOperationAction(ISD::FP_ROUND, MVT::f32, Legal);
902 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
903 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
904 setOperationAction(ISD::BITCAST, MVT::i128, Custom);
905 // No implementation for these ops for PowerPC.
906 setOperationAction(ISD::FSIN , MVT::f128, Expand);
907 setOperationAction(ISD::FCOS , MVT::f128, Expand);
908 setOperationAction(ISD::FPOW, MVT::f128, Expand);
909 setOperationAction(ISD::FPOWI, MVT::f128, Expand);
910 setOperationAction(ISD::FREM, MVT::f128, Expand);
911 }
912 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
913
914 }
915
916 if (Subtarget.hasP9Altivec()) {
917 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
918 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
919 }
920 }
921
922 if (Subtarget.hasQPX()) {
923 setOperationAction(ISD::FADD, MVT::v4f64, Legal);
924 setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
925 setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
926 setOperationAction(ISD::FREM, MVT::v4f64, Expand);
927
928 setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal);
929 setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand);
930
931 setOperationAction(ISD::LOAD , MVT::v4f64, Custom);
932 setOperationAction(ISD::STORE , MVT::v4f64, Custom);
933
934 setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom);
935 setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom);
936
937 if (!Subtarget.useCRBits())
938 setOperationAction(ISD::SELECT, MVT::v4f64, Expand);
939 setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);
940
941 setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f64, Legal);
942 setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f64, Expand);
943 setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f64, Expand);
944 setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f64, Expand);
945 setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f64, Custom);
946 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal);
947 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom);
948
949 setOperationAction(ISD::FP_TO_SINT , MVT::v4f64, Legal);
950 setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand);
951
952 setOperationAction(ISD::FP_ROUND , MVT::v4f32, Legal);
953 setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal);
954
955 setOperationAction(ISD::FNEG , MVT::v4f64, Legal);
956 setOperationAction(ISD::FABS , MVT::v4f64, Legal);
957 setOperationAction(ISD::FSIN , MVT::v4f64, Expand);
958 setOperationAction(ISD::FCOS , MVT::v4f64, Expand);
959 setOperationAction(ISD::FPOW , MVT::v4f64, Expand);
960 setOperationAction(ISD::FLOG , MVT::v4f64, Expand);
961 setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand);
962 setOperationAction(ISD::FLOG10 , MVT::v4f64, Expand);
963 setOperationAction(ISD::FEXP , MVT::v4f64, Expand);
964 setOperationAction(ISD::FEXP2 , MVT::v4f64, Expand);
965
966 setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal);
967 setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal);
968
969 setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal);
970 setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal);
971
972 addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass);
973
974 setOperationAction(ISD::FADD, MVT::v4f32, Legal);
975 setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
976 setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
977 setOperationAction(ISD::FREM, MVT::v4f32, Expand);
978
979 setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
980 setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand);
981
982 setOperationAction(ISD::LOAD , MVT::v4f32, Custom);
983 setOperationAction(ISD::STORE , MVT::v4f32, Custom);
984
985 if (!Subtarget.useCRBits())
986 setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
987 setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
988
989 setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f32, Legal);
990 setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f32, Expand);
991 setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f32, Expand);
992 setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f32, Expand);
993 setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f32, Custom);
994 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
995 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
996
997 setOperationAction(ISD::FP_TO_SINT , MVT::v4f32, Legal);
998 setOperationAction(ISD::FP_TO_UINT , MVT::v4f32, Expand);
999
1000 setOperationAction(ISD::FNEG , MVT::v4f32, Legal);
1001 setOperationAction(ISD::FABS , MVT::v4f32, Legal);
1002 setOperationAction(ISD::FSIN , MVT::v4f32, Expand);
1003 setOperationAction(ISD::FCOS , MVT::v4f32, Expand);
1004 setOperationAction(ISD::FPOW , MVT::v4f32, Expand);
1005 setOperationAction(ISD::FLOG , MVT::v4f32, Expand);
1006 setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand);
1007 setOperationAction(ISD::FLOG10 , MVT::v4f32, Expand);
1008 setOperationAction(ISD::FEXP , MVT::v4f32, Expand);
1009 setOperationAction(ISD::FEXP2 , MVT::v4f32, Expand);
1010
1011 setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
1012 setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
1013
1014 setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal);
1015 setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal);
1016
1017 addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass);
1018
1019 setOperationAction(ISD::AND , MVT::v4i1, Legal);
1020 setOperationAction(ISD::OR , MVT::v4i1, Legal);
1021 setOperationAction(ISD::XOR , MVT::v4i1, Legal);
1022
1023 if (!Subtarget.useCRBits())
1024 setOperationAction(ISD::SELECT, MVT::v4i1, Expand);
1025 setOperationAction(ISD::VSELECT, MVT::v4i1, Legal);
1026
1027 setOperationAction(ISD::LOAD , MVT::v4i1, Custom);
1028 setOperationAction(ISD::STORE , MVT::v4i1, Custom);
1029
1030 setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4i1, Custom);
1031 setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4i1, Expand);
1032 setOperationAction(ISD::CONCAT_VECTORS , MVT::v4i1, Expand);
1033 setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4i1, Expand);
1034 setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4i1, Custom);
1035 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand);
1036 setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);
1037
1038 setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
1039 setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);
1040
1041 addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass);
1042
1043 setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
1044 setOperationAction(ISD::FCEIL, MVT::v4f64, Legal);
1045 setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
1046 setOperationAction(ISD::FROUND, MVT::v4f64, Legal);
1047
1048 setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
1049 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
1050 setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
1051 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
1052
1053 setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand);
1054 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
1055
1056 // These need to set FE_INEXACT, and so cannot be vectorized here.
1057 setOperationAction(ISD::FRINT, MVT::v4f64, Expand);
1058 setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
1059
1060 if (TM.Options.UnsafeFPMath) {
1061 setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
1062 setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
1063
1064 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
1065 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
1066 } else {
1067 setOperationAction(ISD::FDIV, MVT::v4f64, Expand);
1068 setOperationAction(ISD::FSQRT, MVT::v4f64, Expand);
1069
1070 setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
1071 setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
1072 }
1073 }
1074
1075 if (Subtarget.has64BitSupport())
1076 setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
1077
1078 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1079
1080 if (!isPPC64) {
1081 setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
1082 setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
1083 }
1084
1085 setBooleanContents(ZeroOrOneBooleanContent);
1086
1087 if (Subtarget.hasAltivec()) {
1088 // Altivec instructions set fields to all zeros or all ones.
1089 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
1090 }
1091
1092 if (!isPPC64) {
1093 // These libcalls are not available in 32-bit.
1094 setLibcallName(RTLIB::SHL_I128, nullptr);
1095 setLibcallName(RTLIB::SRL_I128, nullptr);
1096 setLibcallName(RTLIB::SRA_I128, nullptr);
1097 }
1098
1099 setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1100
1101 // We have target-specific dag combine patterns for the following nodes:
1102 setTargetDAGCombine(ISD::ADD);
1103 setTargetDAGCombine(ISD::SHL);
1104 setTargetDAGCombine(ISD::SRA);
1105 setTargetDAGCombine(ISD::SRL);
1106 setTargetDAGCombine(ISD::MUL);
1107 setTargetDAGCombine(ISD::SINT_TO_FP);
1108 setTargetDAGCombine(ISD::BUILD_VECTOR);
1109 if (Subtarget.hasFPCVT())
1110 setTargetDAGCombine(ISD::UINT_TO_FP);
1111 setTargetDAGCombine(ISD::LOAD);
1112 setTargetDAGCombine(ISD::STORE);
1113 setTargetDAGCombine(ISD::BR_CC);
1114 if (Subtarget.useCRBits())
1115 setTargetDAGCombine(ISD::BRCOND);
1116 setTargetDAGCombine(ISD::BSWAP);
1117 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
1118 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
1119 setTargetDAGCombine(ISD::INTRINSIC_VOID);
1120
1121 setTargetDAGCombine(ISD::SIGN_EXTEND);
1122 setTargetDAGCombine(ISD::ZERO_EXTEND);
1123 setTargetDAGCombine(ISD::ANY_EXTEND);
1124
1125 setTargetDAGCombine(ISD::TRUNCATE);
1126 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
1127
1128
1129 if (Subtarget.useCRBits()) {
1130 setTargetDAGCombine(ISD::TRUNCATE);
1131 setTargetDAGCombine(ISD::SETCC);
1132 setTargetDAGCombine(ISD::SELECT_CC);
1133 }
1134
1135 // Use reciprocal estimates.
1136 if (TM.Options.UnsafeFPMath) {
1137 setTargetDAGCombine(ISD::FDIV);
1138 setTargetDAGCombine(ISD::FSQRT);
1139 }
1140
1141 if (Subtarget.hasP9Altivec()) {
1142 setTargetDAGCombine(ISD::ABS);
1143 setTargetDAGCombine(ISD::VSELECT);
1144 }
1145
1146 // Darwin long double math library functions have $LDBL128 appended.
1147 if (Subtarget.isDarwin()) {
1148 setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
1149 setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
1150 setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
1151 setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
1152 setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
1153 setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
1154 setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
1155 setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
1156 setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
1157 setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
1158 }
1159
1160 if (EnableQuadPrecision) {
1161 setLibcallName(RTLIB::LOG_F128, "logf128");
1162 setLibcallName(RTLIB::LOG2_F128, "log2f128");
1163 setLibcallName(RTLIB::LOG10_F128, "log10f128");
1164 setLibcallName(RTLIB::EXP_F128, "expf128");
1165 setLibcallName(RTLIB::EXP2_F128, "exp2f128");
1166 setLibcallName(RTLIB::SIN_F128, "sinf128");
1167 setLibcallName(RTLIB::COS_F128, "cosf128");
1168 setLibcallName(RTLIB::POW_F128, "powf128");
1169 setLibcallName(RTLIB::FMIN_F128, "fminf128");
1170 setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
1171 setLibcallName(RTLIB::POWI_F128, "__powikf2");
1172 setLibcallName(RTLIB::REM_F128, "fmodf128");
1173 }
1174
1175 // With 32 condition bits, we don't need to sink (and duplicate) compares
1176 // aggressively in CodeGenPrep.
1177 if (Subtarget.useCRBits()) {
1178 setHasMultipleConditionRegisters();
1179 setJumpIsExpensive();
1180 }
1181
1182 setMinFunctionAlignment(llvm::Align(4));
1183 if (Subtarget.isDarwin())
1184 setPrefFunctionAlignment(llvm::Align(16));
1185
1186 switch (Subtarget.getDarwinDirective()) {
1187 default: break;
1188 case PPC::DIR_970:
1189 case PPC::DIR_A2:
1190 case PPC::DIR_E500:
1191 case PPC::DIR_E500mc:
1192 case PPC::DIR_E5500:
1193 case PPC::DIR_PWR4:
1194 case PPC::DIR_PWR5:
1195 case PPC::DIR_PWR5X:
1196 case PPC::DIR_PWR6:
1197 case PPC::DIR_PWR6X:
1198 case PPC::DIR_PWR7:
1199 case PPC::DIR_PWR8:
1200 case PPC::DIR_PWR9:
1201 setPrefLoopAlignment(llvm::Align(16));
1202 setPrefFunctionAlignment(llvm::Align(16));
1203 break;
1204 }
1205
1206 if (Subtarget.enableMachineScheduler())
1207 setSchedulingPreference(Sched::Source);
1208 else
1209 setSchedulingPreference(Sched::Hybrid);
1210
1211 computeRegisterProperties(STI.getRegisterInfo());
1212
1213 // The Freescale cores do better with aggressive inlining of memcpy and
1214 // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
1215 if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
1216 Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
1217 MaxStoresPerMemset = 32;
1218 MaxStoresPerMemsetOptSize = 16;
1219 MaxStoresPerMemcpy = 32;
1220 MaxStoresPerMemcpyOptSize = 8;
1221 MaxStoresPerMemmove = 32;
1222 MaxStoresPerMemmoveOptSize = 8;
1223 } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) {
1224 // The A2 also benefits from (very) aggressive inlining of memcpy and
1225 // friends. The overhead of the function call, even when warm, can be
1226 // over one hundred cycles.
1227 MaxStoresPerMemset = 128;
1228 MaxStoresPerMemcpy = 128;
1229 MaxStoresPerMemmove = 128;
1230 MaxLoadsPerMemcmp = 128;
1231 } else {
1232 MaxLoadsPerMemcmp = 8;
1233 MaxLoadsPerMemcmpOptSize = 4;
1234 }
1235}
1236
1237/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1238/// the desired ByVal argument alignment.
1239static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
1240 unsigned MaxMaxAlign) {
1241 if (MaxAlign == MaxMaxAlign)
1242 return;
1243 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1244 if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
1245 MaxAlign = 32;
1246 else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
1247 MaxAlign = 16;
1248 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1249 unsigned EltAlign = 0;
1250 getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1251 if (EltAlign > MaxAlign)
1252 MaxAlign = EltAlign;
1253 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1254 for (auto *EltTy : STy->elements()) {
1255 unsigned EltAlign = 0;
1256 getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1257 if (EltAlign > MaxAlign)
1258 MaxAlign = EltAlign;
1259 if (MaxAlign == MaxMaxAlign)
1260 break;
1261 }
1262 }
1263}
1264
1265/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1266/// function arguments in the caller parameter area.
1267unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
1268 const DataLayout &DL) const {
1269 // Darwin passes everything on 4 byte boundary.
1270 if (Subtarget.isDarwin())
1271 return 4;
1272
1273 // 16-byte and wider vectors are passed on a 16-byte boundary. The rest are
1274 // passed on an 8-byte boundary on PPC64 and a 4-byte boundary on PPC32.
1275 unsigned Align = Subtarget.isPPC64() ? 8 : 4;
1276 if (Subtarget.hasAltivec() || Subtarget.hasQPX())
1277 getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16);
1278 return Align;
1279}
1280
1281bool PPCTargetLowering::useSoftFloat() const {
1282 return Subtarget.useSoftFloat();
1283}
1284
1285bool PPCTargetLowering::hasSPE() const {
1286 return Subtarget.hasSPE();
1287}
1288
1289bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
1290 return VT.isScalarInteger();
1291}
1292
1293const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1294 switch ((PPCISD::NodeType)Opcode) {
1295 case PPCISD::FIRST_NUMBER: break;
1296 case PPCISD::FSEL: return "PPCISD::FSEL";
1297 case PPCISD::FCFID: return "PPCISD::FCFID";
1298 case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
1299 case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
1300 case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
1301 case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
1302 case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
1303 case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
1304 case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
1305 case PPCISD::FP_TO_UINT_IN_VSR:
1306 return "PPCISD::FP_TO_UINT_IN_VSR,";
1307 case PPCISD::FP_TO_SINT_IN_VSR:
1308 return "PPCISD::FP_TO_SINT_IN_VSR";
1309 case PPCISD::FRE: return "PPCISD::FRE";
1310 case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
1311 case PPCISD::STFIWX: return "PPCISD::STFIWX";
1312 case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
1313 case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
1314 case PPCISD::VPERM: return "PPCISD::VPERM";
1315 case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
1316 case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
1317 case PPCISD::XXREVERSE: return "PPCISD::XXREVERSE";
1318 case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
1319 case PPCISD::VECSHL: return "PPCISD::VECSHL";
1320 case PPCISD::CMPB: return "PPCISD::CMPB";
1321 case PPCISD::Hi: return "PPCISD::Hi";
1322 case PPCISD::Lo: return "PPCISD::Lo";
1323 case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
1324 case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1325 case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
1326 case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
1327 case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
1328 case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
1329 case PPCISD::SRL: return "PPCISD::SRL";
1330 case PPCISD::SRA: return "PPCISD::SRA";
1331 case PPCISD::SHL: return "PPCISD::SHL";
1332 case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
1333 case PPCISD::CALL: return "PPCISD::CALL";
1334 case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
1335 case PPCISD::MTCTR: return "PPCISD::MTCTR";
1336 case PPCISD::BCTRL: return "PPCISD::BCTRL";
1337 case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
1338 case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
1339 case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
1340 case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
1341 case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1342 case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
1343 case PPCISD::MFVSR: return "PPCISD::MFVSR";
1344 case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
1345 case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
1346 case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
1347 case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
1348 case PPCISD::ANDIo_1_EQ_BIT: return "PPCISD::ANDIo_1_EQ_BIT";
1349 case PPCISD::ANDIo_1_GT_BIT: return "PPCISD::ANDIo_1_GT_BIT";
1350 case PPCISD::VCMP: return "PPCISD::VCMP";
1351 case PPCISD::VCMPo: return "PPCISD::VCMPo";
1352 case PPCISD::LBRX: return "PPCISD::LBRX";
1353 case PPCISD::STBRX: return "PPCISD::STBRX";
1354 case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
1355 case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
1356 case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
1357 case PPCISD::STXSIX: return "PPCISD::STXSIX";
1358 case PPCISD::VEXTS: return "PPCISD::VEXTS";
1359 case PPCISD::SExtVElems: return "PPCISD::SExtVElems";
1360 case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
1361 case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
1362 case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE";
1363 case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE";
1364 case PPCISD::ST_VSR_SCAL_INT:
1365 return "PPCISD::ST_VSR_SCAL_INT";
1366 case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
1367 case PPCISD::BDNZ: return "PPCISD::BDNZ";
1368 case PPCISD::BDZ: return "PPCISD::BDZ";
1369 case PPCISD::MFFS: return "PPCISD::MFFS";
1370 case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
1371 case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
1372 case PPCISD::CR6SET: return "PPCISD::CR6SET";
1373 case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
1374 case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
1375 case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
1376 case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1377 case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
1378 case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
1379 case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
1380 case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
1381 case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
1382 case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1383 case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
1384 case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
1385 case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
1386 case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1387 case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1388 case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
1389 case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
1390 case PPCISD::SC: return "PPCISD::SC";
1391 case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";
1392 case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";
1393 case PPCISD::RFEBB: return "PPCISD::RFEBB";
1394 case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
1395 case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
1396 case PPCISD::VABSD: return "PPCISD::VABSD";
1397 case PPCISD::QVFPERM: return "PPCISD::QVFPERM";
1398 case PPCISD::QVGPCI: return "PPCISD::QVGPCI";
1399 case PPCISD::QVALIGNI: return "PPCISD::QVALIGNI";
1400 case PPCISD::QVESPLATI: return "PPCISD::QVESPLATI";
1401 case PPCISD::QBFLT: return "PPCISD::QBFLT";
1402 case PPCISD::QVLFSb: return "PPCISD::QVLFSb";
1403 case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
1404 case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
1405 case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
1406 case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
1407 case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
1408 case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
1409 }
1410 return nullptr;
1411}
1412
1413EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
1414 EVT VT) const {
1415 if (!VT.isVector())
1416 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1417
1418 if (Subtarget.hasQPX())
1419 return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements());
1420
1421 return VT.changeVectorElementTypeToInteger();
1422}
1423
1424bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
1425 assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1426 return true;
1427}
1428
1429//===----------------------------------------------------------------------===//
1430// Node matching predicates, for use by the tblgen matching code.
1431//===----------------------------------------------------------------------===//
1432
1433/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1434static bool isFloatingPointZero(SDValue Op) {
1435 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1436 return CFP->getValueAPF().isZero();
1437 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1438 // Maybe this has already been legalized into the constant pool?
1439 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1440 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1441 return CFP->getValueAPF().isZero();
1442 }
1443 return false;
1444}
1445
1446/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
1447/// true if Op is undef or if it matches the specified value.
1448static bool isConstantOrUndef(int Op, int Val) {
1449 return Op < 0 || Op == Val;
1450}
1451
1452/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1453/// VPKUHUM instruction.
1454/// The ShuffleKind distinguishes between big-endian operations with
1455/// two different inputs (0), either-endian operations with two identical
1456/// inputs (1), and little-endian operations with two different inputs (2).
1457/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1458bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1459 SelectionDAG &DAG) {
1460 bool IsLE = DAG.getDataLayout().isLittleEndian();
1461 if (ShuffleKind == 0) {
1462 if (IsLE)
1463 return false;
1464 for (unsigned i = 0; i != 16; ++i)
1465 if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1466 return false;
1467 } else if (ShuffleKind == 2) {
1468 if (!IsLE)
1469 return false;
1470 for (unsigned i = 0; i != 16; ++i)
1471 if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1472 return false;
1473 } else if (ShuffleKind == 1) {
1474 unsigned j = IsLE ? 0 : 1;
1475 for (unsigned i = 0; i != 8; ++i)
1476 if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
1477 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
1478 return false;
1479 }
1480 return true;
1481}
1482
1483/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1484/// VPKUWUM instruction.
1485/// The ShuffleKind distinguishes between big-endian operations with
1486/// two different inputs (0), either-endian operations with two identical
1487/// inputs (1), and little-endian operations with two different inputs (2).
1488/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1489bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1490 SelectionDAG &DAG) {
1491 bool IsLE = DAG.getDataLayout().isLittleEndian();
1492 if (ShuffleKind == 0) {
1493 if (IsLE)
1494 return false;
1495 for (unsigned i = 0; i != 16; i += 2)
1496 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
1497 !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
1498 return false;
1499 } else if (ShuffleKind == 2) {
1500 if (!IsLE)
1501 return false;
1502 for (unsigned i = 0; i != 16; i += 2)
1503 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1504 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
1505 return false;
1506 } else if (ShuffleKind == 1) {
1507 unsigned j = IsLE ? 0 : 2;
1508 for (unsigned i = 0; i != 8; i += 2)
1509 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1510 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1511 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1512 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
1513 return false;
1514 }
1515 return true;
1516}
1517
1518/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1519/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1520/// current subtarget.
1521///
1522/// The ShuffleKind distinguishes between big-endian operations with
1523/// two different inputs (0), either-endian operations with two identical
1524/// inputs (1), and little-endian operations with two different inputs (2).
1525/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1526bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1527 SelectionDAG &DAG) {
1528 const PPCSubtarget& Subtarget =
1529 static_cast<const PPCSubtarget&>(DAG.getSubtarget());
1530 if (!Subtarget.hasP8Vector())
1531 return false;
1532
1533 bool IsLE = DAG.getDataLayout().isLittleEndian();
1534 if (ShuffleKind == 0) {
1535 if (IsLE)
1536 return false;
1537 for (unsigned i = 0; i != 16; i += 4)
1538 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||
1539 !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
1540 !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
1541 !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
1542 return false;
1543 } else if (ShuffleKind == 2) {
1544 if (!IsLE)
1545 return false;
1546 for (unsigned i = 0; i != 16; i += 4)
1547 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1548 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
1549 !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
1550 !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
1551 return false;
1552 } else if (ShuffleKind == 1) {
1553 unsigned j = IsLE ? 0 : 4;
1554 for (unsigned i = 0; i != 8; i += 4)
1555 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1556 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1557 !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
1558 !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
1559 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1560 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
1561 !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1562 !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1563 return false;
1564 }
1565 return true;
1566}
1567
1568/// isVMerge - Common function, used to match vmrg* shuffles.
1569///
1570static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
1571 unsigned LHSStart, unsigned RHSStart) {
1572 if (N->getValueType(0) != MVT::v16i8)
1573 return false;
1574 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
1575 "Unsupported merge size!");
1576
1577 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
1578 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
1579 if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
1580 LHSStart+j+i*UnitSize) ||
1581 !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
1582 RHSStart+j+i*UnitSize))
1583 return false;
1584 }
1585 return true;
1586}
1587
1588/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
1589/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
1590/// The ShuffleKind distinguishes between big-endian merges with two
1591/// different inputs (0), either-endian merges with two identical inputs (1),
1592/// and little-endian merges with two different inputs (2). For the latter,
1593/// the input operands are swapped (see PPCInstrAltivec.td).
1594bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1595 unsigned ShuffleKind, SelectionDAG &DAG) {
1596 if (DAG.getDataLayout().isLittleEndian()) {
1597 if (ShuffleKind == 1) // unary
1598 return isVMerge(N, UnitSize, 0, 0);
1599 else if (ShuffleKind == 2) // swapped
1600 return isVMerge(N, UnitSize, 0, 16);
1601 else
1602 return false;
1603 } else {
1604 if (ShuffleKind == 1) // unary
1605 return isVMerge(N, UnitSize, 8, 8);
1606 else if (ShuffleKind == 0) // normal
1607 return isVMerge(N, UnitSize, 8, 24);
1608 else
1609 return false;
1610 }
1611}
1612
1613/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
1614/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
1615/// The ShuffleKind distinguishes between big-endian merges with two
1616/// different inputs (0), either-endian merges with two identical inputs (1),
1617/// and little-endian merges with two different inputs (2). For the latter,
1618/// the input operands are swapped (see PPCInstrAltivec.td).
1619bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1620 unsigned ShuffleKind, SelectionDAG &DAG) {
1621 if (DAG.getDataLayout().isLittleEndian()) {
1622 if (ShuffleKind == 1) // unary
1623 return isVMerge(N, UnitSize, 8, 8);
1624 else if (ShuffleKind == 2) // swapped
1625 return isVMerge(N, UnitSize, 8, 24);
1626 else
1627 return false;
1628 } else {
1629 if (ShuffleKind == 1) // unary
1630 return isVMerge(N, UnitSize, 0, 0);
1631 else if (ShuffleKind == 0) // normal
1632 return isVMerge(N, UnitSize, 0, 16);
1633 else
1634 return false;
1635 }
1636}
1637
1638/**
1639 * Common function used to match vmrgew and vmrgow shuffles
1640 *
1641 * The indexOffset determines whether to look for even or odd words in
1642 * the shuffle mask. This is based on the endianness of the target
1643 * machine.
1644 * - Little Endian:
1645 * - Use offset of 0 to check for odd elements
1646 * - Use offset of 4 to check for even elements
1647 * - Big Endian:
1648 * - Use offset of 0 to check for even elements
1649 * - Use offset of 4 to check for odd elements
1650 * A detailed description of the vector element ordering for little endian and
1651 * big endian can be found at
1652 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
1653 * Targeting your applications - what little endian and big endian IBM XL C/C++
1654 * compiler differences mean to you
1655 *
1656 * The mask to the shuffle vector instruction specifies the indices of the
1657 * elements from the two input vectors to place in the result. The elements are
1658 * numbered in array-access order, starting with the first vector. These vectors
1659 * are always of type v16i8, thus each vector will contain 16 elements, each
1660 * 8 bits in size. More info on the shuffle vector can be found in the
1661 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
1662 * Language Reference.
1663 *
1664 * The RHSStartValue indicates whether the same input vectors are used (unary)
1665 * or two different input vectors are used, based on the following:
1666 * - If the instruction uses the same vector for both inputs, the range of the
1667 * indices will be 0 to 15. In this case, the RHSStart value passed should
1668 * be 0.
1669 * - If the instruction has two different vectors then the range of the
1670 * indices will be 0 to 31. In this case, the RHSStart value passed should
1671 * be 16 (indices 0-15 specify elements in the first vector while indices 16
1672 * to 31 specify elements in the second vector).
1673 *
1674 * \param[in] N The shuffle vector SD Node to analyze
1675 * \param[in] IndexOffset Specifies whether to look for even or odd elements
1676 * \param[in] RHSStartValue Specifies the starting index for the righthand input
1677 * vector to the shuffle_vector instruction
1678 * \return true iff this shuffle vector represents an even or odd word merge
1679 */
1680static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
1681 unsigned RHSStartValue) {
1682 if (N->getValueType(0) != MVT::v16i8)
1683 return false;
1684
1685 for (unsigned i = 0; i < 2; ++i)
1686 for (unsigned j = 0; j < 4; ++j)
1687 if (!isConstantOrUndef(N->getMaskElt(i*4+j),
1688 i*RHSStartValue+j+IndexOffset) ||
1689 !isConstantOrUndef(N->getMaskElt(i*4+j+8),
1690 i*RHSStartValue+j+IndexOffset+8))
1691 return false;
1692 return true;
1693}
1694
1695/**
1696 * Determine if the specified shuffle mask is suitable for the vmrgew or
1697 * vmrgow instructions.
1698 *
1699 * \param[in] N The shuffle vector SD Node to analyze
1700 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
1701 * \param[in] ShuffleKind Identify the type of merge:
1702 * - 0 = big-endian merge with two different inputs;
1703 * - 1 = either-endian merge with two identical inputs;
1704 * - 2 = little-endian merge with two different inputs (inputs are swapped for
1705 * little-endian merges).
1706 * \param[in] DAG The current SelectionDAG
1707 * \return true iff this shuffle mask is suitable for a vmrgew or vmrgow instruction
1708 */
1709bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
1710 unsigned ShuffleKind, SelectionDAG &DAG) {
1711 if (DAG.getDataLayout().isLittleEndian()) {
1712 unsigned indexOffset = CheckEven ? 4 : 0;
1713 if (ShuffleKind == 1) // Unary
1714 return isVMerge(N, indexOffset, 0);
1715 else if (ShuffleKind == 2) // swapped
1716 return isVMerge(N, indexOffset, 16);
1717 else
1718 return false;
1719 }
1720 else {
1721 unsigned indexOffset = CheckEven ? 0 : 4;
1722 if (ShuffleKind == 1) // Unary
1723 return isVMerge(N, indexOffset, 0);
1724 else if (ShuffleKind == 0) // Normal
1725 return isVMerge(N, indexOffset, 16);
1726 else
1727 return false;
1728 }
1729 return false;
1730}
1731
1732/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
1733/// amount, otherwise return -1.
1734/// The ShuffleKind distinguishes between big-endian operations with two
1735/// different inputs (0), either-endian operations with two identical inputs
1736/// (1), and little-endian operations with two different inputs (2). For the
1737/// latter, the input operands are swapped (see PPCInstrAltivec.td).
1738int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
1739 SelectionDAG &DAG) {
1740 if (N->getValueType(0) != MVT::v16i8)
1741 return -1;
1742
1743 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
1744
1745 // Find the first non-undef value in the shuffle mask.
1746 unsigned i;
1747 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
1748 /*search*/;
1749
1750 if (i == 16) return -1; // all undef.
1751
1752 // Otherwise, check to see if the rest of the elements are consecutively
1753 // numbered from this value.
1754 unsigned ShiftAmt = SVOp->getMaskElt(i);
1755 if (ShiftAmt < i) return -1;
1756
1757 ShiftAmt -= i;
1758 bool isLE = DAG.getDataLayout().isLittleEndian();
1759
1760 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
1761 // Check the rest of the elements to see if they are consecutive.
1762 for (++i; i != 16; ++i)
1763 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
1764 return -1;
1765 } else if (ShuffleKind == 1) {
1766 // Check the rest of the elements to see if they are consecutive.
1767 for (++i; i != 16; ++i)
1768 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
1769 return -1;
1770 } else
1771 return -1;
1772
1773 if (isLE)
1774 ShiftAmt = 16 - ShiftAmt;
1775
1776 return ShiftAmt;
1777}
1778
1779/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
1780/// specifies a splat of a single element that is suitable for input to
1781/// VSPLTB/VSPLTH/VSPLTW.
1782bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
1783 assert(N->getValueType(0) == MVT::v16i8 &&
1784 (EltSize == 1 || EltSize == 2 || EltSize == 4));
1785
1786 // The consecutive indices need to specify an element, not part of two
1787 // different elements. So abandon ship early if this isn't the case.
1788 if (N->getMaskElt(0) % EltSize != 0)
1789 return false;
1790
1791 // This is a splat operation if each element of the permute is the same, and
1792 // if the value doesn't reference the second vector.
1793 unsigned ElementBase = N->getMaskElt(0);
1794
1795 // FIXME: Handle UNDEF elements too!
1796 if (ElementBase >= 16)
1797 return false;
1798
1799 // Check that the indices are consecutive, in the case of a multi-byte element
1800 // splatted with a v16i8 mask.
1801 for (unsigned i = 1; i != EltSize; ++i)
1802 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
1803 return false;
1804
1805 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
1806 if (N->getMaskElt(i) < 0) continue;
1807 for (unsigned j = 0; j != EltSize; ++j)
1808 if (N->getMaskElt(i+j) != N->getMaskElt(j))
1809 return false;
1810 }
1811 return true;
1812}
1813
1814/// Check that the mask is shuffling N byte elements. Within each N byte
1815/// element of the mask, the indices could be either in increasing or
1816/// decreasing order as long as they are consecutive.
1817/// \param[in] N the shuffle vector SD Node to analyze
1818/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
1819/// Word/DoubleWord/QuadWord).
1820 /// \param[in] StepLen the index delta between adjacent entries within each
1821 ///            N byte element: 1 if the mask is increasing, -1 if decreasing.
1822/// \return true iff the mask is shuffling N byte elements.
1823static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
1824 int StepLen) {
1825 assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
1826 "Unexpected element width.");
1827 assert((StepLen == 1 || StepLen == -1) && "Unexpected step length.");
1828
1829 unsigned NumOfElem = 16 / Width;
1830 unsigned MaskVal[16]; // Width is never greater than 16
1831 for (unsigned i = 0; i < NumOfElem; ++i) {
1832 MaskVal[0] = N->getMaskElt(i * Width);
1833 if ((StepLen == 1) && (MaskVal[0] % Width)) {
1834 return false;
1835 } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
1836 return false;
1837 }
1838
1839 for (unsigned int j = 1; j < Width; ++j) {
1840 MaskVal[j] = N->getMaskElt(i * Width + j);
1841 if (MaskVal[j] != MaskVal[j-1] + StepLen) {
1842 return false;
1843 }
1844 }
1845 }
1846
1847 return true;
1848}
1849
1850bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
1851 unsigned &InsertAtByte, bool &Swap, bool IsLE) {
1852 if (!isNByteElemShuffleMask(N, 4, 1))
1853 return false;
1854
1855 // Now we look at mask elements 0,4,8,12
1856 unsigned M0 = N->getMaskElt(0) / 4;
1857 unsigned M1 = N->getMaskElt(4) / 4;
1858 unsigned M2 = N->getMaskElt(8) / 4;
1859 unsigned M3 = N->getMaskElt(12) / 4;
1860 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
1861 unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
1862
1863 // Below, let H and L be arbitrary elements of the shuffle mask
1864 // where H is in the range [4,7] and L is in the range [0,3].
1865 // H, 1, 2, 3 or L, 5, 6, 7
1866 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
1867 (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
1868 ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
1869 InsertAtByte = IsLE ? 12 : 0;
1870 Swap = M0 < 4;
1871 return true;
1872 }
1873 // 0, H, 2, 3 or 4, L, 6, 7
1874 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
1875 (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
1876 ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
1877 InsertAtByte = IsLE ? 8 : 4;
1878 Swap = M1 < 4;
1879 return true;
1880 }
1881 // 0, 1, H, 3 or 4, 5, L, 7
1882 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
1883 (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
1884 ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
1885 InsertAtByte = IsLE ? 4 : 8;
1886 Swap = M2 < 4;
1887 return true;
1888 }
1889 // 0, 1, 2, H or 4, 5, 6, L
1890 if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
1891 (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
1892 ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
1893 InsertAtByte = IsLE ? 0 : 12;
1894 Swap = M3 < 4;
1895 return true;
1896 }
1897
1898 // If both vector operands for the shuffle are the same vector, the mask will
1899 // contain only elements from the first one and the second one will be undef.
1900 if (N->getOperand(1).isUndef()) {
1901 ShiftElts = 0;
1902 Swap = true;
1903 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
1904 if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
1905 InsertAtByte = IsLE ? 12 : 0;
1906 return true;
1907 }
1908 if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
1909 InsertAtByte = IsLE ? 8 : 4;
1910 return true;
1911 }
1912 if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
1913 InsertAtByte = IsLE ? 4 : 8;
1914 return true;
1915 }
1916 if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
1917 InsertAtByte = IsLE ? 0 : 12;
1918 return true;
1919 }
1920 }
1921
1922 return false;
1923}
1924
1925bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
1926 bool &Swap, bool IsLE) {
1927 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
7
'?' condition is true
1928 // Ensure each byte index of the word is consecutive.
1929 if (!isNByteElemShuffleMask(N, 4, 1))
8
Assuming the condition is false
9
Taking false branch
1930 return false;
1931
1932 // Now we look at mask elements 0,4,8,12, which are the beginning of words.
1933 unsigned M0 = N->getMaskElt(0) / 4;
1934 unsigned M1 = N->getMaskElt(4) / 4;
1935 unsigned M2 = N->getMaskElt(8) / 4;
1936 unsigned M3 = N->getMaskElt(12) / 4;
1937
1938 // If both vector operands for the shuffle are the same vector, the mask will
1939 // contain only elements from the first one and the second one will be undef.
1940 if (N->getOperand(1).isUndef()) {
10
Calling 'SDValue::isUndef'
16
Returning from 'SDValue::isUndef'
17
Taking false branch
1941 assert(M0 < 4 && "Indexing into an undef vector?")((M0 < 4 && "Indexing into an undef vector?") ? static_cast
<void> (0) : __assert_fail ("M0 < 4 && \"Indexing into an undef vector?\""
, "/build/llvm-toolchain-snapshot-10~svn372087/lib/Target/PowerPC/PPCISelLowering.cpp"
, 1941, __PRETTY_FUNCTION__))
;
1942 if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
1943 return false;
1944
1945 ShiftElts = IsLE ? (4 - M0) % 4 : M0;
1946 Swap = false;
1947 return true;
1948 }
1949
1950 // Ensure each word index of the ShuffleVector Mask is consecutive.
1951 if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
18
Assuming the condition is false
19
Assuming the condition is false
20
Assuming the condition is false
21
Taking false branch
1952 return false;
1953
1954 if (IsLE) {
22
Assuming 'IsLE' is false
23
Taking false branch
1955 if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
1956 // Input vectors don't need to be swapped if the leading element
1957 // of the result is one of the 3 left elements of the second vector
1958 // (or if there is no shift to be done at all).
1959 Swap = false;
1960 ShiftElts = (8 - M0) % 8;
1961 } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
1962 // Input vectors need to be swapped if the leading element
1963 // of the result is one of the 3 left elements of the first vector
1964 // (or if we're shifting by 4 - thereby simply swapping the vectors).
1965 Swap = true;
1966 ShiftElts = (4 - M0) % 4;
1967 }
1968
1969 return true;
1970 } else { // BE
1971 if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
24
Assuming 'M0' is not equal to 0
25
Assuming 'M0' is not equal to 1
26
Assuming 'M0' is not equal to 2
27
Assuming 'M0' is not equal to 3
28
Taking false branch
1972 // Input vectors don't need to be swapped if the leading element
1973 // of the result is one of the 4 elements of the first vector.
1974 Swap = false;
1975 ShiftElts = M0;
1976 } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
29
Assuming 'M0' is not equal to 4
30
Assuming 'M0' is not equal to 5
31
Assuming 'M0' is not equal to 6
32
Assuming 'M0' is not equal to 7
33
Taking false branch
1977 // Input vectors need to be swapped if the leading element
1978 // of the result is one of the 4 elements of the right vector.
1979 Swap = true;
1980 ShiftElts = M0 - 4;
1981 }
1982
1983 return true;
34
Returning without writing to 'ShiftElts'
35
Returning the value 1, which participates in a condition later
1984 }
1985}
1986
1987bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
1988 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
1989
1990 if (!isNByteElemShuffleMask(N, Width, -1))
1991 return false;
1992
1993 for (int i = 0; i < 16; i += Width)
1994 if (N->getMaskElt(i) != i + Width - 1)
1995 return false;
1996
1997 return true;
1998}
1999
2000bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
2001 return isXXBRShuffleMaskHelper(N, 2);
2002}
2003
2004bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
2005 return isXXBRShuffleMaskHelper(N, 4);
2006}
2007
2008bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
2009 return isXXBRShuffleMaskHelper(N, 8);
2010}
2011
2012bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
2013 return isXXBRShuffleMaskHelper(N, 16);
2014}
2015
2016/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2017/// if the inputs to the instruction should be swapped and set \p DM to the
2018/// value for the immediate.
2019/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2020/// AND element 0 of the result comes from the first input (LE) or second input
2021/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2022 /// \return true iff the given mask of shuffle node \p N is an XXPERMDI shuffle
2023/// mask.
2024bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
2025 bool &Swap, bool IsLE) {
2026 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2027
2028 // Ensure each byte index of the double word is consecutive.
2029 if (!isNByteElemShuffleMask(N, 8, 1))
2030 return false;
2031
2032 unsigned M0 = N->getMaskElt(0) / 8;
2033 unsigned M1 = N->getMaskElt(8) / 8;
2034 assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2035
2036 // If both vector operands for the shuffle are the same vector, the mask will
2037 // contain only elements from the first one and the second one will be undef.
2038 if (N->getOperand(1).isUndef()) {
2039 if ((M0 | M1) < 2) {
2040 DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2041 Swap = false;
2042 return true;
2043 } else
2044 return false;
2045 }
2046
2047 if (IsLE) {
2048 if (M0 > 1 && M1 < 2) {
2049 Swap = false;
2050 } else if (M0 < 2 && M1 > 1) {
2051 M0 = (M0 + 2) % 4;
2052 M1 = (M1 + 2) % 4;
2053 Swap = true;
2054 } else
2055 return false;
2056
2057 // Note: if control flow comes here that means Swap is already set above
2058 DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2059 return true;
2060 } else { // BE
2061 if (M0 < 2 && M1 > 1) {
2062 Swap = false;
2063 } else if (M0 > 1 && M1 < 2) {
2064 M0 = (M0 + 2) % 4;
2065 M1 = (M1 + 2) % 4;
2066 Swap = true;
2067 } else
2068 return false;
2069
2070 // Note: if control flow comes here that means Swap is already set above
2071 DM = (M0 << 1) + (M1 & 1);
2072 return true;
2073 }
2074}
2075
2076
2077/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
2078/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
2079unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
2080 SelectionDAG &DAG) {
2081 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2082 assert(isSplatShuffleMask(SVOp, EltSize));
2083 if (DAG.getDataLayout().isLittleEndian())
2084 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2085 else
2086 return SVOp->getMaskElt(0) / EltSize;
2087}
2088
2089/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2090/// by using a vspltis[bhw] instruction of the specified element size, return
2091/// the constant being splatted. The ByteSize field indicates the number of
2092/// bytes of each element [124] -> [bhw].
2093SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2094 SDValue OpVal(nullptr, 0);
2095
2096 // If ByteSize of the splat is bigger than the element size of the
2097 // build_vector, then we have a case where we are checking for a splat where
2098 // multiple elements of the buildvector are folded together into a single
2099 // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
2100 unsigned EltSize = 16/N->getNumOperands();
2101 if (EltSize < ByteSize) {
2102 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2103 SDValue UniquedVals[4];
2104 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2105
2106 // See if all of the elements in the buildvector agree across.
2107 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2108 if (N->getOperand(i).isUndef()) continue;
2109 // If the element isn't a constant, bail fully out.
2110 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2111
2112 if (!UniquedVals[i&(Multiple-1)].getNode())
2113 UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2114 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2115 return SDValue(); // no match.
2116 }
2117
2118 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2119 // either constant or undef values that are identical for each chunk. See
2120 // if these chunks can form into a larger vspltis*.
2121
2122 // Check to see if all of the leading entries are either 0 or -1. If
2123 // neither, then this won't fit into the immediate field.
2124 bool LeadingZero = true;
2125 bool LeadingOnes = true;
2126 for (unsigned i = 0; i != Multiple-1; ++i) {
2127 if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2128
2129 LeadingZero &= isNullConstant(UniquedVals[i]);
2130 LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2131 }
2132 // Finally, check the least significant entry.
2133 if (LeadingZero) {
2134 if (!UniquedVals[Multiple-1].getNode())
2135 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
2136 int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
2137 if (Val < 16) // 0,0,0,4 -> vspltisw(4)
2138 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2139 }
2140 if (LeadingOnes) {
2141 if (!UniquedVals[Multiple-1].getNode())
2142 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2143 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2144 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2145 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2146 }
2147
2148 return SDValue();
2149 }
2150
2151 // Check to see if this buildvec has a single non-undef value in its elements.
2152 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2153 if (N->getOperand(i).isUndef()) continue;
2154 if (!OpVal.getNode())
2155 OpVal = N->getOperand(i);
2156 else if (OpVal != N->getOperand(i))
2157 return SDValue();
2158 }
2159
2160 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
2161
2162 unsigned ValSizeInBytes = EltSize;
2163 uint64_t Value = 0;
2164 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2165 Value = CN->getZExtValue();
2166 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2167 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2168 Value = FloatToBits(CN->getValueAPF().convertToFloat());
2169 }
2170
2171 // If the splat value is larger than the element value, then we can never do
2172 // this splat. The only case that we could fit the replicated bits into our
2173 // immediate field for would be zero, and we prefer to use vxor for it.
2174 if (ValSizeInBytes < ByteSize) return SDValue();
2175
2176 // If the element value is larger than the splat value, check if it consists
2177 // of a repeated bit pattern of size ByteSize.
2178 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2179 return SDValue();
2180
2181 // Properly sign extend the value.
2182 int MaskVal = SignExtend32(Value, ByteSize * 8);
2183
2184 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2185 if (MaskVal == 0) return SDValue();
2186
2187 // Finally, if this value fits in a 5 bit sext field, return it
2188 if (SignExtend32<5>(MaskVal) == MaskVal)
2189 return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2190 return SDValue();
2191}
2192
2193/// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift
2194/// amount, otherwise return -1.
2195int PPC::isQVALIGNIShuffleMask(SDNode *N) {
2196 EVT VT = N->getValueType(0);
2197 if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1)
2198 return -1;
2199
2200 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2201
2202 // Find the first non-undef value in the shuffle mask.
2203 unsigned i;
2204 for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i)
2205 /*search*/;
2206
2207 if (i == 4) return -1; // all undef.
2208
2209 // Otherwise, check to see if the rest of the elements are consecutively
2210 // numbered from this value.
2211 unsigned ShiftAmt = SVOp->getMaskElt(i);
2212 if (ShiftAmt < i) return -1;
2213 ShiftAmt -= i;
2214
2215 // Check the rest of the elements to see if they are consecutive.
2216 for (++i; i != 4; ++i)
2217 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2218 return -1;
2219
2220 return ShiftAmt;
2221}
2222
2223//===----------------------------------------------------------------------===//
2224// Addressing Mode Selection
2225//===----------------------------------------------------------------------===//
2226
2227/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2228/// or 64-bit immediate, and if the value can be accurately represented as a
2229 /// sign extension from a 16-bit value. If so, this returns true and sets Imm
2230 /// to the immediate value.
2231bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2232 if (!isa<ConstantSDNode>(N))
2233 return false;
2234
2235 Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue();
2236 if (N->getValueType(0) == MVT::i32)
2237 return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
2238 else
2239 return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2240}
2241bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2242 return isIntS16Immediate(Op.getNode(), Imm);
2243}
2244
2245
2246/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2247/// be represented as an indexed [r+r] operation.
2248bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2249 SDValue &Index,
2250 SelectionDAG &DAG) const {
2251 for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
2252 UI != E; ++UI) {
2253 if (MemSDNode *Memop = dyn_cast<MemSDNode>(*UI)) {
2254 if (Memop->getMemoryVT() == MVT::f64) {
2255 Base = N.getOperand(0);
2256 Index = N.getOperand(1);
2257 return true;
2258 }
2259 }
2260 }
2261 return false;
2262}
2263
2264 /// SelectAddressRegReg - Given the specified address, check to see if it
2265/// can be represented as an indexed [r+r] operation. Returns false if it
2266/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2267/// non-zero and N can be represented by a base register plus a signed 16-bit
2268/// displacement, make a more precise judgement by checking (displacement % \p
2269/// EncodingAlignment).
2270bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
2271 SDValue &Index, SelectionDAG &DAG,
2272 unsigned EncodingAlignment) const {
2273 int16_t imm = 0;
2274 if (N.getOpcode() == ISD::ADD) {
2275 // Is this address used by an SPE f64 load/store, which can't handle a 16-bit
2276 // offset? SPE loads/stores can only handle 8-bit offsets.
2277 if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2278 return true;
2279 if (isIntS16Immediate(N.getOperand(1), imm) &&
2280 (!EncodingAlignment || !(imm % EncodingAlignment)))
2281 return false; // r+i
2282 if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2283 return false; // r+i
2284
2285 Base = N.getOperand(0);
2286 Index = N.getOperand(1);
2287 return true;
2288 } else if (N.getOpcode() == ISD::OR) {
2289 if (isIntS16Immediate(N.getOperand(1), imm) &&
2290 (!EncodingAlignment || !(imm % EncodingAlignment)))
2291 return false; // r+i can fold it if we can.
2292
2293 // If this is an or of disjoint bitfields, we can codegen this as an add
2294 // (for better address arithmetic) if the LHS and RHS of the OR are provably
2295 // disjoint.
2296 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2297
2298 if (LHSKnown.Zero.getBoolValue()) {
2299 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2300 // If all of the bits are known zero on the LHS or RHS, the add won't
2301 // carry.
2302 if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2303 Base = N.getOperand(0);
2304 Index = N.getOperand(1);
2305 return true;
2306 }
2307 }
2308 }
2309
2310 return false;
2311}
2312
2313// If we happen to be doing an i64 load or store into a stack slot that has
2314// less than a 4-byte alignment, then the frame-index elimination may need to
2315// use an indexed load or store instruction (because the offset may not be a
2316// multiple of 4). The extra register needed to hold the offset comes from the
2317// register scavenger, and it is possible that the scavenger will need to use
2318// an emergency spill slot. As a result, we need to make sure that a spill slot
2319// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2320// stack slot.
2321static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2322 // FIXME: This does not handle the LWA case.
2323 if (VT != MVT::i64)
2324 return;
2325
2326 // NOTE: We'll exclude negative FIs here, which come from argument
2327 // lowering, because there are no known test cases triggering this problem
2328 // using packed structures (or similar). We can remove this exclusion if
2329 // we find such a test case. The reason why this is so test-case driven is
2330 // because this entire 'fixup' is only to prevent crashes (from the
2331 // register scavenger) on not-really-valid inputs. For example, if we have:
2332 // %a = alloca i1
2333 // %b = bitcast i1* %a to i64*
2334 // store i64 0, i64* %b
2335 // then the store should really be marked as 'align 1', but is not. If it
2336 // were marked as 'align 1' then the indexed form would have been
2337 // instruction-selected initially, and the problem this 'fixup' is preventing
2338 // won't happen regardless.
2339 if (FrameIdx < 0)
2340 return;
2341
2342 MachineFunction &MF = DAG.getMachineFunction();
2343 MachineFrameInfo &MFI = MF.getFrameInfo();
2344
2345 unsigned Align = MFI.getObjectAlignment(FrameIdx);
2346 if (Align >= 4)
2347 return;
2348
2349 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2350 FuncInfo->setHasNonRISpills();
2351}
2352
2353/// Returns true if the address N can be represented by a base register plus
2354/// a signed 16-bit displacement [r+imm], and if it is not better
2355/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2356/// displacements that are multiples of that value.
2357bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
2358 SDValue &Base,
2359 SelectionDAG &DAG,
2360 unsigned EncodingAlignment) const {
2361 // FIXME dl should come from parent load or store, not from address
2362 SDLoc dl(N);
2363 // If this can be more profitably realized as r+r, fail.
2364 if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2365 return false;
2366
2367 if (N.getOpcode() == ISD::ADD) {
2368 int16_t imm = 0;
2369 if (isIntS16Immediate(N.getOperand(1), imm) &&
2370 (!EncodingAlignment || (imm % EncodingAlignment) == 0)) {
2371 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2372 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2373 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2374 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2375 } else {
2376 Base = N.getOperand(0);
2377 }
2378 return true; // [r+i]
2379 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2380 // Match LOAD (ADD (X, Lo(G))).
2381 assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
2382 && "Cannot handle constant offsets yet!");
2383 Disp = N.getOperand(1).getOperand(0); // The global address.
2384 assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2385 Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2386 Disp.getOpcode() == ISD::TargetConstantPool ||
2387 Disp.getOpcode() == ISD::TargetJumpTable);
2388 Base = N.getOperand(0);
2389 return true; // [&g+r]
2390 }
2391 } else if (N.getOpcode() == ISD::OR) {
2392 int16_t imm = 0;
2393 if (isIntS16Immediate(N.getOperand(1), imm) &&
2394 (!EncodingAlignment || (imm % EncodingAlignment) == 0)) {
2395 // If this is an or of disjoint bitfields, we can codegen this as an add
2396 // (for better address arithmetic) if the LHS and RHS of the OR are
2397 // provably disjoint.
2398 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2399
2400 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2401 // If all of the bits are known zero on the LHS or RHS, the add won't
2402 // carry.
2403 if (FrameIndexSDNode *FI =
2404 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2405 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2406 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2407 } else {
2408 Base = N.getOperand(0);
2409 }
2410 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2411 return true;
2412 }
2413 }
2414 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2415 // Loading from a constant address.
2416
2417 // If this address fits entirely in a 16-bit sext immediate field, codegen
2418 // this as "d, 0"
2419 int16_t Imm;
2420 if (isIntS16Immediate(CN, Imm) &&
2421 (!EncodingAlignment || (Imm % EncodingAlignment) == 0)) {
2422 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2423 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2424 CN->getValueType(0));
2425 return true;
2426 }
2427
2428 // Handle 32-bit sext immediates with LIS + addr mode.
2429 if ((CN->getValueType(0) == MVT::i32 ||
2430 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2431 (!EncodingAlignment || (CN->getZExtValue() % EncodingAlignment) == 0)) {
2432 int Addr = (int)CN->getZExtValue();
2433
2434 // Otherwise, break this down into an LIS + disp.
2435 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2436
2437 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2438 MVT::i32);
2439 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2440 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2441 return true;
2442 }
2443 }
2444
2445 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2446 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2447 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2448 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2449 } else
2450 Base = N;
2451 return true; // [r+0]
2452}
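// A rough worked example of the LIS + displacement split above; this is an
// illustrative note, not text from the original file. Assuming Addr =
// 0x12348000, (signed short)Addr sign-extends to -0x8000, so Disp = -32768 and
// Base = (0x12348000 - (-0x8000)) >> 16 = 0x1235. LIS then materializes
// 0x12350000, and 0x12350000 + (-32768) recovers the original 0x12348000.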
2453
2454/// SelectAddressRegRegOnly - Given the specified address, force it to be
2455/// represented as an indexed [r+r] operation.
2456bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2457 SDValue &Index,
2458 SelectionDAG &DAG) const {
2459 // Check to see if we can easily represent this as an [r+r] address. This
2460 // will fail if it thinks that the address is more profitably represented as
2461 // reg+imm, e.g. where imm = 0.
2462 if (SelectAddressRegReg(N, Base, Index, DAG))
2463 return true;
2464
2465 // If the address is the result of an add, we will utilize the fact that the
2466 // address calculation includes an implicit add. However, we can reduce
2467 // register pressure if we do not materialize a constant just for use as the
2468 // index register. We only get rid of the add if it is not an add of a
2469 // value and a 16-bit signed constant and both have a single use.
2470 int16_t imm = 0;
2471 if (N.getOpcode() == ISD::ADD &&
2472 (!isIntS16Immediate(N.getOperand(1), imm) ||
2473 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2474 Base = N.getOperand(0);
2475 Index = N.getOperand(1);
2476 return true;
2477 }
2478
2479 // Otherwise, do it the hard way, using R0 as the base register.
2480 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2481 N.getValueType());
2482 Index = N;
2483 return true;
2484}
2485
2486/// Returns true if we should use a direct load into vector instruction
2487/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
2488static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
2489
2490 // If there are any other uses other than scalar to vector, then we should
2491 // keep it as a scalar load -> direct move pattern to prevent multiple
2492 // loads.
2493 LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
2494 if (!LD)
2495 return false;
2496
2497 EVT MemVT = LD->getMemoryVT();
2498 if (!MemVT.isSimple())
2499 return false;
2500 switch(MemVT.getSimpleVT().SimpleTy) {
2501 case MVT::i64:
2502 break;
2503 case MVT::i32:
2504 if (!ST.hasP8Vector())
2505 return false;
2506 break;
2507 case MVT::i16:
2508 case MVT::i8:
2509 if (!ST.hasP9Vector())
2510 return false;
2511 break;
2512 default:
2513 return false;
2514 }
2515
2516 SDValue LoadedVal(N, 0);
2517 if (!LoadedVal.hasOneUse())
2518 return false;
2519
2520 for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
2521 UI != UE; ++UI)
2522 if (UI.getUse().get().getResNo() == 0 &&
2523 UI->getOpcode() != ISD::SCALAR_TO_VECTOR)
2524 return false;
2525
2526 return true;
2527}
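// Illustrative sketch of the pattern this helper guards (an assumed shape, not
// from the original file): a scalar load whose only value use is a
// SCALAR_TO_VECTOR, roughly
//   t1: i64,ch = load t0, ptr
//   t2: v2i64 = scalar_to_vector t1
// which can then be selected as a single direct vector load (e.g. lxsd or lfd)
// instead of a GPR load followed by a direct move.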
2528
2529/// getPreIndexedAddressParts - Returns true by value, and the base pointer,
2530/// offset pointer, and addressing mode by reference, if the node's address
2531/// can be legally represented as a pre-indexed load / store address.
2532bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
2533 SDValue &Offset,
2534 ISD::MemIndexedMode &AM,
2535 SelectionDAG &DAG) const {
2536 if (DisablePPCPreinc) return false;
2537
2538 bool isLoad = true;
2539 SDValue Ptr;
2540 EVT VT;
2541 unsigned Alignment;
2542 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2543 Ptr = LD->getBasePtr();
2544 VT = LD->getMemoryVT();
2545 Alignment = LD->getAlignment();
2546 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
2547 Ptr = ST->getBasePtr();
2548 VT = ST->getMemoryVT();
2549 Alignment = ST->getAlignment();
2550 isLoad = false;
2551 } else
2552 return false;
2553
2554 // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
2555 // instructions, because we can fold these into a more efficient instruction
2556 // instead (such as LXSD).
2557 if (isLoad && usePartialVectorLoads(N, Subtarget)) {
2558 return false;
2559 }
2560
2561 // PowerPC doesn't have preinc load/store instructions for vectors (except
2562 // for QPX, which does have preinc r+r forms).
2563 if (VT.isVector()) {
2564 if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) {
2565 return false;
2566 } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) {
2567 AM = ISD::PRE_INC;
2568 return true;
2569 }
2570 }
2571
2572 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
2573 // Common code will reject creating a pre-inc form if the base pointer
2574 // is a frame index, or if N is a store and the base pointer is either
2575 // the same as or a predecessor of the value being stored. Check for
2576 // those situations here, and try with swapped Base/Offset instead.
2577 bool Swap = false;
2578
2579 if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
2580 Swap = true;
2581 else if (!isLoad) {
2582 SDValue Val = cast<StoreSDNode>(N)->getValue();
2583 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
2584 Swap = true;
2585 }
2586
2587 if (Swap)
2588 std::swap(Base, Offset);
2589
2590 AM = ISD::PRE_INC;
2591 return true;
2592 }
2593
2594 // LDU/STU can only handle immediates that are a multiple of 4.
2595 if (VT != MVT::i64) {
2596 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 0))
2597 return false;
2598 } else {
2599 // LDU/STU need an address with at least 4-byte alignment.
2600 if (Alignment < 4)
2601 return false;
2602
2603 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 4))
2604 return false;
2605 }
2606
2607 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2608 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
2609 // sext i32 to i64 when addr mode is r+i.
2610 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
2611 LD->getExtensionType() == ISD::SEXTLOAD &&
2612 isa<ConstantSDNode>(Offset))
2613 return false;
2614 }
2615
2616 AM = ISD::PRE_INC;
2617 return true;
2618}
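// Hedged illustration, not from the original file: an update-form access such
// as "ldu r4, 8(r3)" loads from r3+8 and writes r3+8 back into r3; that is the
// pre-increment form this routine selects, and the multiple-of-4 displacement
// and 4-byte alignment checks above match the DS-form encoding used by the
// 64-bit variants.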
2619
2620//===----------------------------------------------------------------------===//
2621// LowerOperation implementation
2622//===----------------------------------------------------------------------===//
2623
2624/// Return true if we should reference labels using a PICBase, set the HiOpFlags
2625/// and LoOpFlags to the target MO flags.
2626static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
2627 unsigned &HiOpFlags, unsigned &LoOpFlags,
2628 const GlobalValue *GV = nullptr) {
2629 HiOpFlags = PPCII::MO_HA;
2630 LoOpFlags = PPCII::MO_LO;
2631
2632 // Don't use the pic base if not in PIC relocation model.
2633 if (IsPIC) {
2634 HiOpFlags |= PPCII::MO_PIC_FLAG;
2635 LoOpFlags |= PPCII::MO_PIC_FLAG;
2636 }
2637
2638 // If this is a reference to a global value that requires a non-lazy-ptr, make
2639 // sure that instruction lowering adds it.
2640 if (GV && Subtarget.hasLazyResolverStub(GV)) {
2641 HiOpFlags |= PPCII::MO_NLP_FLAG;
2642 LoOpFlags |= PPCII::MO_NLP_FLAG;
2643
2644 if (GV->hasHiddenVisibility()) {
2645 HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
2646 LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
2647 }
2648 }
2649}
2650
2651static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
2652 SelectionDAG &DAG) {
2653 SDLoc DL(HiPart);
2654 EVT PtrVT = HiPart.getValueType();
2655 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
2656
2657 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
2658 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
2659
2660 // With PIC, the first instruction is actually "GR+hi(&G)".
2661 if (isPIC)
2662 Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
2663 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
2664
2665 // Generate non-pic code that has direct accesses to the constant pool.
2666 // The address of the global is just (hi(&g)+lo(&g)).
2667 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
2668}
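// Rough sketch of the non-PIC result (illustrative only): the Hi/Lo pair built
// above typically selects to something like
//   lis  r3, g@ha
//   addi r3, r3, g@l
// forming hi(&g) + lo(&g); with PIC the high part additionally adds the PIC
// base register, as the comment above notes.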
2669
2670static void setUsesTOCBasePtr(MachineFunction &MF) {
2671 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2672 FuncInfo->setUsesTOCBasePtr();
2673}
2674
2675static void setUsesTOCBasePtr(SelectionDAG &DAG) {
2676 setUsesTOCBasePtr(DAG.getMachineFunction());
2677}
2678
2679SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
2680 SDValue GA) const {
2681 const bool Is64Bit = Subtarget.isPPC64();
2682 EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
2683 SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT)
2684 : Subtarget.isAIXABI()
2685 ? DAG.getRegister(PPC::R2, VT)
2686 : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
2687 SDValue Ops[] = { GA, Reg };
2688 return DAG.getMemIntrinsicNode(
2689 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
2690 MachinePointerInfo::getGOT(DAG.getMachineFunction()), 0,
2691 MachineMemOperand::MOLoad);
2692}
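// Hedged note, not from the original file: a TOC_ENTRY node is a TOC-indirect
// load of an entity's address; on 64-bit ELF with the small code model it
// would select to roughly "ld r3, sym@toc(r2)", with X2/R2 being the TOC base
// requested above.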
2693
2694SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
2695 SelectionDAG &DAG) const {
2696 EVT PtrVT = Op.getValueType();
2697 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2698 const Constant *C = CP->getConstVal();
2699
2700 // 64-bit SVR4 ABI code is always position-independent.
2701 // The actual address of the GlobalValue is stored in the TOC.
2702 if (Subtarget.is64BitELFABI()) {
2703 setUsesTOCBasePtr(DAG);
2704 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
2705 return getTOCEntry(DAG, SDLoc(CP), GA);
2706 }
2707
2708 unsigned MOHiFlag, MOLoFlag;
2709 bool IsPIC = isPositionIndependent();
2710 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2711
2712 if (IsPIC && Subtarget.isSVR4ABI()) {
2713 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
2714 PPCII::MO_PIC_FLAG);
2715 return getTOCEntry(DAG, SDLoc(CP), GA);
2716 }
2717
2718 SDValue CPIHi =
2719 DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
2720 SDValue CPILo =
2721 DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
2722 return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
2723}
2724
2725// For 64-bit PowerPC, prefer the more compact relative encodings.
2726// This trades 32 bits per jump table entry for one or two instructions
2727// at the jump site.
2728unsigned PPCTargetLowering::getJumpTableEncoding() const {
2729 if (isJumpTableRelative())
2730 return MachineJumpTableInfo::EK_LabelDifference32;
2731
2732 return TargetLowering::getJumpTableEncoding();
2733}
2734
2735bool PPCTargetLowering::isJumpTableRelative() const {
2736 if (Subtarget.isPPC64())
2737 return true;
2738 return TargetLowering::isJumpTableRelative();
2739}
2740
2741SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
2742 SelectionDAG &DAG) const {
2743 if (!Subtarget.isPPC64())
2744 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2745
2746 switch (getTargetMachine().getCodeModel()) {
2747 case CodeModel::Small:
2748 case CodeModel::Medium:
2749 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2750 default:
2751 return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
2752 getPointerTy(DAG.getDataLayout()));
2753 }
2754}
2755
2756const MCExpr *
2757PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
2758 unsigned JTI,
2759 MCContext &Ctx) const {
2760 if (!Subtarget.isPPC64())
2761 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2762
2763 switch (getTargetMachine().getCodeModel()) {
2764 case CodeModel::Small:
2765 case CodeModel::Medium:
2766 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2767 default:
2768 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2769 }
2770}
2771
2772SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
2773 EVT PtrVT = Op.getValueType();
2774 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2775
2776 // 64-bit SVR4 ABI code is always position-independent.
2777 // The actual address of the GlobalValue is stored in the TOC.
2778 if (Subtarget.is64BitELFABI()) {
2779 setUsesTOCBasePtr(DAG);
2780 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
2781 return getTOCEntry(DAG, SDLoc(JT), GA);
2782 }
2783
2784 unsigned MOHiFlag, MOLoFlag;
2785 bool IsPIC = isPositionIndependent();
2786 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2787
2788 if (IsPIC && Subtarget.isSVR4ABI()) {
2789 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
2790 PPCII::MO_PIC_FLAG);
2791 return getTOCEntry(DAG, SDLoc(GA), GA);
2792 }
2793
2794 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
2795 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
2796 return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
2797}
2798
2799SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
2800 SelectionDAG &DAG) const {
2801 EVT PtrVT = Op.getValueType();
2802 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
2803 const BlockAddress *BA = BASDN->getBlockAddress();
2804
2805 // 64-bit SVR4 ABI code is always position-independent.
2806 // The actual BlockAddress is stored in the TOC.
2807 if (Subtarget.is64BitELFABI()) {
2808 setUsesTOCBasePtr(DAG);
2809 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
2810 return getTOCEntry(DAG, SDLoc(BASDN), GA);
2811 }
2812
2813 // 32-bit position-independent ELF stores the BlockAddress in the .got.
2814 if (Subtarget.is32BitELFABI() && isPositionIndependent())
2815 return getTOCEntry(
2816 DAG, SDLoc(BASDN),
2817 DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
2818
2819 unsigned MOHiFlag, MOLoFlag;
2820 bool IsPIC = isPositionIndependent();
2821 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2822 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
2823 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
2824 return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
2825}
2826
2827SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
2828 SelectionDAG &DAG) const {
2829 // FIXME: TLS addresses currently use medium model code sequences,
2830 // which is the most useful form. Eventually support for small and
2831 // large models could be added if users need it, at the cost of
2832 // additional complexity.
2833 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
2834 if (DAG.getTarget().useEmulatedTLS())
2835 return LowerToTLSEmulatedModel(GA, DAG);
2836
2837 SDLoc dl(GA);
2838 const GlobalValue *GV = GA->getGlobal();
2839 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2840 bool is64bit = Subtarget.isPPC64();
2841 const Module *M = DAG.getMachineFunction().getFunction().getParent();
2842 PICLevel::Level picLevel = M->getPICLevel();
2843
2844 const TargetMachine &TM = getTargetMachine();
2845 TLSModel::Model Model = TM.getTLSModel(GV);
2846
2847 if (Model == TLSModel::LocalExec) {
2848 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2849 PPCII::MO_TPREL_HA);
2850 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2851 PPCII::MO_TPREL_LO);
2852 SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
2853 : DAG.getRegister(PPC::R2, MVT::i32);
2854
2855 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
2856 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
2857 }
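// An assumed sketch of what the nodes built above become on 64-bit targets,
// offered for illustration rather than taken from the source: the local-exec
// sequence typically ends up as
//   addis rT, r13, x@tprel@ha
//   addi  rT, rT, x@tprel@l
// i.e. a thread-pointer-relative offset added to the TLS register (X13).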
2858
2859 if (Model == TLSModel::InitialExec) {
2860 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2861 SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2862 PPCII::MO_TLS);
2863 SDValue GOTPtr;
2864 if (is64bit) {
2865 setUsesTOCBasePtr(DAG);
2866 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2867 GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
2868 PtrVT, GOTReg, TGA);
2869 } else {
2870 if (!TM.isPositionIndependent())
2871 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
2872 else if (picLevel == PICLevel::SmallPIC)
2873 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
2874 else
2875 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
2876 }
2877 SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
2878 PtrVT, TGA, GOTPtr);
2879 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
2880 }
2881
2882 if (Model == TLSModel::GeneralDynamic) {
2883 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2884 SDValue GOTPtr;
2885 if (is64bit) {
2886 setUsesTOCBasePtr(DAG);
2887 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2888 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
2889 GOTReg, TGA);
2890 } else {
2891 if (picLevel == PICLevel::SmallPIC)
2892 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
2893 else
2894 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
2895 }
2896 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
2897 GOTPtr, TGA, TGA);
2898 }
2899
2900 if (Model == TLSModel::LocalDynamic) {
2901 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2902 SDValue GOTPtr;
2903 if (is64bit) {
2904 setUsesTOCBasePtr(DAG);
2905 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2906 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
2907 GOTReg, TGA);
2908 } else {
2909 if (picLevel == PICLevel::SmallPIC)
2910 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
2911 else
2912 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
2913 }
2914 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
2915 PtrVT, GOTPtr, TGA, TGA);
2916 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
2917 PtrVT, TLSAddr, TGA);
2918 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
2919 }
2920
2921 llvm_unreachable("Unknown TLS model!");
2922}
2923
2924SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
2925 SelectionDAG &DAG) const {
2926 EVT PtrVT = Op.getValueType();
2927 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
2928 SDLoc DL(GSDN);
2929 const GlobalValue *GV = GSDN->getGlobal();
2930
2931 // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
2932 // The actual address of the GlobalValue is stored in the TOC.
2933 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
2934 setUsesTOCBasePtr(DAG);
2935 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
2936 return getTOCEntry(DAG, DL, GA);
2937 }
2938
2939 unsigned MOHiFlag, MOLoFlag;
2940 bool IsPIC = isPositionIndependent();
2941 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
2942
2943 if (IsPIC && Subtarget.isSVR4ABI()) {
2944 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
2945 GSDN->getOffset(),
2946 PPCII::MO_PIC_FLAG);
2947 return getTOCEntry(DAG, DL, GA);
2948 }
2949
2950 SDValue GAHi =
2951 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
2952 SDValue GALo =
2953 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
2954
2955 SDValue Ptr = LowerLabelRef(GAHi, GALo, IsPIC, DAG);
2956
2957 // If the global reference is actually to a non-lazy-pointer, we have to do an
2958 // extra load to get the address of the global.
2959 if (MOHiFlag & PPCII::MO_NLP_FLAG)
2960 Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
2961 return Ptr;
2962}
2963
2964SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
2965 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
2966 SDLoc dl(Op);
2967
2968 if (Op.getValueType() == MVT::v2i64) {
2969 // When the operands themselves are v2i64 values, we need to do something
2970 // special because VSX has no underlying comparison operations for these.
2971 if (Op.getOperand(0).getValueType() == MVT::v2i64) {
2972 // Equality can be handled by casting to the legal type for Altivec
2973 // comparisons, everything else needs to be expanded.
2974 if (CC == ISD::SETEQ || CC == ISD::SETNE) {
2975 return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
2976 DAG.getSetCC(dl, MVT::v4i32,
2977 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
2978 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
2979 CC));
2980 }
2981
2982 return SDValue();
2983 }
2984
2985 // We handle most of these in the usual way.
2986 return Op;
2987 }
2988
2989 // If we're comparing for equality to zero, expose the fact that this is
2990 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
2991 // fold the new nodes.
2992 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
2993 return V;
2994
2995 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
2996 // Leave comparisons against 0 and -1 alone for now, since they're usually
2997 // optimized. FIXME: revisit this when we can custom lower all setcc
2998 // optimizations.
2999 if (C->isAllOnesValue() || C->isNullValue())
3000 return SDValue();
3001 }
3002
3003 // If we have an integer seteq/setne, turn it into a compare against zero
3004 // by xor'ing the rhs with the lhs, which is faster than setting a
3005 // condition register, reading it back out, and masking the correct bit. The
3006 // normal approach here uses sub to do this instead of xor. Using xor exposes
3007 // the result to other bit-twiddling opportunities.
3008 EVT LHSVT = Op.getOperand(0).getValueType();
3009 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3010 EVT VT = Op.getValueType();
3011 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
3012 Op.getOperand(1));
3013 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3014 }
3015 return SDValue();
3016}
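// Illustrative note (editorial): for integer operands the rewrite above turns
// "setcc a, b, seteq" into "setcc (xor a, b), 0, seteq", so an equality test
// becomes a zero test of a ^ b whose result other combines can reuse.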
3017
3018SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3019 SDNode *Node = Op.getNode();
3020 EVT VT = Node->getValueType(0);
3021 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3022 SDValue InChain = Node->getOperand(0);
3023 SDValue VAListPtr = Node->getOperand(1);
3024 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3025 SDLoc dl(Node);
3026
3027 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3028
3029 // gpr_index
3030 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3031 VAListPtr, MachinePointerInfo(SV), MVT::i8);
3032 InChain = GprIndex.getValue(1);
3033
3034 if (VT == MVT::i64) {
3035 // Check if GprIndex is even
3036 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3037 DAG.getConstant(1, dl, MVT::i32));
3038 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3039 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3040 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3041 DAG.getConstant(1, dl, MVT::i32));
3042 // Align GprIndex to be even if it isn't
3043 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3044 GprIndex);
3045 }
3046
3047 // fpr index is 1 byte after gpr
3048 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3049 DAG.getConstant(1, dl, MVT::i32));
3050
3051 // fpr
3052 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3053 FprPtr, MachinePointerInfo(SV), MVT::i8);
3054 InChain = FprIndex.getValue(1);
3055
3056 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3057 DAG.getConstant(8, dl, MVT::i32));
3058
3059 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3060 DAG.getConstant(4, dl, MVT::i32));
3061
3062 // areas
3063 SDValue OverflowArea =
3064 DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3065 InChain = OverflowArea.getValue(1);
3066
3067 SDValue RegSaveArea =
3068 DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3069 InChain = RegSaveArea.getValue(1);
3070
3071 // select overflow_area if index >= 8
3072 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3073 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3074
3075 // adjustment constant gpr_index * 4/8
3076 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3077 VT.isInteger() ? GprIndex : FprIndex,
3078 DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3079 MVT::i32));
3080
3081 // OurReg = RegSaveArea + RegConstant
3082 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3083 RegConstant);
3084
3085 // Floating types are 32 bytes into RegSaveArea
3086 if (VT.isFloatingPoint())
3087 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3088 DAG.getConstant(32, dl, MVT::i32));
3089
3090 // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3091 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3092 VT.isInteger() ? GprIndex : FprIndex,
3093 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3094 MVT::i32));
3095
3096 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3097 VT.isInteger() ? VAListPtr : FprPtr,
3098 MachinePointerInfo(SV), MVT::i8);
3099
3100 // determine if we should load from reg_save_area or overflow_area
3101 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3102
3103 // increase overflow_area by 4/8 if gpr/fpr >= 8
3104 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3105 DAG.getConstant(VT.isInteger() ? 4 : 8,
3106 dl, MVT::i32));
3107
3108 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3109 OverflowAreaPlusN);
3110
3111 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3112 MachinePointerInfo(), MVT::i32);
3113
3114 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3115}
3116
3117SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3118 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3119
3120 // We have to copy the entire va_list struct:
3121 // 2*sizeof(char) + 2 bytes of alignment padding + 2*sizeof(char*) = 12 bytes
3122 return DAG.getMemcpy(Op.getOperand(0), Op,
3123 Op.getOperand(1), Op.getOperand(2),
3124 DAG.getConstant(12, SDLoc(Op), MVT::i32), 8, false, true,
3125 false, MachinePointerInfo(), MachinePointerInfo());
3126}
3127
3128SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3129 SelectionDAG &DAG) const {
3130 return Op.getOperand(0);
3131}
3132
3133SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3134 SelectionDAG &DAG) const {
3135 SDValue Chain = Op.getOperand(0);
3136 SDValue Trmp = Op.getOperand(1); // trampoline
3137 SDValue FPtr = Op.getOperand(2); // nested function
3138 SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3139 SDLoc dl(Op);
3140
3141 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3142 bool isPPC64 = (PtrVT == MVT::i64);
3143 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3144
3145 TargetLowering::ArgListTy Args;
3146 TargetLowering::ArgListEntry Entry;
3147
3148 Entry.Ty = IntPtrTy;
3149 Entry.Node = Trmp; Args.push_back(Entry);
3150
3151 // TrampSize == (isPPC64 ? 48 : 40);
3152 Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
3153 isPPC64 ? MVT::i64 : MVT::i32);
3154 Args.push_back(Entry);
3155
3156 Entry.Node = FPtr; Args.push_back(Entry);
3157 Entry.Node = Nest; Args.push_back(Entry);
3158
3159 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
3160 TargetLowering::CallLoweringInfo CLI(DAG);
3161 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3162 CallingConv::C, Type::getVoidTy(*DAG.getContext()),
3163 DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
3164
3165 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3166 return CallResult.second;
3167}
3168
3169SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3170 MachineFunction &MF = DAG.getMachineFunction();
3171 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3172 EVT PtrVT = getPointerTy(MF.getDataLayout());
3173
3174 SDLoc dl(Op);
3175
3176 if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
3177 // vastart just stores the address of the VarArgsFrameIndex slot into the
3178 // memory location argument.
3179 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3180 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3181 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3182 MachinePointerInfo(SV));
3183 }
3184
3185 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
3186 // We suppose the given va_list is already allocated.
3187 //
3188 // typedef struct {
3189 // char gpr; /* index into the array of 8 GPRs
3190 // * stored in the register save area
3191 // * gpr=0 corresponds to r3,
3192 // * gpr=1 to r4, etc.
3193 // */
3194 // char fpr; /* index into the array of 8 FPRs
3195 // * stored in the register save area
3196 // * fpr=0 corresponds to f1,
3197 // * fpr=1 to f2, etc.
3198 // */
3199 // char *overflow_arg_area;
3200 // /* location on stack that holds
3201 // * the next overflow argument
3202 // */
3203 // char *reg_save_area;
3204 // /* where r3:r10 and f1:f8 (if saved)
3205 // * are stored
3206 // */
3207 // } va_list[1];
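// An editorial reading of the offsets computed below, not text from the
// original file: with 4-byte pointers this lays the fields out at byte offsets
// 0 (gpr), 1 (fpr), 4 (overflow_arg_area), and 8 (reg_save_area).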
3208
3209 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
3210 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
3211 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
3212 PtrVT);
3213 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3214 PtrVT);
3215
3216 uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
3217 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
3218
3219 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
3220 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
3221
3222 uint64_t FPROffset = 1;
3223 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
3224
3225 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3226
3227 // Store first byte : number of int regs
3228 SDValue firstStore =
3229 DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
3230 MachinePointerInfo(SV), MVT::i8);
3231 uint64_t nextOffset = FPROffset;
3232 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
3233 ConstFPROffset);
3234
3235 // Store second byte : number of float regs
3236 SDValue secondStore =
3237 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
3238 MachinePointerInfo(SV, nextOffset), MVT::i8);
3239 nextOffset += StackOffset;
3240 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
3241
3242 // Store second word : arguments given on stack
3243 SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
3244 MachinePointerInfo(SV, nextOffset));
3245 nextOffset += FrameOffset;
3246 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
3247
3248 // Store third word : arguments given in registers
3249 return DAG.getStore(thirdStore, dl, FR, nextPtr,
3250 MachinePointerInfo(SV, nextOffset));
3251}
3252
3253/// FPR - The set of FP registers that should be allocated for arguments
3254/// on Darwin and AIX.
3255static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
3256 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
3257 PPC::F11, PPC::F12, PPC::F13};
3258
3259/// QFPR - The set of QPX registers that should be allocated for arguments.
3260static const MCPhysReg QFPR[] = {
3261 PPC::QF1, PPC::QF2, PPC::QF3, PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7,
3262 PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13};
3263
3264/// CalculateStackSlotSize - Calculates the size reserved for this argument on
3265/// the stack.
3266static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
3267 unsigned PtrByteSize) {
3268 unsigned ArgSize = ArgVT.getStoreSize();
3269 if (Flags.isByVal())
3270 ArgSize = Flags.getByValSize();
3271
3272 // Round up to multiples of the pointer size, except for array members,
3273 // which are always packed.
3274 if (!Flags.isInConsecutiveRegs())
3275 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3276
3277 return ArgSize;
3278}
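// Illustrative example (editorial): with an 8-byte pointer size, a 13-byte
// byval argument reserves ((13 + 7) / 8) * 8 = 16 bytes; array members
// (isInConsecutiveRegs) skip this rounding and keep their exact store size.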
3279
3280/// CalculateStackSlotAlignment - Calculates the alignment of this argument
3281/// on the stack.
3282static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
3283 ISD::ArgFlagsTy Flags,
3284 unsigned PtrByteSize) {
3285 unsigned Align = PtrByteSize;
3286
3287 // Altivec parameters are padded to a 16 byte boundary.
3288 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3289 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3290 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3291 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3292 Align = 16;
3293 // QPX vector types stored in double-precision are padded to a 32 byte
3294 // boundary.
3295 else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1)
3296 Align = 32;
3297
3298 // ByVal parameters are aligned as requested.
3299 if (Flags.isByVal()) {
3300 unsigned BVAlign = Flags.getByValAlign();
3301 if (BVAlign > PtrByteSize) {
3302 if (BVAlign % PtrByteSize != 0)
3303 llvm_unreachable(
3304 "ByVal alignment is not a multiple of the pointer size");
3305
3306 Align = BVAlign;
3307 }
3308 }
3309
3310 // Array members are always packed to their original alignment.
3311 if (Flags.isInConsecutiveRegs()) {
3312 // If the array member was split into multiple registers, the first
3313 // needs to be aligned to the size of the full type. (Except for
3314 // ppcf128, which is only aligned as its f64 components.)
3315 if (Flags.isSplit() && OrigVT != MVT::ppcf128)
3316 Align = OrigVT.getStoreSize();
3317 else
3318 Align = ArgVT.getStoreSize();
3319 }
3320
3321 return Align;
3322}
3323
3324/// CalculateStackSlotUsed - Return whether this argument will use its
3325/// stack slot (instead of being passed in registers). ArgOffset,
3326/// AvailableFPRs, and AvailableVRs must hold the current argument
3327/// position, and will be updated to account for this argument.
3328static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
3329 ISD::ArgFlagsTy Flags,
3330 unsigned PtrByteSize,
3331 unsigned LinkageSize,
3332 unsigned ParamAreaSize,
3333 unsigned &ArgOffset,
3334 unsigned &AvailableFPRs,
3335 unsigned &AvailableVRs, bool HasQPX) {
3336 bool UseMemory = false;
3337
3338 // Respect alignment of argument on the stack.
3339 unsigned Align =
3340 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
3341 ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
3342 // If there's no space left in the argument save area, we must
3343 // use memory (this check also catches zero-sized arguments).
3344 if (ArgOffset >= LinkageSize + ParamAreaSize)
3345 UseMemory = true;
3346
3347 // Allocate argument on the stack.
3348 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
3349 if (Flags.isInConsecutiveRegsLast())
3350 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3351 // If we overran the argument save area, we must use memory
3352 // (this check catches arguments passed partially in memory)
3353 if (ArgOffset > LinkageSize + ParamAreaSize)
3354 UseMemory = true;
3355
3356 // However, if the argument is actually passed in an FPR or a VR,
3357 // we don't use memory after all.
3358 if (!Flags.isByVal()) {
3359 if (ArgVT == MVT::f32 || ArgVT == MVT::f64 ||
3360 // QPX registers overlap with the scalar FP registers.
3361 (HasQPX && (ArgVT == MVT::v4f32 ||
3362 ArgVT == MVT::v4f64 ||
3363 ArgVT == MVT::v4i1)))
3364 if (AvailableFPRs > 0) {
3365 --AvailableFPRs;
3366 return false;
3367 }
3368 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3369 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3370 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3371 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3372 if (AvailableVRs > 0) {
3373 --AvailableVRs;
3374 return false;
3375 }
3376 }
3377
3378 return UseMemory;
3379}
3380
3381/// EnsureStackAlignment - Round stack frame size up from NumBytes to
3382/// ensure minimum alignment required for target.
3383static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
3384 unsigned NumBytes) {
3385 unsigned TargetAlign = Lowering->getStackAlignment();
3386 unsigned AlignMask = TargetAlign - 1;
3387 NumBytes = (NumBytes + AlignMask) & ~AlignMask;
3388 return NumBytes;
3389}
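// Quick illustration (editorial, not from the source): with a 16-byte stack
// alignment, AlignMask is 15, so NumBytes = 100 is rounded to
// (100 + 15) & ~15 = 112.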
3390
3391SDValue PPCTargetLowering::LowerFormalArguments(
3392 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3393 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3394 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3395 if (Subtarget.is64BitELFABI())
3396 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
3397 InVals);
3398 else if (Subtarget.is32BitELFABI())
3399 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
3400 InVals);
3401
3402 // FIXME: We are using this for both AIX and Darwin. We should add appropriate
3403 // AIX testing, and rename it appropriately.
3404 return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, dl, DAG,
3405 InVals);
3406}
3407
3408SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
3409 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3410 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3411 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3412
3413 // 32-bit SVR4 ABI Stack Frame Layout:
3414 // +-----------------------------------+
3415 // +--> | Back chain |
3416 // | +-----------------------------------+
3417 // | | Floating-point register save area |
3418 // | +-----------------------------------+
3419 // | | General register save area |
3420 // | +-----------------------------------+
3421 // | | CR save word |
3422 // | +-----------------------------------+
3423 // | | VRSAVE save word |
3424 // | +-----------------------------------+
3425 // | | Alignment padding |
3426 // | +-----------------------------------+
3427 // | | Vector register save area |
3428 // | +-----------------------------------+
3429 // | | Local variable space |
3430 // | +-----------------------------------+
3431 // | | Parameter list area |
3432 // | +-----------------------------------+
3433 // | | LR save word |
3434 // | +-----------------------------------+
3435 // SP--> +--- | Back chain |
3436 // +-----------------------------------+
3437 //
3438 // Specifications:
3439 // System V Application Binary Interface PowerPC Processor Supplement
3440 // AltiVec Technology Programming Interface Manual
3441
3442 MachineFunction &MF = DAG.getMachineFunction();
3443 MachineFrameInfo &MFI = MF.getFrameInfo();
3444 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3445
3446 EVT PtrVT = getPointerTy(MF.getDataLayout());
3447 // Potential tail calls could cause overwriting of argument stack slots.
3448 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3449 (CallConv == CallingConv::Fast));
3450 unsigned PtrByteSize = 4;
3451
3452 // Assign locations to all of the incoming arguments.
3453 SmallVector<CCValAssign, 16> ArgLocs;
3454 PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3455 *DAG.getContext());
3456
3457 // Reserve space for the linkage area on the stack.
3458 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3459 CCInfo.AllocateStack(LinkageSize, PtrByteSize);
3460 if (useSoftFloat())
3461 CCInfo.PreAnalyzeFormalArguments(Ins);
3462
3463 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
3464 CCInfo.clearWasPPCF128();
3465
3466 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3467 CCValAssign &VA = ArgLocs[i];
3468
3469 // Arguments stored in registers.
3470 if (VA.isRegLoc()) {
3471 const TargetRegisterClass *RC;
3472 EVT ValVT = VA.getValVT();
3473
3474 switch (ValVT.getSimpleVT().SimpleTy) {
3475 default:
3476 llvm_unreachable("ValVT not supported by formal arguments Lowering");
3477 case MVT::i1:
3478 case MVT::i32:
3479 RC = &PPC::GPRCRegClass;
3480 break;
3481 case MVT::f32:
3482 if (Subtarget.hasP8Vector())
3483 RC = &PPC::VSSRCRegClass;
3484 else if (Subtarget.hasSPE())
3485 RC = &PPC::GPRCRegClass;
3486 else
3487 RC = &PPC::F4RCRegClass;
3488 break;
3489 case MVT::f64:
3490 if (Subtarget.hasVSX())
3491 RC = &PPC::VSFRCRegClass;
3492 else if (Subtarget.hasSPE())
3493 // SPE passes doubles in GPR pairs.
3494 RC = &PPC::GPRCRegClass;
3495 else
3496 RC = &PPC::F8RCRegClass;
3497 break;
3498 case MVT::v16i8:
3499 case MVT::v8i16:
3500 case MVT::v4i32:
3501 RC = &PPC::VRRCRegClass;
3502 break;
3503 case MVT::v4f32:
3504 RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass;
3505 break;
3506 case MVT::v2f64:
3507 case MVT::v2i64:
3508 RC = &PPC::VRRCRegClass;
3509 break;
3510 case MVT::v4f64:
3511 RC = &PPC::QFRCRegClass;
3512 break;
3513 case MVT::v4i1:
3514 RC = &PPC::QBRCRegClass;
3515 break;
3516 }
3517
3518 SDValue ArgValue;
3519 // Transform the arguments stored in physical registers into
3520 // virtual ones.
3521 if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
3522 assert(i + 1 < e && "No second half of double precision argument");
3523 unsigned RegLo = MF.addLiveIn(VA.getLocReg(), RC);
3524 unsigned RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
3525 SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
3526 SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
3527 if (!Subtarget.isLittleEndian())
3528 std::swap (ArgValueLo, ArgValueHi);
3529 ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
3530 ArgValueHi);
3531 } else {
3532 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3533 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
3534 ValVT == MVT::i1 ? MVT::i32 : ValVT);
3535 if (ValVT == MVT::i1)
3536 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
3537 }
3538
3539 InVals.push_back(ArgValue);
3540 } else {
3541 // Argument stored in memory.
3542 assert(VA.isMemLoc());
3543
3544 // Get the extended size of the argument type in stack
3545 unsigned ArgSize = VA.getLocVT().getStoreSize();
3546 // Get the actual size of the argument type
3547 unsigned ObjSize = VA.getValVT().getStoreSize();
3548 unsigned ArgOffset = VA.getLocMemOffset();
3549 // Stack objects in PPC32 are right justified.
3550 ArgOffset += ArgSize - ObjSize;
3551 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
3552
3553 // Create load nodes to retrieve arguments from the stack.
3554 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3555 InVals.push_back(
3556 DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
3557 }
3558 }
3559
3560 // Assign locations to all of the incoming aggregate by value arguments.
3561 // Aggregates passed by value are stored in the local variable space of the
3562 // caller's stack frame, right above the parameter list area.
3563 SmallVector<CCValAssign, 16> ByValArgLocs;
3564 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
3565 ByValArgLocs, *DAG.getContext());
3566
3567 // Reserve stack space for the allocations in CCInfo.
3568 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
3569
3570 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
3571
3572 // Area that is at least reserved in the caller of this function.
3573 unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
3574 MinReservedArea = std::max(MinReservedArea, LinkageSize);
3575
3576 // Set the size that is at least reserved in caller of this function. Tail
3577 // call optimized function's reserved stack space needs to be aligned so that
3578 // taking the difference between two stack areas will result in an aligned
3579 // stack.
3580 MinReservedArea =
3581 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
3582 FuncInfo->setMinReservedArea(MinReservedArea);
3583
3584 SmallVector<SDValue, 8> MemOps;
3585
3586 // If the function takes variable number of arguments, make a frame index for
3587 // the start of the first vararg value... for expansion of llvm.va_start.
3588 if (isVarArg) {
3589 static const MCPhysReg GPArgRegs[] = {
3590 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
3591 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
3592 };
3593 const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
3594
3595 static const MCPhysReg FPArgRegs[] = {
3596 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
3597 PPC::F8
3598 };
3599 unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
3600
3601 if (useSoftFloat() || hasSPE())
3602 NumFPArgRegs = 0;
3603
3604 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
3605 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
3606
3607 // Make room for NumGPArgRegs and NumFPArgRegs.
3608 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
3609 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
3610
3611 FuncInfo->setVarArgsStackOffset(
3612 MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
3613 CCInfo.getNextStackOffset(), true));
3614
3615 FuncInfo->setVarArgsFrameIndex(MFI.CreateStackObject(Depth, 8, false));
3616 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3617
3618 // The fixed integer arguments of a variadic function are stored to the
3619 // VarArgsFrameIndex on the stack so that they may be loaded by
3620 // dereferencing the result of va_next.
3621 for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
3622 // Get an existing live-in vreg, or add a new one.
3623 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
3624 if (!VReg)
3625 VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
3626
3627 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3628 SDValue Store =
3629 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3630 MemOps.push_back(Store);
3631 // Increment the address by four for the next argument to store
3632 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
3633 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3634 }
3635
3636 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
3637 // is set.
3638 // The double arguments are stored to the VarArgsFrameIndex
3639 // on the stack.
3640 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
3641 // Get an existing live-in vreg, or add a new one.
3642 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
3643 if (!VReg)
3644 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
3645
3646 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
3647 SDValue Store =
3648 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3649 MemOps.push_back(Store);
3650 // Increment the address by eight for the next argument to store
3651 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
3652 PtrVT);
3653 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3654 }
3655 }
3656
3657 if (!MemOps.empty())
3658 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3659
3660 return Chain;
3661}
3662
3663// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3664// value to MVT::i64 and then truncate to the correct register size.
3665SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
3666 EVT ObjectVT, SelectionDAG &DAG,
3667 SDValue ArgVal,
3668 const SDLoc &dl) const {
3669 if (Flags.isSExt())
3670 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
3671 DAG.getValueType(ObjectVT));
3672 else if (Flags.isZExt())
3673 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
3674 DAG.getValueType(ObjectVT));
3675
3676 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
3677}
3678
3679SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
3680 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3681 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3682 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3683 // TODO: add description of PPC stack frame format, or at least some docs.
3684 //
3685 bool isELFv2ABI = Subtarget.isELFv2ABI();
3686 bool isLittleEndian = Subtarget.isLittleEndian();
3687 MachineFunction &MF = DAG.getMachineFunction();
3688 MachineFrameInfo &MFI = MF.getFrameInfo();
3689 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3690
3691 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
3692 "fastcc not supported on varargs functions");
3693
3694 EVT PtrVT = getPointerTy(MF.getDataLayout());
3695 // Potential tail calls could cause overwriting of argument stack slots.
3696 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3697 (CallConv == CallingConv::Fast));
3698 unsigned PtrByteSize = 8;
3699 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3700
3701 static const MCPhysReg GPR[] = {
3702 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
3703 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
3704 };
3705 static const MCPhysReg VR[] = {
3706 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
3707 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
3708 };
3709
3710 const unsigned Num_GPR_Regs = array_lengthof(GPR);
3711 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
3712 const unsigned Num_VR_Regs = array_lengthof(VR);
3713 const unsigned Num_QFPR_Regs = Num_FPR_Regs;
3714
3715 // Do a first pass over the arguments to determine whether the ABI
3716 // guarantees that our caller has allocated the parameter save area
3717 // on its stack frame. In the ELFv1 ABI, this is always the case;
3718 // in the ELFv2 ABI, it is true if this is a vararg function or if
3719 // any parameter is located in a stack slot.
3720
3721 bool HasParameterArea = !isELFv2ABI || isVarArg;
3722 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
3723 unsigned NumBytes = LinkageSize;
3724 unsigned AvailableFPRs = Num_FPR_Regs;
3725 unsigned AvailableVRs = Num_VR_Regs;
3726 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
3727 if (Ins[i].Flags.isNest())
3728 continue;
3729
3730 if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
3731 PtrByteSize, LinkageSize, ParamAreaSize,
3732 NumBytes, AvailableFPRs, AvailableVRs,
3733 Subtarget.hasQPX()))
3734 HasParameterArea = true;
3735 }
3736
3737 // Add DAG nodes to load the arguments or copy them out of registers. On
3738 // entry to a function on PPC, the arguments start after the linkage area,
3739 // although the first ones are often in registers.
3740
3741 unsigned ArgOffset = LinkageSize;
3742 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
3743 unsigned &QFPR_idx = FPR_idx;
3744 SmallVector<SDValue, 8> MemOps;
3745 Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
3746 unsigned CurArgIdx = 0;
3747 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
3748 SDValue ArgVal;
3749 bool needsLoad = false;
3750 EVT ObjectVT = Ins[ArgNo].VT;
3751 EVT OrigVT = Ins[ArgNo].ArgVT;
3752 unsigned ObjSize = ObjectVT.getStoreSize();
3753 unsigned ArgSize = ObjSize;
3754 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
3755 if (Ins[ArgNo].isOrigArg()) {
3756 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
3757 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
3758 }
3759 // We re-align the argument offset for each argument, except when using the
3760 // fast calling convention, when we need to make sure we do that only when
3761 // we'll actually use a stack slot.
3762 unsigned CurArgOffset, Align;
3763 auto ComputeArgOffset = [&]() {
3764 /* Respect alignment of argument on the stack. */
3765 Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
3766 ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
3767 CurArgOffset = ArgOffset;
3768 };
3769
3770 if (CallConv != CallingConv::Fast) {
3771 ComputeArgOffset();
3772
3773 /* Compute GPR index associated with argument offset. */
3774 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
3775 GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
3776 }
3777
3778 // FIXME the codegen can be much improved in some cases.
3779 // We do not have to keep everything in memory.
3780 if (Flags.isByVal()) {
3781 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
3782
3783 if (CallConv == CallingConv::Fast)
3784 ComputeArgOffset();
3785
3786 // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of the register size.
3787 ObjSize = Flags.getByValSize();
3788 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3789 // Empty aggregate parameters do not take up registers. Examples:
3790 // struct { } a;
3791 // union { } b;
3792 // int c[0];
3793 // etc. However, we have to provide a place-holder in InVals, so
3794 // pretend we have an 8-byte item at the current address for that
3795 // purpose.
3796 if (!ObjSize) {
3797 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
3798 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3799 InVals.push_back(FIN);
3800 continue;
3801 }
3802
3803 // Create a stack object covering all stack doublewords occupied
3804 // by the argument. If the argument is (fully or partially) on
3805 // the stack, or if the argument is fully in registers but the
3806 // caller has allocated the parameter save anyway, we can refer
3807 // directly to the caller's stack frame. Otherwise, create a
3808 // local copy in our own frame.
3809 int FI;
3810 if (HasParameterArea ||
3811 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
3812 FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
3813 else
3814 FI = MFI.CreateStackObject(ArgSize, Align, false);
3815 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3816
3817 // Handle aggregates smaller than 8 bytes.
3818 if (ObjSize < PtrByteSize) {
3819 // The value of the object is its address, which differs from the
3820 // address of the enclosing doubleword on big-endian systems.
3821 SDValue Arg = FIN;
3822 if (!isLittleEndian) {
3823 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
3824 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
3825 }
3826 InVals.push_back(Arg);
3827
3828 if (GPR_idx != Num_GPR_Regs) {
3829 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3830 FuncInfo->addLiveInAttr(VReg, Flags);
3831 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3832 SDValue Store;
3833
3834 if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
3835 EVT ObjType = (ObjSize == 1 ? MVT::i8 :
3836 (ObjSize == 2 ? MVT::i16 : MVT::i32));
3837 Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
3838 MachinePointerInfo(&*FuncArg), ObjType);
3839 } else {
3840 // For sizes that don't fit a truncating store (3, 5, 6, 7),
3841 // store the whole register as-is to the parameter save area
3842 // slot.
3843 Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3844 MachinePointerInfo(&*FuncArg));
3845 }
3846
3847 MemOps.push_back(Store);
3848 }
3849 // Whether we copied from a register or not, advance the offset
3850 // into the parameter save area by a full doubleword.
3851 ArgOffset += PtrByteSize;
3852 continue;
3853 }
3854
3855 // The value of the object is its address, which is the address of
3856 // its first stack doubleword.
3857 InVals.push_back(FIN);
3858
3859 // Store whatever pieces of the object are in registers to memory.
3860 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
3861 if (GPR_idx == Num_GPR_Regs)
3862 break;
3863
3864 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3865 FuncInfo->addLiveInAttr(VReg, Flags);
3866 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3867 SDValue Addr = FIN;
3868 if (j) {
3869 SDValue Off = DAG.getConstant(j, dl, PtrVT);
3870 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
3871 }
3872 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
3873 MachinePointerInfo(&*FuncArg, j));
3874 MemOps.push_back(Store);
3875 ++GPR_idx;
3876 }
3877 ArgOffset += ArgSize;
3878 continue;
3879 }
3880
3881 switch (ObjectVT.getSimpleVT().SimpleTy) {
3882 default: llvm_unreachable("Unhandled argument type!");
3883 case MVT::i1:
3884 case MVT::i32:
3885 case MVT::i64:
3886 if (Flags.isNest()) {
3887 // The 'nest' parameter, if any, is passed in R11.
3888 unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
3889 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3890
3891 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
3892 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
3893
3894 break;
3895 }
3896
3897 // These can be scalar arguments or elements of an integer array type
3898 // passed directly. Clang may use those instead of "byval" aggregate
3899 // types to avoid forcing arguments to memory unnecessarily.
3900 if (GPR_idx != Num_GPR_Regs) {
3901 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3902 FuncInfo->addLiveInAttr(VReg, Flags);
3903 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3904
3905 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
3906 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3907 // value to MVT::i64 and then truncate to the correct register size.
3908 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
3909 } else {
3910 if (CallConv == CallingConv::Fast)
3911 ComputeArgOffset();
3912
3913 needsLoad = true;
3914 ArgSize = PtrByteSize;
3915 }
3916 if (CallConv != CallingConv::Fast || needsLoad)
3917 ArgOffset += 8;
3918 break;
3919
3920 case MVT::f32:
3921 case MVT::f64:
3922 // These can be scalar arguments or elements of a float array type
3923 // passed directly. The latter are used to implement ELFv2 homogenous
3924 // float aggregates.
3925 if (FPR_idx != Num_FPR_Regs) {
3926 unsigned VReg;
3927
3928 if (ObjectVT == MVT::f32)
3929 VReg = MF.addLiveIn(FPR[FPR_idx],
3930 Subtarget.hasP8Vector()
3931 ? &PPC::VSSRCRegClass
3932 : &PPC::F4RCRegClass);
3933 else
3934 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
3935 ? &PPC::VSFRCRegClass
3936 : &PPC::F8RCRegClass);
3937
3938 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3939 ++FPR_idx;
3940 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
3941 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
3942 // once we support fp <-> gpr moves.
3943
3944 // This can only ever happen in the presence of f32 array types,
3945 // since otherwise we never run out of FPRs before running out
3946 // of GPRs.
3947 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3948 FuncInfo->addLiveInAttr(VReg, Flags);
3949 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3950
3951 if (ObjectVT == MVT::f32) {
3952 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
3953 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
3954 DAG.getConstant(32, dl, MVT::i32));
3955 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
3956 }
3957
3958 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
3959 } else {
3960 if (CallConv == CallingConv::Fast)
3961 ComputeArgOffset();
3962
3963 needsLoad = true;
3964 }
3965
3966 // When passing an array of floats, the array occupies consecutive
3967 // space in the argument area; only round up to the next doubleword
3968 // at the end of the array. Otherwise, each float takes 8 bytes.
3969 if (CallConv != CallingConv::Fast || needsLoad) {
3970 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
3971 ArgOffset += ArgSize;
3972 if (Flags.isInConsecutiveRegsLast())
3973 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3974 }
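      // For example, three consecutive f32 elements of a homogeneous float
      // aggregate each advance ArgOffset by 4 bytes, and only the last element
      // rounds the running offset up to the next doubleword boundary.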
3975 break;
3976 case MVT::v4f32:
3977 case MVT::v4i32:
3978 case MVT::v8i16:
3979 case MVT::v16i8:
3980 case MVT::v2f64:
3981 case MVT::v2i64:
3982 case MVT::v1i128:
3983 case MVT::f128:
3984 if (!Subtarget.hasQPX()) {
3985 // These can be scalar arguments or elements of a vector array type
3986 // passed directly. The latter are used to implement ELFv2 homogenous
3987 // vector aggregates.
3988 if (VR_idx != Num_VR_Regs) {
3989 unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
3990 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3991 ++VR_idx;
3992 } else {
3993 if (CallConv == CallingConv::Fast)
3994 ComputeArgOffset();
3995 needsLoad = true;
3996 }
3997 if (CallConv != CallingConv::Fast || needsLoad)
3998 ArgOffset += 16;
3999 break;
4000 } // not QPX
4001
4002 assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 &&
4003 "Invalid QPX parameter type");
4004 LLVM_FALLTHROUGH;
4005
4006 case MVT::v4f64:
4007 case MVT::v4i1:
4008 // QPX vectors are treated like their scalar floating-point subregisters
4009 // (except that they're larger).
4010 unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 16 : 32;
4011 if (QFPR_idx != Num_QFPR_Regs) {
4012 const TargetRegisterClass *RC;
4013 switch (ObjectVT.getSimpleVT().SimpleTy) {
4014 case MVT::v4f64: RC = &PPC::QFRCRegClass; break;
4015 case MVT::v4f32: RC = &PPC::QSRCRegClass; break;
4016 default: RC = &PPC::QBRCRegClass; break;
4017 }
4018
4019 unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC);
4020 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4021 ++QFPR_idx;
4022 } else {
4023 if (CallConv == CallingConv::Fast)
4024 ComputeArgOffset();
4025 needsLoad = true;
4026 }
4027 if (CallConv != CallingConv::Fast || needsLoad)
4028 ArgOffset += Sz;
4029 break;
4030 }
4031
4032 // We need to load the argument to a virtual register if we determined
4033 // above that we ran out of physical registers of the appropriate type.
4034 if (needsLoad) {
4035 if (ObjSize < ArgSize && !isLittleEndian)
4036 CurArgOffset += ArgSize - ObjSize;
4037 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4038 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4039 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4040 }
4041
4042 InVals.push_back(ArgVal);
4043 }
4044
4045 // Area that is at least reserved in the caller of this function.
4046 unsigned MinReservedArea;
4047 if (HasParameterArea)
4048 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4049 else
4050 MinReservedArea = LinkageSize;
4051
4052 // Set the size that is at least reserved in caller of this function. Tail
4053 // call optimized functions' reserved stack space needs to be aligned so that
4054 // taking the difference between two stack areas will result in an aligned
4055 // stack.
4056 MinReservedArea =
4057 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4058 FuncInfo->setMinReservedArea(MinReservedArea);
4059
4060 // If the function takes variable number of arguments, make a frame index for
4061 // the start of the first vararg value... for expansion of llvm.va_start.
4062 if (isVarArg) {
4063 int Depth = ArgOffset;
4064
4065 FuncInfo->setVarArgsFrameIndex(
4066 MFI.CreateFixedObject(PtrByteSize, Depth, true));
4067 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4068
4069 // If this function is vararg, store any remaining integer argument regs
4070 // to their spots on the stack so that they may be loaded by dereferencing
4071 // the result of va_next.
4072 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4073 GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4074 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4075 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4076 SDValue Store =
4077 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4078 MemOps.push_back(Store);
4079 // Increment the address by PtrByteSize for the next argument to store
4080 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4081 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4082 }
4083 }
4084
4085 if (!MemOps.empty())
4086 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4087
4088 return Chain;
4089}
4090
4091SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
4092 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4093 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4094 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4095 // TODO: add description of PPC stack frame format, or at least some docs.
4096 //
4097 MachineFunction &MF = DAG.getMachineFunction();
4098 MachineFrameInfo &MFI = MF.getFrameInfo();
4099 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4100
4101 EVT PtrVT = getPointerTy(MF.getDataLayout());
4102 bool isPPC64 = PtrVT == MVT::i64;
4103 // Potential tail calls could cause overwriting of argument stack slots.
4104 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4105 (CallConv == CallingConv::Fast));
4106 unsigned PtrByteSize = isPPC64 ? 8 : 4;
4107 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4108 unsigned ArgOffset = LinkageSize;
4109 // Area that is at least reserved in caller of this function.
4110 unsigned MinReservedArea = ArgOffset;
4111
4112 static const MCPhysReg GPR_32[] = { // 32-bit registers.
4113 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4114 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4115 };
4116 static const MCPhysReg GPR_64[] = { // 64-bit registers.
4117 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4118 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4119 };
4120 static const MCPhysReg VR[] = {
4121 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4122 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4123 };
4124
4125 const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
4126 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4127 const unsigned Num_VR_Regs = array_lengthof( VR);
4128
4129 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4130
4131 const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
4132
4133 // In 32-bit non-varargs functions, the stack space for vectors is after the
4134 // stack space for non-vectors. We do not use this space unless we have
4135 // too many vectors to fit in registers, something that only occurs in
4136 // constructed examples, but we have to walk the arglist to figure
4137 // that out. For the pathological case, compute VecArgOffset as the
4138 // start of the vector parameter area. Computing VecArgOffset is the
4139 // entire point of the following loop.
4140 unsigned VecArgOffset = ArgOffset;
4141 if (!isVarArg && !isPPC64) {
4142 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
4143 ++ArgNo) {
4144 EVT ObjectVT = Ins[ArgNo].VT;
4145 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4146
4147 if (Flags.isByVal()) {
4148 // ObjSize is the true size, ArgSize rounded up to multiple of regs.
4149 unsigned ObjSize = Flags.getByValSize();
4150 unsigned ArgSize =
4151 ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4152 VecArgOffset += ArgSize;
4153 continue;
4154 }
4155
4156 switch(ObjectVT.getSimpleVT().SimpleTy) {
4157 default: llvm_unreachable("Unhandled argument type!");
4158 case MVT::i1:
4159 case MVT::i32:
4160 case MVT::f32:
4161 VecArgOffset += 4;
4162 break;
4163 case MVT::i64: // PPC64
4164 case MVT::f64:
4165 // FIXME: We are guaranteed to be !isPPC64 at this point.
4166 // Does MVT::i64 apply?
4167 VecArgOffset += 8;
4168 break;
4169 case MVT::v4f32:
4170 case MVT::v4i32:
4171 case MVT::v8i16:
4172 case MVT::v16i8:
4173 // Nothing to do, we're only looking at Nonvector args here.
4174 break;
4175 }
4176 }
4177 }
4178 // We've found where the vector parameter area in memory is. Skip the
4179 // first 12 parameters; these don't use that memory.
4180 VecArgOffset = ((VecArgOffset+15)/16)*16;
4181 VecArgOffset += 12*16;
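 // For example, if the non-vector arguments end at offset 40, VecArgOffset is
 // rounded up to 48 and then advanced by 12 * 16 == 192 bytes, skipping the
 // slots that would correspond to the first 12 (register-passed) vector
 // parameters.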
4182
4183 // Add DAG nodes to load the arguments or copy them out of registers. On
4184 // entry to a function on PPC, the arguments start after the linkage area,
4185 // although the first ones are often in registers.
4186
4187 SmallVector<SDValue, 8> MemOps;
4188 unsigned nAltivecParamsAtEnd = 0;
4189 Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4190 unsigned CurArgIdx = 0;
4191 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4192 SDValue ArgVal;
4193 bool needsLoad = false;
4194 EVT ObjectVT = Ins[ArgNo].VT;
4195 unsigned ObjSize = ObjectVT.getSizeInBits()/8;
4196 unsigned ArgSize = ObjSize;
4197 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4198 if (Ins[ArgNo].isOrigArg()) {
4199 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4200 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4201 }
4202 unsigned CurArgOffset = ArgOffset;
4203
4204 // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
4205 if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
4206 ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
4207 if (isVarArg || isPPC64) {
4208 MinReservedArea = ((MinReservedArea+15)/16)*16;
4209 MinReservedArea += CalculateStackSlotSize(ObjectVT,
4210 Flags,
4211 PtrByteSize);
4212 } else nAltivecParamsAtEnd++;
4213 } else
4214 // Calculate min reserved area.
4215 MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
4216 Flags,
4217 PtrByteSize);
4218
4219 // FIXME the codegen can be much improved in some cases.
4220 // We do not have to keep everything in memory.
4221 if (Flags.isByVal()) {
4222 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4223
4224 // ObjSize is the true size; ArgSize is that rounded up to a multiple of registers.
4225 ObjSize = Flags.getByValSize();
4226 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4227 // Objects of size 1 and 2 are right justified, everything else is
4228 // left justified. This means the memory address is adjusted forwards.
4229 if (ObjSize==1 || ObjSize==2) {
4230 CurArgOffset = CurArgOffset + (4 - ObjSize);
4231 }
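      // For example, a 2-byte byval object is placed in the last 2 bytes of
      // its 4-byte slot, so CurArgOffset is advanced by 4 - 2 == 2.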
4232 // The value of the object is its address.
4233 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, false, true);
4234 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4235 InVals.push_back(FIN);
4236 if (ObjSize==1 || ObjSize==2) {
4237 if (GPR_idx != Num_GPR_Regs) {
4238 unsigned VReg;
4239 if (isPPC64)
4240 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4241 else
4242 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4243 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4244 EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
4245 SDValue Store =
4246 DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
4247 MachinePointerInfo(&*FuncArg), ObjType);
4248 MemOps.push_back(Store);
4249 ++GPR_idx;
4250 }
4251
4252 ArgOffset += PtrByteSize;
4253
4254 continue;
4255 }
4256 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4257 // Store whatever pieces of the object are in registers
4258 // to memory. ArgOffset will be the address of the beginning
4259 // of the object.
4260 if (GPR_idx != Num_GPR_Regs) {
4261 unsigned VReg;
4262 if (isPPC64)
4263 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4264 else
4265 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4266 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4267 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4268 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4269 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4270 MachinePointerInfo(&*FuncArg, j));
4271 MemOps.push_back(Store);
4272 ++GPR_idx;
4273 ArgOffset += PtrByteSize;
4274 } else {
4275 ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
4276 break;
4277 }
4278 }
4279 continue;
4280 }
4281
4282 switch (ObjectVT.getSimpleVT().SimpleTy) {
4283 default: llvm_unreachable("Unhandled argument type!");
4284 case MVT::i1:
4285 case MVT::i32:
4286 if (!isPPC64) {
4287 if (GPR_idx != Num_GPR_Regs) {
4288 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4289 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
4290
4291 if (ObjectVT == MVT::i1)
4292 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);
4293
4294 ++GPR_idx;
4295 } else {
4296 needsLoad = true;
4297 ArgSize = PtrByteSize;
4298 }
4299 // All int arguments reserve stack space in the Darwin ABI.
4300 ArgOffset += PtrByteSize;
4301 break;
4302 }
4303 LLVM_FALLTHROUGH;
4304 case MVT::i64: // PPC64
4305 if (GPR_idx != Num_GPR_Regs) {
4306 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4307 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4308
4309 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4310 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4311 // value to MVT::i64 and then truncate to the correct register size.
4312 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4313
4314 ++GPR_idx;
4315 } else {
4316 needsLoad = true;
4317 ArgSize = PtrByteSize;
4318 }
4319 // All int arguments reserve stack space in the Darwin ABI.
4320 ArgOffset += 8;
4321 break;
4322
4323 case MVT::f32:
4324 case MVT::f64:
4325 // Every 4 bytes of argument space consumes one of the GPRs available for
4326 // argument passing.
4327 if (GPR_idx != Num_GPR_Regs) {
4328 ++GPR_idx;
4329 if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
4330 ++GPR_idx;
4331 }
4332 if (FPR_idx != Num_FPR_Regs) {
4333 unsigned VReg;
4334
4335 if (ObjectVT == MVT::f32)
4336 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
4337 else
4338 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
4339
4340 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4341 ++FPR_idx;
4342 } else {
4343 needsLoad = true;
4344 }
4345
4346 // All FP arguments reserve stack space in the Darwin ABI.
4347 ArgOffset += isPPC64 ? 8 : ObjSize;
4348 break;
4349 case MVT::v4f32:
4350 case MVT::v4i32:
4351 case MVT::v8i16:
4352 case MVT::v16i8:
4353 // Note that vector arguments in registers don't reserve stack space,
4354 // except in varargs functions.
4355 if (VR_idx != Num_VR_Regs) {
4356 unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4357 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4358 if (isVarArg) {
4359 while ((ArgOffset % 16) != 0) {
4360 ArgOffset += PtrByteSize;
4361 if (GPR_idx != Num_GPR_Regs)
4362 GPR_idx++;
4363 }
4364 ArgOffset += 16;
4365 GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
4366 }
4367 ++VR_idx;
4368 } else {
4369 if (!isVarArg && !isPPC64) {
4370 // Vectors go after all the nonvectors.
4371 CurArgOffset = VecArgOffset;
4372 VecArgOffset += 16;
4373 } else {
4374 // Vectors are aligned.
4375 ArgOffset = ((ArgOffset+15)/16)*16;
4376 CurArgOffset = ArgOffset;
4377 ArgOffset += 16;
4378 }
4379 needsLoad = true;
4380 }
4381 break;
4382 }
4383
4384 // We need to load the argument to a virtual register if we determined above
4385 // that we ran out of physical registers of the appropriate type.
4386 if (needsLoad) {
4387 int FI = MFI.CreateFixedObject(ObjSize,
4388 CurArgOffset + (ArgSize - ObjSize),
4389 isImmutable);
4390 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4391 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4392 }
4393
4394 InVals.push_back(ArgVal);
4395 }
4396
4397 // Allow for Altivec parameters at the end, if needed.
4398 if (nAltivecParamsAtEnd) {
4399 MinReservedArea = ((MinReservedArea+15)/16)*16;
4400 MinReservedArea += 16*nAltivecParamsAtEnd;
4401 }
4402
4403 // Area that is at least reserved in the caller of this function.
4404 MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);
4405
4406 // Set the size that is at least reserved in caller of this function. Tail
4407 // call optimized functions' reserved stack space needs to be aligned so that
4408 // taking the difference between two stack areas will result in an aligned
4409 // stack.
4410 MinReservedArea =
4411 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4412 FuncInfo->setMinReservedArea(MinReservedArea);
4413
4414 // If the function takes variable number of arguments, make a frame index for
4415 // the start of the first vararg value... for expansion of llvm.va_start.
4416 if (isVarArg) {
4417 int Depth = ArgOffset;
4418
4419 FuncInfo->setVarArgsFrameIndex(
4420 MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
4421 Depth, true));
4422 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4423
4424 // If this function is vararg, store any remaining integer argument regs
4425 // to their spots on the stack so that they may be loaded by dereferencing
4426 // the result of va_next.
4427 for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
4428 unsigned VReg;
4429
4430 if (isPPC64)
4431 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4432 else
4433 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4434
4435 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4436 SDValue Store =
4437 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4438 MemOps.push_back(Store);
4439 // Increment the address by the pointer size for the next argument to store
4440 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4441 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4442 }
4443 }
4444
4445 if (!MemOps.empty())
4446 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4447
4448 return Chain;
4449}
4450
4451/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4452/// adjusted to accommodate the arguments for the tailcall.
4453static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4454 unsigned ParamSize) {
4455
4456 if (!isTailCall) return 0;
4457
4458 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4459 unsigned CallerMinReservedArea = FI->getMinReservedArea();
4460 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4461 // Remember only if the new adjustment is bigger.
4462 if (SPDiff < FI->getTailCallSPDelta())
4463 FI->setTailCallSPDelta(SPDiff);
4464
4465 return SPDiff;
4466}
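// For example, if the caller's minimum reserved area is 112 bytes and the
// tail call needs 144 bytes of parameter space, SPDiff is -32; the most
// negative delta seen so far is remembered as the tail-call SP delta.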
4467
4468static bool isFunctionGlobalAddress(SDValue Callee);
4469
4470static bool
4471callsShareTOCBase(const Function *Caller, SDValue Callee,
4472 const TargetMachine &TM) {
4473 // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4474 // don't have enough information to determine if the caller and callee share
4475 // the same TOC base, so we have to pessimistically assume they don't for
4476 // correctness.
4477 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4478 if (!G)
4479 return false;
4480
4481 const GlobalValue *GV = G->getGlobal();
4482 // The medium and large code models are expected to provide a TOC large
4483 // enough to satisfy all data addressing needs of a module with a single
4484 // TOC. Since each module is addressed through a single TOC, we only need
4485 // to check that caller and callee don't cross DSO boundaries.
4486 if (CodeModel::Medium == TM.getCodeModel() ||
4487 CodeModel::Large == TM.getCodeModel())
4488 return TM.shouldAssumeDSOLocal(*Caller->getParent(), GV);
4489
4490 // Otherwise we need to ensure callee and caller are in the same section,
4491 // since the linker may allocate multiple TOCs, and we don't know which
4492 // sections will belong to the same TOC base.
4493
4494 if (!GV->isStrongDefinitionForLinker())
4495 return false;
4496
4497 // Any explicitly-specified sections and section prefixes must also match.
4498 // Also, if we're using -ffunction-sections, then each function is always in
4499 // a different section (the same is true for COMDAT functions).
4500 if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
4501 GV->getSection() != Caller->getSection())
4502 return false;
4503 if (const auto *F = dyn_cast<Function>(GV)) {
4504 if (F->getSectionPrefix() != Caller->getSectionPrefix())
4505 return false;
4506 }
4507
4508 // If the callee might be interposed, then we can't assume the ultimate call
4509 // target will be in the same section. Even in cases where we can assume that
4510 // interposition won't happen, in any case where the linker might insert a
4511 // stub to allow for interposition, we must generate code as though
4512 // interposition might occur. To understand why this matters, consider a
4513 // situation where: a -> b -> c where the arrows indicate calls. b and c are
4514 // in the same section, but a is in a different module (i.e. has a different
4515 // TOC base pointer). If the linker allows for interposition between b and c,
4516 // then it will generate a stub for the call edge between b and c which will
4517 // save the TOC pointer into the designated stack slot allocated by b. If we
4518 // return true here, and therefore allow a tail call between b and c, that
4519 // stack slot won't exist and the b -> c stub will end up saving b's TOC base
4520 // pointer into the stack slot allocated by a (where the a -> b stub saved
4521 // a's TOC base pointer). If we're not considering a tail call, but rather
4522 // whether a nop is needed after the call instruction in b, then because the
4523 // linker will insert a stub, it might complain about a missing nop if we
4524 // omit it (although many linkers don't complain in this case).
4525 if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
4526 return false;
4527
4528 return true;
4529}
4530
4531static bool
4532needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4533 const SmallVectorImpl<ISD::OutputArg> &Outs) {
4534 assert(Subtarget.is64BitELFABI());
4535
4536 const unsigned PtrByteSize = 8;
4537 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4538
4539 static const MCPhysReg GPR[] = {
4540 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4541 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4542 };
4543 static const MCPhysReg VR[] = {
4544 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4545 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4546 };
4547
4548 const unsigned NumGPRs = array_lengthof(GPR);
4549 const unsigned NumFPRs = 13;
4550 const unsigned NumVRs = array_lengthof(VR);
4551 const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4552
4553 unsigned NumBytes = LinkageSize;
4554 unsigned AvailableFPRs = NumFPRs;
4555 unsigned AvailableVRs = NumVRs;
4556
4557 for (const ISD::OutputArg& Param : Outs) {
4558 if (Param.Flags.isNest()) continue;
4559
4560 if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags,
4561 PtrByteSize, LinkageSize, ParamAreaSize,
4562 NumBytes, AvailableFPRs, AvailableVRs,
4563 Subtarget.hasQPX()))
4564 return true;
4565 }
4566 return false;
4567}
4568
4569static bool
4570hasSameArgumentList(const Function *CallerFn, ImmutableCallSite CS) {
4571 if (CS.arg_size() != CallerFn->arg_size())
4572 return false;
4573
4574 ImmutableCallSite::arg_iterator CalleeArgIter = CS.arg_begin();
4575 ImmutableCallSite::arg_iterator CalleeArgEnd = CS.arg_end();
4576 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4577
4578 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4579 const Value* CalleeArg = *CalleeArgIter;
4580 const Value* CallerArg = &(*CallerArgIter);
4581 if (CalleeArg == CallerArg)
4582 continue;
4583
4584 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4585 // tail call @callee([4 x i64] undef, [4 x i64] %b)
4586 // }
4587 // 1st argument of callee is undef and has the same type as caller.
4588 if (CalleeArg->getType() == CallerArg->getType() &&
4589 isa<UndefValue>(CalleeArg))
4590 continue;
4591
4592 return false;
4593 }
4594
4595 return true;
4596}
4597
4598 // Returns true if TCO is possible between the caller's and callee's
4599 // calling conventions.
4600static bool
4601areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
4602 CallingConv::ID CalleeCC) {
4603 // Tail calls are possible with fastcc and ccc.
4604 auto isTailCallableCC = [] (CallingConv::ID CC){
4605 return CC == CallingConv::C || CC == CallingConv::Fast;
4606 };
4607 if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
4608 return false;
4609
4610 // We can safely tail call both fastcc and ccc callees from a c calling
4611 // convention caller. If the caller is fastcc, we may have less stack space
4612 // than a non-fastcc caller with the same signature so disable tail-calls in
4613 // that case.
4614 return CallerCC == CallingConv::C || CallerCC == CalleeCC;
4615}
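// For example, under this rule a fastcc caller may tail call a fastcc callee,
// and a ccc caller may tail call either fastcc or ccc, but a fastcc caller may
// not tail call a ccc callee, since the fastcc caller may have reserved less
// stack space than a ccc caller with the same signature.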
4616
4617bool
4618PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
4619 SDValue Callee,
4620 CallingConv::ID CalleeCC,
4621 ImmutableCallSite CS,
4622 bool isVarArg,
4623 const SmallVectorImpl<ISD::OutputArg> &Outs,
4624 const SmallVectorImpl<ISD::InputArg> &Ins,
4625 SelectionDAG& DAG) const {
4626 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
4627
4628 if (DisableSCO && !TailCallOpt) return false;
4629
4630 // Variadic argument functions are not supported.
4631 if (isVarArg) return false;
4632
4633 auto &Caller = DAG.getMachineFunction().getFunction();
4634 // Check that the calling conventions are compatible for tco.
4635 if (!areCallingConvEligibleForTCO_64SVR4(Caller.getCallingConv(), CalleeCC))
4636 return false;
4637
4638 // A caller that contains any byval parameter is not supported.
4639 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
4640 return false;
4641
4642 // A callee that contains any byval parameter is not supported either.
4643 // Note: This is a quick workaround, because in some cases, e.g.
4644 // caller's stack size > callee's stack size, we are still able to apply
4645 // sibling call optimization. For example, gcc is able to do SCO for caller1
4646 // in the following example, but not for caller2.
4647 // struct test {
4648 // long int a;
4649 // char ary[56];
4650 // } gTest;
4651 // __attribute__((noinline)) int callee(struct test v, struct test *b) {
4652 // b->a = v.a;
4653 // return 0;
4654 // }
4655 // void caller1(struct test a, struct test c, struct test *b) {
4656 // callee(gTest, b); }
4657 // void caller2(struct test *b) { callee(gTest, b); }
4658 if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
4659 return false;
4660
4661 // If callee and caller use different calling conventions, we cannot pass
4662 // parameters on stack since offsets for the parameter area may be different.
4663 if (Caller.getCallingConv() != CalleeCC &&
4664 needStackSlotPassParameters(Subtarget, Outs))
4665 return false;
4666
4667 // No TCO/SCO on indirect calls because the caller has to restore its TOC
4668 if (!isFunctionGlobalAddress(Callee) &&
4669 !isa<ExternalSymbolSDNode>(Callee))
4670 return false;
4671
4672 // If the caller and callee potentially have different TOC bases then we
4673 // cannot tail call since we need to restore the TOC pointer after the call.
4674 // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
4675 if (!callsShareTOCBase(&Caller, Callee, getTargetMachine()))
4676 return false;
4677
4678 // TCO allows altering callee ABI, so we don't have to check further.
4679 if (CalleeCC == CallingConv::Fast && TailCallOpt)
4680 return true;
4681
4682 if (DisableSCO) return false;
4683
4684 // If the callee uses the same argument list as the caller, we can apply SCO
4685 // in this case. If not, we need to check whether the callee needs stack
4686 // slots for passing arguments.
4687 if (!hasSameArgumentList(&Caller, CS) &&
4688 needStackSlotPassParameters(Subtarget, Outs)) {
4689 return false;
4690 }
4691
4692 return true;
4693}
4694
4695/// IsEligibleForTailCallOptimization - Check whether the call is eligible
4696/// for tail call optimization. Targets which want to do tail call
4697/// optimization should implement this function.
4698bool
4699PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
4700 CallingConv::ID CalleeCC,
4701 bool isVarArg,
4702 const SmallVectorImpl<ISD::InputArg> &Ins,
4703 SelectionDAG& DAG) const {
4704 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
4705 return false;
4706
4707 // Variable argument functions are not supported.
4708 if (isVarArg)
4709 return false;
4710
4711 MachineFunction &MF = DAG.getMachineFunction();
4712 CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
4713 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
4714 // Functions containing by val parameters are not supported.
4715 for (unsigned i = 0; i != Ins.size(); i++) {
4716 ISD::ArgFlagsTy Flags = Ins[i].Flags;
4717 if (Flags.isByVal()) return false;
4718 }
4719
4720 // Non-PIC/GOT tail calls are supported.
4721 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
4722 return true;
4723
4724 // At the moment we can only do local tail calls (in same module, hidden
4725 // or protected) if we are generating PIC.
4726 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
4727 return G->getGlobal()->hasHiddenVisibility()
4728 || G->getGlobal()->hasProtectedVisibility();
4729 }
4730
4731 return false;
4732}
4733
4734/// isBLACompatibleAddress - Return the immediate to use if the specified
4735/// 32-bit value is representable in the immediate field of a BxA instruction.
4736static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
4737 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
4738 if (!C) return nullptr;
4739
4740 int Addr = C->getZExtValue();
4741 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
4742 SignExtend32<26>(Addr) != Addr)
4743 return nullptr; // Top 6 bits have to be sext of immediate.
4744
4745 return DAG
4746 .getConstant(
4747 (int)C->getZExtValue() >> 2, SDLoc(Op),
4748 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
4749 .getNode();
4750}
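// A minimal standalone sketch (a hypothetical helper, assuming the same
// constraints as isBLACompatibleAddress above) of the encoding check on a
// plain integer: the address must be 4-byte aligned, must survive
// sign-extension from 26 bits, and the encoded field drops the two implicit
// low bits. For example, 0x2000 encodes as 0x800, while 0x2002 is rejected
// because its low two bits are not zero.
static bool exampleFitsBLAImmediate(int Addr, int &EncodedField) {
  if ((Addr & 3) != 0 ||              // low 2 bits are implicitly zero
      SignExtend32<26>(Addr) != Addr) // must fit in a signed 26-bit field
    return false;
  EncodedField = Addr >> 2;           // value placed in the instruction field
  return true;
}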
4751
4752namespace {
4753
4754struct TailCallArgumentInfo {
4755 SDValue Arg;
4756 SDValue FrameIdxOp;
4757 int FrameIdx = 0;
4758
4759 TailCallArgumentInfo() = default;
4760};
4761
4762} // end anonymous namespace
4763
4764/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
4765static void StoreTailCallArgumentsToStackSlot(
4766 SelectionDAG &DAG, SDValue Chain,
4767 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
4768 SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
4769 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
4770 SDValue Arg = TailCallArgs[i].Arg;
4771 SDValue FIN = TailCallArgs[i].FrameIdxOp;
4772 int FI = TailCallArgs[i].FrameIdx;
4773 // Store relative to framepointer.
4774 MemOpChains.push_back(DAG.getStore(
4775 Chain, dl, Arg, FIN,
4776 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
4777 }
4778}
4779
4780/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
4781/// the appropriate stack slot for the tail call optimized function call.
4782static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
4783 SDValue OldRetAddr, SDValue OldFP,
4784 int SPDiff, const SDLoc &dl) {
4785 if (SPDiff) {
4786 // Calculate the new stack slot for the return address.
4787 MachineFunction &MF = DAG.getMachineFunction();
4788 const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
4789 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
4790 bool isPPC64 = Subtarget.isPPC64();
4791 int SlotSize = isPPC64 ? 8 : 4;
4792 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
4793 int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
4794 NewRetAddrLoc, true);
4795 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4796 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
4797 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
4798 MachinePointerInfo::getFixedStack(MF, NewRetAddr));
4799
4800 // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
4801 // slot as the FP is never overwritten.
4802 if (Subtarget.isDarwinABI()) {
4803 int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset();
4804 int NewFPIdx = MF.getFrameInfo().CreateFixedObject(SlotSize, NewFPLoc,
4805 true);
4806 SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
4807 Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
4808 MachinePointerInfo::getFixedStack(
4809 DAG.getMachineFunction(), NewFPIdx));
4810 }
4811 }
4812 return Chain;
4813}
4814
4815/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
4816/// the position of the argument.
4817static void
4818CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
4819 SDValue Arg, int SPDiff, unsigned ArgOffset,
4820 SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
4821 int Offset = ArgOffset + SPDiff;
4822 uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
4823 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
4824 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4825 SDValue FIN = DAG.getFrameIndex(FI, VT);
4826 TailCallArgumentInfo Info;
4827 Info.Arg = Arg;
4828 Info.FrameIdxOp = FIN;
4829 Info.FrameIdx = FI;
4830 TailCallArguments.push_back(Info);
4831}
4832
4833/// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
4834/// stack slot. Returns the chain as result and the loaded frame pointers in
4835/// LROpOut/FPOpout. Used when tail calling.
4836SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
4837 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
4838 SDValue &FPOpOut, const SDLoc &dl) const {
4839 if (SPDiff) {
4840 // Load the LR and FP stack slot for later adjusting.
4841 EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
4842 LROpOut = getReturnAddrFrameIndex(DAG);
4843 LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
4844 Chain = SDValue(LROpOut.getNode(), 1);
4845
4846 // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
4847 // slot as the FP is never overwritten.
4848 if (Subtarget.isDarwinABI()) {
4849 FPOpOut = getFramePointerFrameIndex(DAG);
4850 FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo());
4851 Chain = SDValue(FPOpOut.getNode(), 1);
4852 }
4853 }
4854 return Chain;
4855}
4856
4857/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
4858/// by "Src" to address "Dst" of size "Size". Alignment information is
4859/// specified by the specific parameter attribute. The copy will be passed as
4860/// a byval function parameter.
4861/// Sometimes what we are copying is the end of a larger object, the part that
4862/// does not fit in registers.
4863static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
4864 SDValue Chain, ISD::ArgFlagsTy Flags,
4865 SelectionDAG &DAG, const SDLoc &dl) {
4866 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
4867 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
4868 false, false, false, MachinePointerInfo(),
4869 MachinePointerInfo());
4870}
4871
4872/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
4873/// tail calls.
4874static void LowerMemOpCallTo(
4875 SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
4876 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
4877 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
4878 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
4879 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4880 if (!isTailCall) {
4881 if (isVector) {
4882 SDValue StackPtr;
4883 if (isPPC64)
4884 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
4885 else
4886 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
4887 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
4888 DAG.getConstant(ArgOffset, dl, PtrVT));
4889 }
4890 MemOpChains.push_back(
4891 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
4892 // Calculate and remember argument location.
4893 } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
4894 TailCallArguments);
4895}
4896
4897static void
4898PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
4899 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
4900 SDValue FPOp,
4901 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
4902 // Emit a sequence of copyto/copyfrom virtual registers for arguments that
4903 // might overwrite each other in case of tail call optimization.
4904 SmallVector<SDValue, 8> MemOpChains2;
4905 // Do not flag preceding copytoreg stuff together with the following stuff.
4906 InFlag = SDValue();
4907 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
4908 MemOpChains2, dl);
4909 if (!MemOpChains2.empty())
4910 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
4911
4912 // Store the return address to the appropriate stack slot.
4913 Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
4914
4915 // Emit callseq_end just before tailcall node.
4916 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
4917 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
4918 InFlag = Chain.getValue(1);
4919}
4920
4921// Is this global address that of a function that can be called by name? (as
4922// opposed to something that must hold a descriptor for an indirect call).
4923static bool isFunctionGlobalAddress(SDValue Callee) {
4924 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
4925 if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
4926 Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
4927 return false;
4928
4929 return G->getGlobal()->getValueType()->isFunctionTy();
4930 }
4931
4932 return false;
4933}
4934
4935static unsigned
4936PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
4937 SDValue CallSeqStart, const SDLoc &dl, int SPDiff, bool isTailCall,
4938 bool isPatchPoint, bool hasNest,
4939 SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass,
4940 SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
4941 ImmutableCallSite CS, const PPCSubtarget &Subtarget) {
4942 bool isPPC64 = Subtarget.isPPC64();
4943 bool isSVR4ABI = Subtarget.isSVR4ABI();
4944 bool is64BitELFv1ABI = isPPC64 && isSVR4ABI && !Subtarget.isELFv2ABI();
4945 bool isAIXABI = Subtarget.isAIXABI();
4946
4947 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4948 NodeTys.push_back(MVT::Other); // Returns a chain
4949 NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use.
4950
4951 unsigned CallOpc = PPCISD::CALL;
4952
4953 bool needIndirectCall = true;
4954 if (!isSVR4ABI || !isPPC64)
4955 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
4956 // If this is an absolute destination address, use the munged value.
4957 Callee = SDValue(Dest, 0);
4958 needIndirectCall = false;
4959 }
4960
4961 // PC-relative references to external symbols should go through $stub, unless
4962 // we're building with the leopard linker or later, which automatically
4963 // synthesizes these stubs.
4964 const TargetMachine &TM = DAG.getTarget();
4965 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
4966 const GlobalValue *GV = nullptr;
4967 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee))
4968 GV = G->getGlobal();
4969 bool Local = TM.shouldAssumeDSOLocal(*Mod, GV);
4970 bool UsePlt = !Local && Subtarget.isTargetELF() && !isPPC64;
4971
4972 // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
4973 // every direct call is) turn it into a TargetGlobalAddress /
4974 // TargetExternalSymbol node so that legalize doesn't hack it.
4975 if (isFunctionGlobalAddress(Callee)) {
4976 GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
4977
4978 // A call to a TLS address is actually an indirect call to a
4979 // thread-specific pointer.
4980 unsigned OpFlags = 0;
4981 if (UsePlt)
4982 OpFlags = PPCII::MO_PLT;
4983
4984 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
4985 Callee.getValueType(), 0, OpFlags);
4986 needIndirectCall = false;
4987 }
4988
4989 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
4990 unsigned char OpFlags = 0;
4991
4992 if (UsePlt)
4993 OpFlags = PPCII::MO_PLT;
4994
4995 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
4996 OpFlags);
4997 needIndirectCall = false;
4998 }
4999
5000 if (isPatchPoint) {
5001 // We'll form an invalid direct call when lowering a patchpoint; the full
5002 // sequence for an indirect call is complicated, and many of the
5003 // instructions introduced might have side effects (and, thus, can't be
5004 // removed later). The call itself will be removed as soon as the
5005 // argument/return lowering is complete, so the fact that it has the wrong
5006 // kind of operands should not really matter.
5007 needIndirectCall = false;
5008 }
5009
5010 if (needIndirectCall) {
5011 // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair
5012 // to do the call, we can't use PPCISD::CALL.
5013 SDValue MTCTROps[] = {Chain, Callee, InFlag};
5014
5015 if (is64BitELFv1ABI) {
5016 // Function pointers in the 64-bit SVR4 ABI do not point to the function
5017 // entry point, but to the function descriptor (the function entry point
5018 // address is part of the function descriptor though).
5019 // The function descriptor is a three doubleword structure with the
5020 // following fields: function entry point, TOC base address and
5021 // environment pointer.
5022 // Thus for a call through a function pointer, the following actions need
5023 // to be performed:
5024 // 1. Save the TOC of the caller in the TOC save area of its stack
5025 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
5026 // 2. Load the address of the function entry point from the function
5027 // descriptor.
5028 // 3. Load the TOC of the callee from the function descriptor into r2.
5029 // 4. Load the environment pointer from the function descriptor into
5030 // r11.
5031 // 5. Branch to the function entry point address.
5032 // 6. On return of the callee, the TOC of the caller needs to be
5033 // restored (this is done in FinishCall()).
5034 //
5035 // The loads are scheduled at the beginning of the call sequence, and the
5036 // register copies are flagged together to ensure that no other
5037 // operations can be scheduled in between. E.g. without flagging the
5038 // copies together, a TOC access in the caller could be scheduled between
5039 // the assignment of the callee TOC and the branch to the callee, which
5040 // results in the TOC access going through the TOC of the callee instead
5041 // of going through the TOC of the caller, which leads to incorrect code.
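      // A sketch of the descriptor layout assumed by the offsets used below:
      //   offset  0: function entry point    (moved into CTR for the branch)
      //   offset  8: TOC base of the callee  (copied into r2)
      //   offset 16: environment pointer     (copied into r11 unless 'nest')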
5042
5043 // Load the address of the function entry point from the function
5044 // descriptor.
5045 SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1);
5046 if (LDChain.getValueType() == MVT::Glue)
5047 LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2);
5048
5049 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5050 ? (MachineMemOperand::MODereferenceable |
5051 MachineMemOperand::MOInvariant)
5052 : MachineMemOperand::MONone;
5053
5054 MachinePointerInfo MPI(CS ? CS.getCalledValue() : nullptr);
5055 SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI,
5056 /* Alignment = */ 8, MMOFlags);
5057
5058 // Load environment pointer into r11.
5059 SDValue PtrOff = DAG.getIntPtrConstant(16, dl);
5060 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
5061 SDValue LoadEnvPtr =
5062 DAG.getLoad(MVT::i64, dl, LDChain, AddPtr, MPI.getWithOffset(16),
5063 /* Alignment = */ 8, MMOFlags);
5064
5065 SDValue TOCOff = DAG.getIntPtrConstant(8, dl);
5066 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
5067 SDValue TOCPtr =
5068 DAG.getLoad(MVT::i64, dl, LDChain, AddTOC, MPI.getWithOffset(8),
5069 /* Alignment = */ 8, MMOFlags);
5070
5071 setUsesTOCBasePtr(DAG);
5072 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr,
5073 InFlag);
5074 Chain = TOCVal.getValue(0);
5075 InFlag = TOCVal.getValue(1);
5076
5077 // If the function call has an explicit 'nest' parameter, it takes the
5078 // place of the environment pointer.
5079 if (!hasNest) {
5080 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
5081 InFlag);
5082
5083 Chain = EnvVal.getValue(0);
5084 InFlag = EnvVal.getValue(1);
5085 }
5086
5087 MTCTROps[0] = Chain;
5088 MTCTROps[1] = LoadFuncPtr;
5089 MTCTROps[2] = InFlag;
5090 }
5091
5092 Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys,
5093 makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
5094 InFlag = Chain.getValue(1);
5095
5096 NodeTys.clear();
5097 NodeTys.push_back(MVT::Other);
5098 NodeTys.push_back(MVT::Glue);
5099 Ops.push_back(Chain);
5100 CallOpc = PPCISD::BCTRL;
5101 Callee.setNode(nullptr);
5102 // Add use of X11 (holding environment pointer)
5103 if (is64BitELFv1ABI && !hasNest)
5104 Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
5105 // Add CTR register as callee so a bctr can be emitted later.
5106 if (isTailCall)
5107 Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
5108 }
5109
5110 // If this is a direct call, pass the chain and the callee.
5111 if (Callee.getNode()) {
5112 Ops.push_back(Chain);
5113 Ops.push_back(Callee);
5114 }
5115 // If this is a tail call add stack pointer delta.
5116 if (isTailCall)
5117 Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5118
5119 // Add argument registers to the end of the list so that they are known live
5120 // into the call.
5121 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
5122 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
5123 RegsToPass[i].second.getValueType()));
5124
5125 // All calls, in the AIX ABI and 64-bit ELF ABIs, need the TOC register
5126 // live into the call.
5127 // We do need to reserve R2/X2 to appease the verifier for the PATCHPOINT.
5128 if ((isSVR4ABI && isPPC64) || isAIXABI) {
5129 setUsesTOCBasePtr(DAG);
5130
5131 // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5132 // no way to mark dependencies as implicit here.
5133 // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5134 if (!isPatchPoint)
5135 Ops.push_back(DAG.getRegister(isPPC64 ? PPC::X2
5136 : PPC::R2, PtrVT));
5137 }
5138
5139 return CallOpc;
5140}
5141
5142SDValue PPCTargetLowering::LowerCallResult(
5143 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
5144 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5145 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5146 SmallVector<CCValAssign, 16> RVLocs;
5147 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5148 *DAG.getContext());
5149
5150 CCRetInfo.AnalyzeCallResult(
5151 Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5152 ? RetCC_PPC_Cold
5153 : RetCC_PPC);
5154
5155 // Copy all of the result registers out of their specified physreg.
5156 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5157 CCValAssign &VA = RVLocs[i];
5158 assert(VA.isRegLoc() && "Can only return in registers!");
5159
5160 SDValue Val;
5161
5162 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5163 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5164 InFlag);
5165 Chain = Lo.getValue(1);
5166 InFlag = Lo.getValue(2);
5167 VA = RVLocs[++i]; // skip ahead to next loc
5168 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5169 InFlag);
5170 Chain = Hi.getValue(1);
5171 InFlag = Hi.getValue(2);
5172 if (!Subtarget.isLittleEndian())
5173 std::swap (Lo, Hi);
5174 Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
5175 } else {
5176 Val = DAG.getCopyFromReg(Chain, dl,
5177 VA.getLocReg(), VA.getLocVT(), InFlag);
5178 Chain = Val.getValue(1);
5179 InFlag = Val.getValue(2);
5180 }
5181
5182 switch (VA.getLocInfo()) {
5183 default: llvm_unreachable("Unknown loc info!");
5184 case CCValAssign::Full: break;
5185 case CCValAssign::AExt:
5186 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5187 break;
5188 case CCValAssign::ZExt:
5189 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5190 DAG.getValueType(VA.getValVT()));
5191 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5192 break;
5193 case CCValAssign::SExt:
5194 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5195 DAG.getValueType(VA.getValVT()));
5196 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5197 break;
5198 }
5199
5200 InVals.push_back(Val);
5201 }
5202
5203 return Chain;
5204}
5205
5206SDValue PPCTargetLowering::FinishCall(
5207 CallingConv::ID CallConv, const SDLoc &dl, bool isTailCall, bool isVarArg,
5208 bool isPatchPoint, bool hasNest, SelectionDAG &DAG,
5209 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue InFlag,
5210 SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5211 unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5212 SmallVectorImpl<SDValue> &InVals, ImmutableCallSite CS) const {
5213 std::vector<EVT> NodeTys;
5214 SmallVector<SDValue, 8> Ops;
5215 unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl,
5216 SPDiff, isTailCall, isPatchPoint, hasNest,
5217 RegsToPass, Ops, NodeTys, CS, Subtarget);
5218
5219 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5220 if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
5221 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5222
5223 // When performing tail call optimization the callee pops its arguments off
5224 // the stack. Account for this here so these bytes can be pushed back on in
5225 // PPCFrameLowering::eliminateCallFramePseudoInstr.
5226 int BytesCalleePops =
5227 (CallConv == CallingConv::Fast &&
5228 getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
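  // For example, a fastcc callee that received 96 bytes of arguments under
  // GuaranteedTailCallOpt pops those 96 bytes itself, and
  // eliminateCallFramePseudoInstr later pushes them back on.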
5229
5230 // Add a register mask operand representing the call-preserved registers.
5231 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5232 const uint32_t *Mask =
5233 TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
5234 assert(Mask && "Missing call preserved mask for calling convention");
5235 Ops.push_back(DAG.getRegisterMask(Mask));
5236
5237 if (InFlag.getNode())
5238 Ops.push_back(InFlag);
5239
5240 // Emit tail call.
5241 if (isTailCall) {
5242 assert(((Callee.getOpcode() == ISD::Register &&
5243 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5244 Callee.getOpcode() == ISD::TargetExternalSymbol ||
5245 Callee.getOpcode() == ISD::TargetGlobalAddress ||
5246 isa<ConstantSDNode>(Callee)) &&
5247 "Expecting an global address, external symbol, absolute value or register");
5248
5249 DAG.getMachineFunction().getFrameInfo().setHasTailCall();
5250 return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
5251 }
5252
5253 // Add a NOP immediately after the branch instruction when using the 64-bit
5254 // SVR4 or the AIX ABI.
5255 // At link time, if the caller and callee are in different modules and
5256 // thus have different TOCs, the call will be replaced with a call to a stub
5257 // function which saves the current TOC, loads the TOC of the callee and
5258 // branches to the callee. The NOP will be replaced with a load instruction
5259 // which restores the TOC of the caller from the TOC save slot of the current
5260 // stack frame. If caller and callee belong to the same module (and have the
5261 // same TOC), the NOP will remain unchanged, or become some other NOP.
5262
5263 MachineFunction &MF = DAG.getMachineFunction();
5264 EVT PtrVT = getPointerTy(DAG.getDataLayout());
5265 if (!isTailCall && !isPatchPoint &&
5266 ((Subtarget.isSVR4ABI() && Subtarget.isPPC64()) ||
5267 Subtarget.isAIXABI())) {
5268 if (CallOpc == PPCISD::BCTRL) {
5269 if (Subtarget.isAIXABI())
5270 report_fatal_error("Indirect call on AIX is not implemented.");
5271
5272 // This is a call through a function pointer.
5273 // Restore the caller TOC from the save area into R2.
5274 // See PrepareCall() for more information about calls through function
5275 // pointers in the 64-bit SVR4 ABI.
5276 // We are using a target-specific load with r2 hard coded, because the
5277 // result of a target-independent load would never go directly into r2,
5278 // since r2 is a reserved register (which prevents the register allocator
5279 // from allocating it), resulting in an additional register being
5280 // allocated and an unnecessary move instruction being generated.
5281 CallOpc = PPCISD::BCTRL_LOAD_TOC;
5282
5283 SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
5284 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5285 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5286 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
5287
5288 // The address needs to go after the chain input but before the flag (or
5289 // any other variadic arguments).
5290 Ops.insert(std::next(Ops.begin()), AddTOC);
5291 } else if (CallOpc == PPCISD::CALL &&
5292 !callsShareTOCBase(&MF.getFunction(), Callee, DAG.getTarget())) {
5293 // Otherwise insert NOP for non-local calls.
5294 CallOpc = PPCISD::CALL_NOP;
5295 }
5296 }
5297
5298 if (Subtarget.isAIXABI() && isFunctionGlobalAddress(Callee)) {
5299 // On AIX, direct function calls reference the symbol for the function's
5300 // entry point, which is named by inserting a "." before the function's
5301 // C-linkage name.
5302 GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
5303 auto &Context = DAG.getMachineFunction().getMMI().getContext();
5304 MCSymbol *S = Context.getOrCreateSymbol(Twine(".") +
5305 Twine(G->getGlobal()->getName()));
5306 Callee = DAG.getMCSymbol(S, PtrVT);
5307 // Replace the GlobalAddressSDNode Callee with the MCSymbolSDNode.
5308 Ops[1] = Callee;
5309 }
5310
5311 Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
5312 InFlag = Chain.getValue(1);
5313
5314 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
5315 DAG.getIntPtrConstant(BytesCalleePops, dl, true),
5316 InFlag, dl);
5317 if (!Ins.empty())
5318 InFlag = Chain.getValue(1);
5319
5320 return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
5321 Ins, dl, DAG, InVals);
5322}
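
The AIX handling near the end of FinishCall rewrites a direct callee into the "."-prefixed entry-point symbol described in the comment above. A minimal sketch of that naming rule, using a plain std::string rather than the MCContext/Twine machinery:

#include <string>

// "foo" -> ".foo": on AIX, a direct call targets the function's entry-point
// symbol, which is the C-linkage name with a leading dot.
std::string aixEntryPointName(const std::string &CLinkageName) {
  return "." + CLinkageName;
}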
5323
5324SDValue
5325PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5326 SmallVectorImpl<SDValue> &InVals) const {
5327 SelectionDAG &DAG = CLI.DAG;
5328 SDLoc &dl = CLI.DL;
5329 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5330 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5331 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
5332 SDValue Chain = CLI.Chain;
5333 SDValue Callee = CLI.Callee;
5334 bool &isTailCall = CLI.IsTailCall;
5335 CallingConv::ID CallConv = CLI.CallConv;
5336 bool isVarArg = CLI.IsVarArg;
5337 bool isPatchPoint = CLI.IsPatchPoint;
5338 ImmutableCallSite CS = CLI.CS;
5339
5340 if (isTailCall) {
5341 if (Subtarget.useLongCalls() && !(CS && CS.isMustTailCall()))
5342 isTailCall = false;
5343 else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5344 isTailCall =
5345 IsEligibleForTailCallOptimization_64SVR4(Callee, CallConv, CS,
5346 isVarArg, Outs, Ins, DAG);
5347 else
5348 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
5349 Ins, DAG);
5350 if (isTailCall) {
5351 ++NumTailCalls;
5352 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5353 ++NumSiblingCalls;
5354
5355 assert(isa<GlobalAddressSDNode>(Callee) &&
5356 "Callee should be an llvm::Function object.");
5357 LLVM_DEBUG(
5358 const GlobalValue *GV =
5359 cast<GlobalAddressSDNode>(Callee)->getGlobal();
5360 const unsigned Width =
5361 80 - strlen("TCO caller: ") - strlen(", callee linkage: 0, 0");
5362 dbgs() << "TCO caller: "
5363 << left_justify(DAG.getMachineFunction().getName(), Width)
5364 << ", callee linkage: " << GV->getVisibility() << ", "
5365 << GV->getLinkage() << "\n");
5366 }
5367 }
5368
5369 if (!isTailCall && CS && CS.isMustTailCall())
5370 report_fatal_error("failed to perform tail call elimination on a call "
5371 "site marked musttail");
5372
5373 // When long calls (i.e. indirect calls) are always used, calls are always
5374 // made via function pointer. If we have a function name, first translate it
5375 // into a pointer.
5376 if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5377 !isTailCall)
5378 Callee = LowerGlobalAddress(Callee, DAG);
5379
5380 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5381 return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
5382 isTailCall, isPatchPoint, Outs, OutVals, Ins,
5383 dl, DAG, InVals, CS);
5384
5385 if (Subtarget.isSVR4ABI())
5386 return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
5387 isTailCall, isPatchPoint, Outs, OutVals, Ins,
5388 dl, DAG, InVals, CS);
5389
5390 if (Subtarget.isAIXABI())
5391 return LowerCall_AIX(Chain, Callee, CallConv, isVarArg,
5392 isTailCall, isPatchPoint, Outs, OutVals, Ins,
5393 dl, DAG, InVals, CS);
5394
5395 return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
5396 isTailCall, isPatchPoint, Outs, OutVals, Ins,
5397 dl, DAG, InVals, CS);
5398}
5399
5400SDValue PPCTargetLowering::LowerCall_32SVR4(
5401 SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
5402 bool isTailCall, bool isPatchPoint,
5403 const SmallVectorImpl<ISD::OutputArg> &Outs,
5404 const SmallVectorImpl<SDValue> &OutVals,
5405 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5406 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5407 ImmutableCallSite CS) const {
5408 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5409 // of the 32-bit SVR4 ABI stack frame layout.
5410
5411 assert((CallConv == CallingConv::C ||
5412 CallConv == CallingConv::Cold ||
5413 CallConv == CallingConv::Fast) && "Unknown calling convention!");
5414
5415 unsigned PtrByteSize = 4;
5416
5417 MachineFunction &MF = DAG.getMachineFunction();
5418
5419 // Mark this function as potentially containing a function that contains a
5420 // tail call. As a consequence the frame pointer will be used for dynamic
5421 // alloca and for restoring the caller's stack pointer in this function's
5422 // epilog. This is done because the tail-called function might overwrite the
5423 // value in this function's (MF) stack pointer stack slot 0(SP).
5424 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5425 CallConv == CallingConv::Fast)
5426 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5427
5428 // Count how many bytes are to be pushed on the stack, including the linkage
5429 // area, parameter list area and the part of the local variable space which
5430 // contains copies of aggregates which are passed by value.
5431
5432 // Assign locations to all of the outgoing arguments.
5433 SmallVector<CCValAssign, 16> ArgLocs;
5434 PPCCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
5435
5436 // Reserve space for the linkage area on the stack.
5437 CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
5438 PtrByteSize);
5439 if (useSoftFloat())
5440 CCInfo.PreAnalyzeCallOperands(Outs);
5441
5442 if (isVarArg) {
5443 // Handle fixed and variable vector arguments differently.
5444 // Fixed vector arguments go into registers as long as registers are
5445 // available. Variable vector arguments always go into memory.
5446 unsigned NumArgs = Outs.size();
5447
5448 for (unsigned i = 0; i != NumArgs; ++i) {
5449 MVT ArgVT = Outs[i].VT;
5450 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5451 bool Result;
5452
5453 if (Outs[i].IsFixed) {
5454 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
5455 CCInfo);
5456 } else {
5457 Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
5458 ArgFlags, CCInfo);
5459 }
5460
5461 if (Result) {
5462#ifndef NDEBUG
5463 errs() << "Call operand #" << i << " has unhandled type "
5464 << EVT(ArgVT).getEVTString() << "\n";
5465#endif
5466 llvm_unreachable(nullptr);
5467 }
5468 }
5469 } else {
5470 // All arguments are treated the same.
5471 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
5472 }
5473 CCInfo.clearWasPPCF128();
5474
5475 // Assign locations to all of the outgoing aggregate by value arguments.
5476 SmallVector<CCValAssign, 16> ByValArgLocs;
5477 CCState CCByValInfo(CallConv, isVarArg, MF, ByValArgLocs, *DAG.getContext());
5478
5479 // Reserve stack space for the allocations in CCInfo.
5480 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
5481
5482 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
5483
5484 // Size of the linkage area, parameter list area and the part of the local
5485 // variable space where copies of aggregates which are passed by value are
5486 // stored.
5487 unsigned NumBytes = CCByValInfo.getNextStackOffset();
5488
5489 // Calculate by how many bytes the stack has to be adjusted in case of tail
5490 // call optimization.
5491 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
5492
5493 // Adjust the stack pointer for the new arguments...
5494 // These operations are automatically eliminated by the prolog/epilog pass
5495 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5496 SDValue CallSeqStart = Chain;
5497
5498 // Load the return address and frame pointer so they can be moved somewhere
5499 // else later.
5500 SDValue LROp, FPOp;
5501 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5502
5503 // Set up a copy of the stack pointer for use loading and storing any
5504 // arguments that may not fit in the registers available for argument
5505 // passing.
5506 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5507
5508 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5509 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5510 SmallVector<SDValue, 8> MemOpChains;
5511
5512 bool seenFloatArg = false;
5513 // Walk the register/memloc assignments, inserting copies/loads.
5514 // i - Tracks the index into the list of registers allocated for the call
5515 // RealArgIdx - Tracks the index into the list of actual function arguments
5516 // j - Tracks the index into the list of byval arguments
5517 for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
5518 i != e;
5519 ++i, ++RealArgIdx) {
5520 CCValAssign &VA = ArgLocs[i];
5521 SDValue Arg = OutVals[RealArgIdx];
5522 ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
5523
5524 if (Flags.isByVal()) {
5525 // Argument is an aggregate which is passed by value, thus we need to
5526 // create a copy of it in the local variable space of the current stack
5527 // frame (which is the stack frame of the caller) and pass the address of
5528 // this copy to the callee.
5529 assert((j < ByValArgLocs.size()) && "Index out of bounds!");
5530 CCValAssign &ByValVA = ByValArgLocs[j++];
5531 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
5532
5533 // Memory reserved in the local variable space of the caller's stack frame.
5534 unsigned LocMemOffset = ByValVA.getLocMemOffset();
5535
5536 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5537 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5538 StackPtr, PtrOff);
5539
5540 // Create a copy of the argument in the local area of the current
5541 // stack frame.
5542 SDValue MemcpyCall =
5543 CreateCopyOfByValArgument(Arg, PtrOff,
5544 CallSeqStart.getNode()->getOperand(0),
5545 Flags, DAG, dl);
5546
5547 // This must go outside the CALLSEQ_START..END.
5548 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
5549 SDLoc(MemcpyCall));
5550 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5551 NewCallSeqStart.getNode());
5552 Chain = CallSeqStart = NewCallSeqStart;
5553
5554 // Pass the address of the aggregate copy on the stack either in a
5555 // physical register or in the parameter list area of the current stack
5556 // frame to the callee.
5557 Arg = PtrOff;
5558 }
5559
5560 // When useCRBits() is true, there can be i1 arguments.
5561 // It is because getRegisterType(MVT::i1) => MVT::i1,
5562 // and for other integer types getRegisterType() => MVT::i32.
5563 // Extend i1 and ensure callee will get i32.
5564 if (Arg.getValueType() == MVT::i1)
5565 Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
5566 dl, MVT::i32, Arg);
5567
5568 if (VA.isRegLoc()) {
5569 seenFloatArg |= VA.getLocVT().isFloatingPoint();
5570 // Put argument in a physical register.
5571 if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
5572 bool IsLE = Subtarget.isLittleEndian();
5573 SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5574 DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
5575 RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
5576 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5577 DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
5578 RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
5579 SVal.getValue(0)));
5580 } else
5581 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
5582 } else {
5583 // Put argument in the parameter list area of the current stack frame.
5584 assert(VA.isMemLoc());
5585 unsigned LocMemOffset = VA.getLocMemOffset();
5586
5587 if (!isTailCall) {
5588 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5589 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5590 StackPtr, PtrOff);
5591
5592 MemOpChains.push_back(
5593 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5594 } else {
5595 // Calculate and remember argument location.
5596 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
5597 TailCallArguments);
5598 }
5599 }
5600 }
5601
5602 if (!MemOpChains.empty())
5603 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
5604
5605 // Build a sequence of copy-to-reg nodes chained together with token chain
5606 // and flag operands which copy the outgoing args into the appropriate regs.
5607 SDValue InFlag;
5608 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5609 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5610 RegsToPass[i].second, InFlag);
5611 InFlag = Chain.getValue(1);
5612 }
5613
5614 // Set CR bit 6 to true if this is a vararg call with floating args passed in
5615 // registers.
5616 if (isVarArg) {
5617 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
5618 SDValue Ops[] = { Chain, InFlag };
5619
5620 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
5621 dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));
5622
5623 InFlag = Chain.getValue(1);
5624 }
5625
5626 if (isTailCall)
5627 PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
5628 TailCallArguments);
5629
5630 return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
5631 /* unused except on PPC64 ELFv1 */ false, DAG,
5632 RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
5633 NumBytes, Ins, InVals, CS);
5634}
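
The CR bit 6 handling above matters only for variadic calls: the caller records whether any floating-point arguments were passed in registers, so the callee's va_start prologue knows whether the argument FPRs need to be saved. A hypothetical C-level illustration, assuming a hard-float 32-bit SVR4 target (takes_varargs is an invented example function, not part of the LLVM sources):

#include <cstdarg>
#include <cstdio>

void takes_varargs(const char *fmt, ...) {
  va_list ap;
  va_start(ap, fmt);
  vprintf(fmt, ap);
  va_end(ap);
}

int main() {
  takes_varargs("%d\n", 42);    // no FP argument in registers -> CR6UNSET before the call
  takes_varargs("%f\n", 3.14);  // a double passed in an FPR   -> CR6SET before the call
}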
5635
5636// Copy an argument into memory, being careful to do this outside the
5637// call sequence for the call to which the argument belongs.
5638SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
5639 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
5640 SelectionDAG &DAG, const SDLoc &dl) const {
5641 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
5642 CallSeqStart.getNode()->getOperand(0),
5643 Flags, DAG, dl);
5644 // The MEMCPY must go outside the CALLSEQ_START..END.
5645 int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
5646 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
5647 SDLoc(MemcpyCall));
5648 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5649 NewCallSeqStart.getNode());
5650 return NewCallSeqStart;
5651}
5652
5653SDValue PPCTargetLowering::LowerCall_64SVR4(
5654 SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
5655 bool isTailCall, bool isPatchPoint,
5656 const SmallVectorImpl<ISD::OutputArg> &Outs,
5657 const SmallVectorImpl<SDValue> &OutVals,
5658 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5659 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5660 ImmutableCallSite CS) const {
5661 bool isELFv2ABI = Subtarget.isELFv2ABI();
5662 bool isLittleEndian = Subtarget.isLittleEndian();
5663 unsigned NumOps = Outs.size();
5664 bool hasNest = false;
5665 bool IsSibCall = false;
5666
5667 EVT PtrVT = getPointerTy(DAG.getDataLayout());
5668 unsigned PtrByteSize = 8;
5669
5670 MachineFunction &MF = DAG.getMachineFunction();
5671
5672 if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
5673 IsSibCall = true;
5674
5675 // Mark this function as potentially containing a function that contains a
5676 // tail call. As a consequence the frame pointer will be used for dynamic
5677 // alloca and for restoring the caller's stack pointer in this function's
5678 // epilog. This is done because the tail-called function might overwrite the
5679 // value in this function's (MF) stack pointer stack slot 0(SP).
5680 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5681 CallConv == CallingConv::Fast)
5682 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5683
5684 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
5685 "fastcc not supported on varargs functions");
5686
5687 // Count how many bytes are to be pushed on the stack, including the linkage
5688 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
5689 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
5690 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
5691 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
5692 unsigned NumBytes = LinkageSize;
5693 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
5694 unsigned &QFPR_idx = FPR_idx;
5695
5696 static const MCPhysReg GPR[] = {
5697 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
5698 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
5699 };
5700 static const MCPhysReg VR[] = {
5701 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
5702 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
5703 };
5704
5705 const unsigned NumGPRs = array_lengthof(GPR);
5706 const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
5707 const unsigned NumVRs = array_lengthof(VR);
5708 const unsigned NumQFPRs = NumFPRs;
5709
5710 // On ELFv2, we can avoid allocating the parameter area if all the arguments
5711 // can be passed to the callee in registers.
5712 // For the fast calling convention, there is another check below.
5713 // Note: We should keep consistent with LowerFormalArguments_64SVR4()
5714 bool HasParameterArea = !isELFv2ABI || isVarArg || CallConv == CallingConv::Fast;
5715 if (!HasParameterArea) {
5716 unsigned ParamAreaSize = NumGPRs * PtrByteSize;
5717 unsigned AvailableFPRs = NumFPRs;
5718 unsigned AvailableVRs = NumVRs;
5719 unsigned NumBytesTmp = NumBytes;
5720 for (unsigned i = 0; i != NumOps; ++i) {
5721 if (Outs[i].Flags.isNest()) continue;
5722 if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
5723 PtrByteSize, LinkageSize, ParamAreaSize,
5724 NumBytesTmp, AvailableFPRs, AvailableVRs,
5725 Subtarget.hasQPX()))
5726 HasParameterArea = true;
5727 }
5728 }
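
As a concrete (hypothetical) illustration of the ELFv2 rule above, assuming no byval, nest, floating-point, or vector arguments and the eight argument GPRs X3-X10:

// Eight integer arguments fit in GPRs, so no parameter area is required.
extern "C" long all_in_regs(long a, long b, long c, long d,
                            long e, long f, long g, long h);

// A ninth integer argument needs a stack slot, so the parameter area is allocated.
extern "C" long needs_stack(long a, long b, long c, long d,
                            long e, long f, long g, long h, long i);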
5729
5730 // When using the fast calling convention, we don't provide backing for
5731 // arguments that will be in registers.
5732 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
5733
5734 // Avoid allocating parameter area for fastcc functions if all the arguments
5735 // can be passed in the registers.
5736 if (CallConv == CallingConv::Fast)
5737 HasParameterArea = false;
5738
5739 // Add up all the space actually used.
5740 for (unsigned i = 0; i != NumOps; ++i) {
5741 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5742 EVT ArgVT = Outs[i].VT;
5743 EVT OrigVT = Outs[i].ArgVT;
5744
5745 if (Flags.isNest())
5746 continue;
5747
5748 if (CallConv == CallingConv::Fast) {
5749 if (Flags.isByVal()) {
5750 NumGPRsUsed += (Flags.getByValSize()+7)/8;
5751 if (NumGPRsUsed > NumGPRs)
5752 HasParameterArea = true;
5753 } else {
5754 switch (ArgVT.getSimpleVT().SimpleTy) {
5755 default: llvm_unreachable("Unexpected ValueType for argument!");
5756 case MVT::i1:
5757 case MVT::i32:
5758 case MVT::i64:
5759 if (++NumGPRsUsed <= NumGPRs)
5760 continue;
5761 break;
5762 case MVT::v4i32:
5763 case MVT::v8i16:
5764 case MVT::v16i8:
5765 case MVT::v2f64:
5766 case MVT::v2i64:
5767 case MVT::v1i128:
5768 case MVT::f128:
5769 if (++NumVRsUsed <= NumVRs)
5770 continue;
5771 break;
5772 case MVT::v4f32:
5773 // When using QPX, this is handled like a FP register, otherwise, it
5774 // is an Altivec register.
5775 if (Subtarget.hasQPX()) {
5776 if (++NumFPRsUsed <= NumFPRs)
5777 continue;
5778 } else {
5779 if (++NumVRsUsed <= NumVRs)
5780 continue;
5781 }
5782 break;
5783 case MVT::f32:
5784 case MVT::f64:
5785 case MVT::v4f64: // QPX
5786 case MVT::v4i1: // QPX
5787 if (++NumFPRsUsed <= NumFPRs)
5788 continue;
5789 break;
5790 }
5791 HasParameterArea = true;
5792 }
5793 }
5794
5795 /* Respect alignment of argument on the stack. */
5796 unsigned Align =
5797 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
5798 NumBytes = ((NumBytes + Align - 1) / Align) * Align;
5799
5800 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
5801 if (Flags.isInConsecutiveRegsLast())
5802 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
5803 }
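
The round-up expression used above for stack-slot alignment appears several times in this function. A standalone sketch of the same arithmetic (illustrative only; the lowering code inlines the expression):

constexpr unsigned roundUpTo(unsigned Offset, unsigned Align) {
  return ((Offset + Align - 1) / Align) * Align;
}

static_assert(roundUpTo(52, 16) == 64, "rounds up to the next 16-byte boundary");
static_assert(roundUpTo(64, 16) == 64, "already-aligned offsets are unchanged");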
5804
5805 unsigned NumBytesActuallyUsed = NumBytes;
5806
5807 // In the old ELFv1 ABI,
5808 // the prolog code of the callee may store up to 8 GPR argument registers to
5809 // the stack, allowing va_start to index over them in memory if it is varargs.
5810 // Because we cannot tell if this is needed on the caller side, we have to
5811 // conservatively assume that it is needed. As such, make sure we have at
5812 // least enough stack space for the caller to store the 8 GPRs.
5813 // In the ELFv2 ABI, we allocate the parameter area iff a callee
5814 // really requires memory operands, e.g. a vararg function.
5815 if (HasParameterArea)
5816 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
5817 else
5818 NumBytes = LinkageSize;
5819
5820 // Tail call needs the stack to be aligned.
5821 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5822 CallConv == CallingConv::Fast)
5823 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
5824
5825 int SPDiff = 0;
5826
5827 // Calculate by how many bytes the stack has to be adjusted in case of tail
5828 // call optimization.
5829 if (!IsSibCall)
5830 SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
5831
5832 // To protect arguments on the stack from being clobbered in a tail call,
5833 // force all the loads to happen before doing any other lowering.
5834 if (isTailCall)
5835 Chain = DAG.getStackArgumentTokenFactor(Chain);
5836
5837 // Adjust the stack pointer for the new arguments...
5838 // These operations are automatically eliminated by the prolog/epilog pass
5839 if (!IsSibCall)
5840 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5841 SDValue CallSeqStart = Chain;
5842
5843 // Load the return address and frame pointer so they can be moved somewhere
5844 // else later.
5845 SDValue LROp, FPOp;
5846 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5847
5848 // Set up a copy of the stack pointer for use loading and storing any
5849 // arguments that may not fit in the registers available for argument
5850 // passing.
5851 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5852
5853 // Figure out which arguments are going to go in registers, and which in
5854 // memory. Also, if this is a vararg function, floating point operations
5855 // must be stored to our stack, and loaded into integer regs as well, if
5856 // any integer regs are available for argument passing.
5857 unsigned ArgOffset = LinkageSize;
5858
5859 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5860 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5861
5862 SmallVector<SDValue, 8> MemOpChains;
5863 for (unsigned i = 0; i != NumOps; ++i) {
5864 SDValue Arg = OutVals[i];
5865 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5866 EVT ArgVT = Outs[i].VT;
5867 EVT OrigVT = Outs[i].ArgVT;
5868
5869 // PtrOff will be used to store the current argument to the stack if a
5870 // register cannot be found for it.
5871 SDValue PtrOff;
5872
5873 // We re-align the argument offset for each argument, except when using the
5874 // fast calling convention, when we need to make sure we do that only when
5875 // we'll actually use a stack slot.
5876 auto ComputePtrOff = [&]() {
5877 /* Respect alignment of argument on the stack. */
5878 unsigned Align =
5879 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
5880 ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
5881
5882 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
5883
5884 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
5885 };
5886
5887 if (CallConv != CallingConv::Fast) {
5888 ComputePtrOff();
5889
5890 /* Compute GPR index associated with argument offset. */
5891 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
5892 GPR_idx = std::min(GPR_idx, NumGPRs);
5893 }
5894
5895 // Promote integers to 64-bit values.
5896 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
5897 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
5898 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
5899 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
5900 }
5901
5902 // FIXME memcpy is used way more than necessary. Correctness first.
5903 // Note: "by value" is code for passing a structure by value, not
5904 // basic types.
5905 if (Flags.isByVal()) {
5906 // Note: Size includes alignment padding, so
5907 // struct x { short a; char b; }
5908 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
5909 // These are the proper values we need for right-justifying the
5910 // aggregate in a parameter register.
5911 unsigned Size = Flags.getByValSize();
5912
5913 // An emp