Bug Summary

File: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Warning: line 9266, column 36
Although the value stored to 'SplatBits' is used in the enclosing expression, the value is never actually read from 'SplatBits'
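
For reference, this diagnostic comes from the analyzer's dead-store check: the assignment to 'SplatBits' at the flagged location produces a value that is consumed by the surrounding expression, but 'SplatBits' itself is never read again, so the store has no effect. The snippet below is a minimal, hypothetical sketch of that pattern (the helper and function names are invented for illustration; this is not the code at line 9266):

    unsigned computeSplatBits();   // placeholder helper, for illustration only

    bool hasSplat() {
      unsigned SplatBits = 0;
      // The assignment's result feeds the != comparison (the "enclosing expression"),
      // but SplatBits is never read afterwards, so the store to it is dead.
      return (SplatBits = computeSplatBits()) != 0;
    }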

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name PPCISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -fno-split-dwarf-inlining -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-12/lib/clang/12.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/build-llvm/lib/Target/PowerPC -I /build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC -I /build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/build-llvm/include -I /build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-12/lib/clang/12.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/build-llvm/lib/Target/PowerPC -fdebug-prefix-map=/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2020-11-21-121427-42170-1 -x c++ /build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
1//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the PPCISelLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCISelLowering.h"
14#include "MCTargetDesc/PPCPredicates.h"
15#include "PPC.h"
16#include "PPCCCState.h"
17#include "PPCCallingConv.h"
18#include "PPCFrameLowering.h"
19#include "PPCInstrInfo.h"
20#include "PPCMachineFunctionInfo.h"
21#include "PPCPerfectShuffle.h"
22#include "PPCRegisterInfo.h"
23#include "PPCSubtarget.h"
24#include "PPCTargetMachine.h"
25#include "llvm/ADT/APFloat.h"
26#include "llvm/ADT/APInt.h"
27#include "llvm/ADT/ArrayRef.h"
28#include "llvm/ADT/DenseMap.h"
29#include "llvm/ADT/None.h"
30#include "llvm/ADT/STLExtras.h"
31#include "llvm/ADT/SmallPtrSet.h"
32#include "llvm/ADT/SmallSet.h"
33#include "llvm/ADT/SmallVector.h"
34#include "llvm/ADT/Statistic.h"
35#include "llvm/ADT/StringRef.h"
36#include "llvm/ADT/StringSwitch.h"
37#include "llvm/CodeGen/CallingConvLower.h"
38#include "llvm/CodeGen/ISDOpcodes.h"
39#include "llvm/CodeGen/MachineBasicBlock.h"
40#include "llvm/CodeGen/MachineFrameInfo.h"
41#include "llvm/CodeGen/MachineFunction.h"
42#include "llvm/CodeGen/MachineInstr.h"
43#include "llvm/CodeGen/MachineInstrBuilder.h"
44#include "llvm/CodeGen/MachineJumpTableInfo.h"
45#include "llvm/CodeGen/MachineLoopInfo.h"
46#include "llvm/CodeGen/MachineMemOperand.h"
47#include "llvm/CodeGen/MachineModuleInfo.h"
48#include "llvm/CodeGen/MachineOperand.h"
49#include "llvm/CodeGen/MachineRegisterInfo.h"
50#include "llvm/CodeGen/RuntimeLibcalls.h"
51#include "llvm/CodeGen/SelectionDAG.h"
52#include "llvm/CodeGen/SelectionDAGNodes.h"
53#include "llvm/CodeGen/TargetInstrInfo.h"
54#include "llvm/CodeGen/TargetLowering.h"
55#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
56#include "llvm/CodeGen/TargetRegisterInfo.h"
57#include "llvm/CodeGen/ValueTypes.h"
58#include "llvm/IR/CallingConv.h"
59#include "llvm/IR/Constant.h"
60#include "llvm/IR/Constants.h"
61#include "llvm/IR/DataLayout.h"
62#include "llvm/IR/DebugLoc.h"
63#include "llvm/IR/DerivedTypes.h"
64#include "llvm/IR/Function.h"
65#include "llvm/IR/GlobalValue.h"
66#include "llvm/IR/IRBuilder.h"
67#include "llvm/IR/Instructions.h"
68#include "llvm/IR/Intrinsics.h"
69#include "llvm/IR/IntrinsicsPowerPC.h"
70#include "llvm/IR/Module.h"
71#include "llvm/IR/Type.h"
72#include "llvm/IR/Use.h"
73#include "llvm/IR/Value.h"
74#include "llvm/MC/MCContext.h"
75#include "llvm/MC/MCExpr.h"
76#include "llvm/MC/MCRegisterInfo.h"
77#include "llvm/MC/MCSectionXCOFF.h"
78#include "llvm/MC/MCSymbolXCOFF.h"
79#include "llvm/Support/AtomicOrdering.h"
80#include "llvm/Support/BranchProbability.h"
81#include "llvm/Support/Casting.h"
82#include "llvm/Support/CodeGen.h"
83#include "llvm/Support/CommandLine.h"
84#include "llvm/Support/Compiler.h"
85#include "llvm/Support/Debug.h"
86#include "llvm/Support/ErrorHandling.h"
87#include "llvm/Support/Format.h"
88#include "llvm/Support/KnownBits.h"
89#include "llvm/Support/MachineValueType.h"
90#include "llvm/Support/MathExtras.h"
91#include "llvm/Support/raw_ostream.h"
92#include "llvm/Target/TargetMachine.h"
93#include "llvm/Target/TargetOptions.h"
94#include <algorithm>
95#include <cassert>
96#include <cstdint>
97#include <iterator>
98#include <list>
99#include <utility>
100#include <vector>
101
102using namespace llvm;
103
104#define DEBUG_TYPE "ppc-lowering"
105
106static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
107cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
108
109static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
110cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
111
112static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
113cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
114
115static cl::opt<bool> DisableSCO("disable-ppc-sco",
116cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
117
118static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
119cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
120
121static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
122cl::desc("use absolute jump tables on ppc"), cl::Hidden);
123
124STATISTIC(NumTailCalls, "Number of tail calls");
125STATISTIC(NumSiblingCalls, "Number of sibling calls");
126STATISTIC(ShufflesHandledWithVPERM, "Number of shuffles lowered to a VPERM");
127STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
128
129static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
130
131static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
132
133// FIXME: Remove this once the bug has been fixed!
134extern cl::opt<bool> ANDIGlueBug;
135
136PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
137 const PPCSubtarget &STI)
138 : TargetLowering(TM), Subtarget(STI) {
139 // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
140 // arguments are at least 4/8 bytes aligned.
141 bool isPPC64 = Subtarget.isPPC64();
142 setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
143
144 // Set up the register classes.
145 addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
146 if (!useSoftFloat()) {
147 if (hasSPE()) {
148 addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
149 addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
150 } else {
151 addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
152 addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
153 }
154 }
155
156 // Match BITREVERSE to customized fast code sequence in the td file.
157 setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
158 setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
159
160 // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
161 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
162
163 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
164 for (MVT VT : MVT::integer_valuetypes()) {
165 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
166 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
167 }
168
169 if (Subtarget.isISA3_0()) {
170 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
171 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
172 setTruncStoreAction(MVT::f64, MVT::f16, Legal);
173 setTruncStoreAction(MVT::f32, MVT::f16, Legal);
174 } else {
175 // No extending loads from f16 or HW conversions back and forth.
176 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
177 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
178 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
179 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
180 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
181 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
182 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
183 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
184 }
185
186 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
187
188 // PowerPC has pre-inc load and store's.
189 setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
190 setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
191 setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
192 setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
193 setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
194 setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
195 setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
196 setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
197 setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
198 setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
199 if (!Subtarget.hasSPE()) {
200 setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
201 setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
202 setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
203 setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
204 }
205
206 // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
207 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
208 for (MVT VT : ScalarIntVTs) {
209 setOperationAction(ISD::ADDC, VT, Legal);
210 setOperationAction(ISD::ADDE, VT, Legal);
211 setOperationAction(ISD::SUBC, VT, Legal);
212 setOperationAction(ISD::SUBE, VT, Legal);
213 }
214
215 if (Subtarget.useCRBits()) {
216 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
217
218 if (isPPC64 || Subtarget.hasFPCVT()) {
219 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Promote);
220 AddPromotedToType(ISD::STRICT_SINT_TO_FP, MVT::i1,
221 isPPC64 ? MVT::i64 : MVT::i32);
222 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Promote);
223 AddPromotedToType(ISD::STRICT_UINT_TO_FP, MVT::i1,
224 isPPC64 ? MVT::i64 : MVT::i32);
225
226 setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
227 AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
228 isPPC64 ? MVT::i64 : MVT::i32);
229 setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
230 AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
231 isPPC64 ? MVT::i64 : MVT::i32);
232 } else {
233 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Custom);
234 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Custom);
235 setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
236 setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
237 }
238
239 // PowerPC does not support direct load/store of condition registers.
240 setOperationAction(ISD::LOAD, MVT::i1, Custom);
241 setOperationAction(ISD::STORE, MVT::i1, Custom);
242
243 // FIXME: Remove this once the ANDI glue bug is fixed:
244 if (ANDIGlueBug)
245 setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
246
247 for (MVT VT : MVT::integer_valuetypes()) {
248 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
249 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
250 setTruncStoreAction(VT, MVT::i1, Expand);
251 }
252
253 addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
254 }
255
256 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
257 // PPC (the libcall is not available).
258 setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom);
259 setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom);
260 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::ppcf128, Custom);
261 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::ppcf128, Custom);
262
263 // We do not currently implement these libm ops for PowerPC.
264 setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
265 setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
266 setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
267 setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
268 setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
269 setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
270
271 // PowerPC has no SREM/UREM instructions unless we are on P9
272 // On P9 we may use a hardware instruction to compute the remainder.
273 // When the result of both the remainder and the division is required it is
274 // more efficient to compute the remainder from the result of the division
275 // rather than use the remainder instruction. The instructions are legalized
276 // directly because the DivRemPairsPass performs the transformation at the IR
277 // level.
278 if (Subtarget.isISA3_0()) {
279 setOperationAction(ISD::SREM, MVT::i32, Legal);
280 setOperationAction(ISD::UREM, MVT::i32, Legal);
281 setOperationAction(ISD::SREM, MVT::i64, Legal);
282 setOperationAction(ISD::UREM, MVT::i64, Legal);
283 } else {
284 setOperationAction(ISD::SREM, MVT::i32, Expand);
285 setOperationAction(ISD::UREM, MVT::i32, Expand);
286 setOperationAction(ISD::SREM, MVT::i64, Expand);
287 setOperationAction(ISD::UREM, MVT::i64, Expand);
288 }
289
290 // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
291 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
292 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
293 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
294 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
295 setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
296 setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
297 setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
298 setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
299
300 // Handle constrained floating-point operations of scalar.
301 // TODO: Handle SPE specific operation.
302 setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
303 setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
304 setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
305 setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
306 setOperationAction(ISD::STRICT_FMA, MVT::f32, Legal);
307 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
308
309 setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
310 setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
311 setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
312 setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
313 setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal);
314 if (Subtarget.hasVSX()) {
315 setOperationAction(ISD::STRICT_FRINT, MVT::f32, Legal);
316 setOperationAction(ISD::STRICT_FRINT, MVT::f64, Legal);
317 }
318
319 if (Subtarget.hasFSQRT()) {
320 setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
321 setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
322 }
323
324 if (Subtarget.hasFPRND()) {
325 setOperationAction(ISD::STRICT_FFLOOR, MVT::f32, Legal);
326 setOperationAction(ISD::STRICT_FCEIL, MVT::f32, Legal);
327 setOperationAction(ISD::STRICT_FTRUNC, MVT::f32, Legal);
328 setOperationAction(ISD::STRICT_FROUND, MVT::f32, Legal);
329
330 setOperationAction(ISD::STRICT_FFLOOR, MVT::f64, Legal);
331 setOperationAction(ISD::STRICT_FCEIL, MVT::f64, Legal);
332 setOperationAction(ISD::STRICT_FTRUNC, MVT::f64, Legal);
333 setOperationAction(ISD::STRICT_FROUND, MVT::f64, Legal);
334 }
335
336 // We don't support sin/cos/sqrt/fmod/pow
337 setOperationAction(ISD::FSIN , MVT::f64, Expand);
338 setOperationAction(ISD::FCOS , MVT::f64, Expand);
339 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
340 setOperationAction(ISD::FREM , MVT::f64, Expand);
341 setOperationAction(ISD::FPOW , MVT::f64, Expand);
342 setOperationAction(ISD::FSIN , MVT::f32, Expand);
343 setOperationAction(ISD::FCOS , MVT::f32, Expand);
344 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
345 setOperationAction(ISD::FREM , MVT::f32, Expand);
346 setOperationAction(ISD::FPOW , MVT::f32, Expand);
347 if (Subtarget.hasSPE()) {
348 setOperationAction(ISD::FMA , MVT::f64, Expand);
349 setOperationAction(ISD::FMA , MVT::f32, Expand);
350 } else {
351 setOperationAction(ISD::FMA , MVT::f64, Legal);
352 setOperationAction(ISD::FMA , MVT::f32, Legal);
353 }
354
355 if (Subtarget.hasSPE())
356 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
357
358 setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
359
360 // If we're enabling GP optimizations, use hardware square root
361 if (!Subtarget.hasFSQRT() &&
362 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
363 Subtarget.hasFRE()))
364 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
365
366 if (!Subtarget.hasFSQRT() &&
367 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
368 Subtarget.hasFRES()))
369 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
370
371 if (Subtarget.hasFCPSGN()) {
372 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
373 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
374 } else {
375 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
376 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
377 }
378
379 if (Subtarget.hasFPRND()) {
380 setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
381 setOperationAction(ISD::FCEIL, MVT::f64, Legal);
382 setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
383 setOperationAction(ISD::FROUND, MVT::f64, Legal);
384
385 setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
386 setOperationAction(ISD::FCEIL, MVT::f32, Legal);
387 setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
388 setOperationAction(ISD::FROUND, MVT::f32, Legal);
389 }
390
391 // PowerPC does not have BSWAP, but we can use vector BSWAP instruction xxbrd
392 // to speed up scalar BSWAP64.
393 // CTPOP or CTTZ were introduced in P8/P9 respectively
394 setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
395 if (Subtarget.hasP9Vector())
396 setOperationAction(ISD::BSWAP, MVT::i64 , Custom);
397 else
398 setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
399 if (Subtarget.isISA3_0()) {
400 setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
401 setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
402 } else {
403 setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
404 setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
405 }
406
407 if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
408 setOperationAction(ISD::CTPOP, MVT::i32 , Legal);
409 setOperationAction(ISD::CTPOP, MVT::i64 , Legal);
410 } else {
411 setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
412 setOperationAction(ISD::CTPOP, MVT::i64 , Expand);
413 }
414
415 // PowerPC does not have ROTR
416 setOperationAction(ISD::ROTR, MVT::i32 , Expand);
417 setOperationAction(ISD::ROTR, MVT::i64 , Expand);
418
419 if (!Subtarget.useCRBits()) {
420 // PowerPC does not have Select
421 setOperationAction(ISD::SELECT, MVT::i32, Expand);
422 setOperationAction(ISD::SELECT, MVT::i64, Expand);
423 setOperationAction(ISD::SELECT, MVT::f32, Expand);
424 setOperationAction(ISD::SELECT, MVT::f64, Expand);
425 }
426
427 // PowerPC wants to turn select_cc of FP into fsel when possible.
428 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
429 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
430
431 // PowerPC wants to optimize integer setcc a bit
432 if (!Subtarget.useCRBits())
433 setOperationAction(ISD::SETCC, MVT::i32, Custom);
434
435 if (Subtarget.hasFPU()) {
436 setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
437 setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
438 setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Legal);
439
440 setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
441 setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
442 setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Legal);
443 }
444
445 // PowerPC does not have BRCOND which requires SetCC
446 if (!Subtarget.useCRBits())
447 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
448
449 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
450
451 if (Subtarget.hasSPE()) {
452 // SPE has built-in conversions
453 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Legal);
454 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Legal);
455 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Legal);
456 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
457 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
458 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
459 } else {
460 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
461 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
462 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
463
464 // PowerPC does not have [U|S]INT_TO_FP
465 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Expand);
466 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Expand);
467 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
468 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
469 }
470
471 if (Subtarget.hasDirectMove() && isPPC64) {
472 setOperationAction(ISD::BITCAST, MVT::f32, Legal);
473 setOperationAction(ISD::BITCAST, MVT::i32, Legal);
474 setOperationAction(ISD::BITCAST, MVT::i64, Legal);
475 setOperationAction(ISD::BITCAST, MVT::f64, Legal);
476 if (TM.Options.UnsafeFPMath) {
477 setOperationAction(ISD::LRINT, MVT::f64, Legal);
478 setOperationAction(ISD::LRINT, MVT::f32, Legal);
479 setOperationAction(ISD::LLRINT, MVT::f64, Legal);
480 setOperationAction(ISD::LLRINT, MVT::f32, Legal);
481 setOperationAction(ISD::LROUND, MVT::f64, Legal);
482 setOperationAction(ISD::LROUND, MVT::f32, Legal);
483 setOperationAction(ISD::LLROUND, MVT::f64, Legal);
484 setOperationAction(ISD::LLROUND, MVT::f32, Legal);
485 }
486 } else {
487 setOperationAction(ISD::BITCAST, MVT::f32, Expand);
488 setOperationAction(ISD::BITCAST, MVT::i32, Expand);
489 setOperationAction(ISD::BITCAST, MVT::i64, Expand);
490 setOperationAction(ISD::BITCAST, MVT::f64, Expand);
491 }
492
493 // We cannot sextinreg(i1). Expand to shifts.
494 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
495
496 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
497 // SjLj exception handling but a light-weight setjmp/longjmp replacement to
498 // support continuation, user-level threading, and etc.. As a result, no
499 // other SjLj exception interfaces are implemented and please don't build
500 // your own exception handling based on them.
501 // LLVM/Clang supports zero-cost DWARF exception handling.
502 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
503 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
504
505 // We want to legalize GlobalAddress and ConstantPool nodes into the
506 // appropriate instructions to materialize the address.
507 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
508 setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
509 setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
510 setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
511 setOperationAction(ISD::JumpTable, MVT::i32, Custom);
512 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
513 setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
514 setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
515 setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
516 setOperationAction(ISD::JumpTable, MVT::i64, Custom);
517
518 // TRAP is legal.
519 setOperationAction(ISD::TRAP, MVT::Other, Legal);
520
521 // TRAMPOLINE is custom lowered.
522 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
523 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
524
525 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
526 setOperationAction(ISD::VASTART , MVT::Other, Custom);
527
528 if (Subtarget.is64BitELFABI()) {
529 // VAARG always uses double-word chunks, so promote anything smaller.
530 setOperationAction(ISD::VAARG, MVT::i1, Promote);
531 AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
532 setOperationAction(ISD::VAARG, MVT::i8, Promote);
533 AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
534 setOperationAction(ISD::VAARG, MVT::i16, Promote);
535 AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
536 setOperationAction(ISD::VAARG, MVT::i32, Promote);
537 AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
538 setOperationAction(ISD::VAARG, MVT::Other, Expand);
539 } else if (Subtarget.is32BitELFABI()) {
540 // VAARG is custom lowered with the 32-bit SVR4 ABI.
541 setOperationAction(ISD::VAARG, MVT::Other, Custom);
542 setOperationAction(ISD::VAARG, MVT::i64, Custom);
543 } else
544 setOperationAction(ISD::VAARG, MVT::Other, Expand);
545
546 // VACOPY is custom lowered with the 32-bit SVR4 ABI.
547 if (Subtarget.is32BitELFABI())
548 setOperationAction(ISD::VACOPY , MVT::Other, Custom);
549 else
550 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
551
552 // Use the default implementation.
553 setOperationAction(ISD::VAEND , MVT::Other, Expand);
554 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
555 setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom);
556 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
557 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom);
558 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
559 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
560 setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
561 setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
562
563 // We want to custom lower some of our intrinsics.
564 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
565
566 // To handle counter-based loop conditions.
567 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
568
569 setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
570 setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
571 setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
572 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
573
574 // Comparisons that require checking two conditions.
575 if (Subtarget.hasSPE()) {
576 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
577 setCondCodeAction(ISD::SETO, MVT::f64, Expand);
578 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
579 setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
580 }
581 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
582 setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
583 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
584 setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
585 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
586 setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
587 setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
588 setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
589 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
590 setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
591 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
592 setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
593
594 if (Subtarget.has64BitSupport()) {
595 // They also have instructions for converting between i64 and fp.
596 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
597 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Expand);
598 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
599 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand);
600 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
601 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
602 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
603 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
604 // This is just the low 32 bits of a (signed) fp->i64 conversion.
605 // We cannot do this with Promote because i64 is not a legal type.
606 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
607 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
608
609 if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) {
610 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
611 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
612 }
613 } else {
614 // PowerPC does not have FP_TO_UINT on 32-bit implementations.
615 if (Subtarget.hasSPE()) {
616 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Legal);
617 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
618 } else {
619 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Expand);
620 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
621 }
622 }
623
624 // With the instructions enabled under FPCVT, we can do everything.
625 if (Subtarget.hasFPCVT()) {
626 if (Subtarget.has64BitSupport()) {
627 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
628 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
629 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
630 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
631 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
632 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
633 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
634 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
635 }
636
637 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
638 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
639 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
640 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
641 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
642 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
643 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
644 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
645 }
646
647 if (Subtarget.use64BitRegs()) {
648 // 64-bit PowerPC implementations can support i64 types directly
649 addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
650 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
651 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
652 // 64-bit PowerPC wants to expand i128 shifts itself.
653 setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
654 setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
655 setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
656 } else {
657 // 32-bit PowerPC wants to expand i64 shifts itself.
658 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
659 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
660 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
661 }
662
663 // PowerPC has better expansions for funnel shifts than the generic
664 // TargetLowering::expandFunnelShift.
665 if (Subtarget.has64BitSupport()) {
666 setOperationAction(ISD::FSHL, MVT::i64, Custom);
667 setOperationAction(ISD::FSHR, MVT::i64, Custom);
668 }
669 setOperationAction(ISD::FSHL, MVT::i32, Custom);
670 setOperationAction(ISD::FSHR, MVT::i32, Custom);
671
672 if (Subtarget.hasVSX()) {
673 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
674 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
675 setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
676 setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
677 }
678
679 if (Subtarget.hasAltivec()) {
680 for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
681 setOperationAction(ISD::SADDSAT, VT, Legal);
682 setOperationAction(ISD::SSUBSAT, VT, Legal);
683 setOperationAction(ISD::UADDSAT, VT, Legal);
684 setOperationAction(ISD::USUBSAT, VT, Legal);
685 }
686 // First set operation action for all vector types to expand. Then we
687 // will selectively turn on ones that can be effectively codegen'd.
688 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
689 // add/sub are legal for all supported vector VT's.
690 setOperationAction(ISD::ADD, VT, Legal);
691 setOperationAction(ISD::SUB, VT, Legal);
692
693 // For v2i64, these are only valid with P8Vector. This is corrected after
694 // the loop.
695 if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
696 setOperationAction(ISD::SMAX, VT, Legal);
697 setOperationAction(ISD::SMIN, VT, Legal);
698 setOperationAction(ISD::UMAX, VT, Legal);
699 setOperationAction(ISD::UMIN, VT, Legal);
700 }
701 else {
702 setOperationAction(ISD::SMAX, VT, Expand);
703 setOperationAction(ISD::SMIN, VT, Expand);
704 setOperationAction(ISD::UMAX, VT, Expand);
705 setOperationAction(ISD::UMIN, VT, Expand);
706 }
707
708 if (Subtarget.hasVSX()) {
709 setOperationAction(ISD::FMAXNUM, VT, Legal);
710 setOperationAction(ISD::FMINNUM, VT, Legal);
711 }
712
713 // Vector instructions introduced in P8
714 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
715 setOperationAction(ISD::CTPOP, VT, Legal);
716 setOperationAction(ISD::CTLZ, VT, Legal);
717 }
718 else {
719 setOperationAction(ISD::CTPOP, VT, Expand);
720 setOperationAction(ISD::CTLZ, VT, Expand);
721 }
722
723 // Vector instructions introduced in P9
724 if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
725 setOperationAction(ISD::CTTZ, VT, Legal);
726 else
727 setOperationAction(ISD::CTTZ, VT, Expand);
728
729 // We promote all shuffles to v16i8.
730 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
731 AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
732
733 // We promote all non-typed operations to v4i32.
734 setOperationAction(ISD::AND , VT, Promote);
735 AddPromotedToType (ISD::AND , VT, MVT::v4i32);
736 setOperationAction(ISD::OR , VT, Promote);
737 AddPromotedToType (ISD::OR , VT, MVT::v4i32);
738 setOperationAction(ISD::XOR , VT, Promote);
739 AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
740 setOperationAction(ISD::LOAD , VT, Promote);
741 AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
742 setOperationAction(ISD::SELECT, VT, Promote);
743 AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
744 setOperationAction(ISD::VSELECT, VT, Legal);
745 setOperationAction(ISD::SELECT_CC, VT, Promote);
746 AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
747 setOperationAction(ISD::STORE, VT, Promote);
748 AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
749
750 // No other operations are legal.
751 setOperationAction(ISD::MUL , VT, Expand);
752 setOperationAction(ISD::SDIV, VT, Expand);
753 setOperationAction(ISD::SREM, VT, Expand);
754 setOperationAction(ISD::UDIV, VT, Expand);
755 setOperationAction(ISD::UREM, VT, Expand);
756 setOperationAction(ISD::FDIV, VT, Expand);
757 setOperationAction(ISD::FREM, VT, Expand);
758 setOperationAction(ISD::FNEG, VT, Expand);
759 setOperationAction(ISD::FSQRT, VT, Expand);
760 setOperationAction(ISD::FLOG, VT, Expand);
761 setOperationAction(ISD::FLOG10, VT, Expand);
762 setOperationAction(ISD::FLOG2, VT, Expand);
763 setOperationAction(ISD::FEXP, VT, Expand);
764 setOperationAction(ISD::FEXP2, VT, Expand);
765 setOperationAction(ISD::FSIN, VT, Expand);
766 setOperationAction(ISD::FCOS, VT, Expand);
767 setOperationAction(ISD::FABS, VT, Expand);
768 setOperationAction(ISD::FFLOOR, VT, Expand);
769 setOperationAction(ISD::FCEIL, VT, Expand);
770 setOperationAction(ISD::FTRUNC, VT, Expand);
771 setOperationAction(ISD::FRINT, VT, Expand);
772 setOperationAction(ISD::FNEARBYINT, VT, Expand);
773 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
774 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
775 setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
776 setOperationAction(ISD::MULHU, VT, Expand);
777 setOperationAction(ISD::MULHS, VT, Expand);
778 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
779 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
780 setOperationAction(ISD::UDIVREM, VT, Expand);
781 setOperationAction(ISD::SDIVREM, VT, Expand);
782 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
783 setOperationAction(ISD::FPOW, VT, Expand);
784 setOperationAction(ISD::BSWAP, VT, Expand);
785 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
786 setOperationAction(ISD::ROTL, VT, Expand);
787 setOperationAction(ISD::ROTR, VT, Expand);
788
789 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
790 setTruncStoreAction(VT, InnerVT, Expand);
791 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
792 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
793 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
794 }
795 }
796 setOperationAction(ISD::SELECT_CC, MVT::v4i32, Expand);
797 if (!Subtarget.hasP8Vector()) {
798 setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
799 setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
800 setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
801 setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
802 }
803
804 for (auto VT : {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8})
805 setOperationAction(ISD::ABS, VT, Custom);
806
807 // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
808 // with merges, splats, etc.
809 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
810
811 // Vector truncates to sub-word integer that fit in an Altivec/VSX register
812 // are cheap, so handle them before they get expanded to scalar.
813 setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
814 setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
815 setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
816 setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
817 setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
818
819 setOperationAction(ISD::AND , MVT::v4i32, Legal);
820 setOperationAction(ISD::OR , MVT::v4i32, Legal);
821 setOperationAction(ISD::XOR , MVT::v4i32, Legal);
822 setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
823 setOperationAction(ISD::SELECT, MVT::v4i32,
824 Subtarget.useCRBits() ? Legal : Expand);
825 setOperationAction(ISD::STORE , MVT::v4i32, Legal);
826 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
827 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);
828 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
829 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal);
830 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
831 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
832 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
833 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
834 setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
835 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
836 setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
837 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
838
839 // Without hasP8Altivec set, v2i64 SMAX isn't available.
840 // But ABS custom lowering requires SMAX support.
841 if (!Subtarget.hasP8Altivec())
842 setOperationAction(ISD::ABS, MVT::v2i64, Expand);
843
844 // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
845 setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
846 // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
847 if (Subtarget.hasAltivec())
848 for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
849 setOperationAction(ISD::ROTL, VT, Legal);
850 // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
851 if (Subtarget.hasP8Altivec())
852 setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
853
854 addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
855 addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
856 addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
857 addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
858
859 setOperationAction(ISD::MUL, MVT::v4f32, Legal);
860 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
861
862 if (Subtarget.hasVSX()) {
863 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
864 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
865 }
866
867 if (Subtarget.hasP8Altivec())
868 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
869 else
870 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
871
872 if (Subtarget.isISA3_1()) {
873 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
874 setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
875 setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
876 setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
877 setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
878 setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
879 setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
880 setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
881 setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
882 setOperationAction(ISD::UREM, MVT::v2i64, Legal);
883 setOperationAction(ISD::SREM, MVT::v2i64, Legal);
884 setOperationAction(ISD::UREM, MVT::v4i32, Legal);
885 setOperationAction(ISD::SREM, MVT::v4i32, Legal);
886 setOperationAction(ISD::UREM, MVT::v1i128, Legal);
887 setOperationAction(ISD::SREM, MVT::v1i128, Legal);
888 setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
889 setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
890 setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
891 }
892
893 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
894 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
895
896 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
897 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
898
899 setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
900 setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
901 setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
902 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
903
904 // Altivec does not contain unordered floating-point compare instructions
905 setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
906 setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
907 setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
908 setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
909
910 if (Subtarget.hasVSX()) {
911 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
912 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
913 if (Subtarget.hasP8Vector()) {
914 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
915 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
916 }
917 if (Subtarget.hasDirectMove() && isPPC64) {
918 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
919 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
920 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
921 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
922 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
923 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
924 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
925 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
926 }
927 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
928
929 // The nearbyint variants are not allowed to raise the inexact exception
930 // so we can only code-gen them with unsafe math.
931 if (TM.Options.UnsafeFPMath) {
932 setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
933 setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
934 }
935
936 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
937 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
938 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
939 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
940 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
941 setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
942 setOperationAction(ISD::FROUND, MVT::f64, Legal);
943 setOperationAction(ISD::FRINT, MVT::f64, Legal);
944
945 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
946 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
947 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
948 setOperationAction(ISD::FROUND, MVT::f32, Legal);
949 setOperationAction(ISD::FRINT, MVT::f32, Legal);
950
951 setOperationAction(ISD::MUL, MVT::v2f64, Legal);
952 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
953
954 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
955 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
956
957 // Share the Altivec comparison restrictions.
958 setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
959 setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
960 setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
961 setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
962
963 setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
964 setOperationAction(ISD::STORE, MVT::v2f64, Legal);
965
966 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);
967
968 if (Subtarget.hasP8Vector())
969 addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
970
971 addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
972
973 addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
974 addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
975 addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
976
977 if (Subtarget.hasP8Altivec()) {
978 setOperationAction(ISD::SHL, MVT::v2i64, Legal);
979 setOperationAction(ISD::SRA, MVT::v2i64, Legal);
980 setOperationAction(ISD::SRL, MVT::v2i64, Legal);
981
982 // 128 bit shifts can be accomplished via 3 instructions for SHL and
983 // SRL, but not for SRA because of the instructions available:
984 // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth
985 // doing
986 setOperationAction(ISD::SHL, MVT::v1i128, Expand);
987 setOperationAction(ISD::SRL, MVT::v1i128, Expand);
988 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
989
990 setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
991 }
992 else {
993 setOperationAction(ISD::SHL, MVT::v2i64, Expand);
994 setOperationAction(ISD::SRA, MVT::v2i64, Expand);
995 setOperationAction(ISD::SRL, MVT::v2i64, Expand);
996
997 setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
998
999 // VSX v2i64 only supports non-arithmetic operations.
1000 setOperationAction(ISD::ADD, MVT::v2i64, Expand);
1001 setOperationAction(ISD::SUB, MVT::v2i64, Expand);
1002 }
1003
1004 if (Subtarget.isISA3_1())
1005 setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
1006 else
1007 setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
1008
1009 setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
1010 AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
1011 setOperationAction(ISD::STORE, MVT::v2i64, Promote);
1012 AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
1013
1014 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);
1015
1016 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal);
1017 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal);
1018 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal);
1019 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal);
1020 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
1021 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
1022 setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
1023 setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
1024
1025 // Custom handling for partial vectors of integers converted to
1026 // floating point. We already have optimal handling for v2i32 through
1027 // the DAG combine, so those aren't necessary.
1028 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i8, Custom);
1029 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i8, Custom);
1030 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i16, Custom);
1031 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i16, Custom);
1032 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i8, Custom);
1033 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i8, Custom);
1034 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i16, Custom);
1035 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i16, Custom);
1036 setOperationAction(ISD::UINT_TO_FP, MVT::v2i8, Custom);
1037 setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
1038 setOperationAction(ISD::UINT_TO_FP, MVT::v2i16, Custom);
1039 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
1040 setOperationAction(ISD::SINT_TO_FP, MVT::v2i8, Custom);
1041 setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Custom);
1042 setOperationAction(ISD::SINT_TO_FP, MVT::v2i16, Custom);
1043 setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
1044
1045 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
1046 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
1047 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
1048 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
1049 setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
1050 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Legal);
1051
1052 if (Subtarget.hasDirectMove())
1053 setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
1054 setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
1055
1056 // Handle constrained floating-point operations of vector.
1057 // The predictor is `hasVSX` because altivec instruction has
1058 // no exception but VSX vector instruction has.
1059 setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
1060 setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
1061 setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
1062 setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
1063 setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
1064 setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
1065 setOperationAction(ISD::STRICT_FMAXNUM, MVT::v4f32, Legal);
1066 setOperationAction(ISD::STRICT_FMINNUM, MVT::v4f32, Legal);
1067 setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
1068 setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
1069 setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal);
1070 setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
1071 setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);
1072
1073 setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
1074 setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
1075 setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
1076 setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
1077 setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);
1078 setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
1079 setOperationAction(ISD::STRICT_FMAXNUM, MVT::v2f64, Legal);
1080 setOperationAction(ISD::STRICT_FMINNUM, MVT::v2f64, Legal);
1081 setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal);
1082 setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
1083 setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal);
1084 setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
1085 setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal);
1086
1087 addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1088 }
1089
1090 if (Subtarget.hasP8Altivec()) {
1091 addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1092 addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1093 }
1094
1095 if (Subtarget.hasP9Vector()) {
1096 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
1097 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
1098
1099 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1100 // SRL, but not for SRA because of the instructions available:
1101 // VS{RL} and VS{RL}O.
1102 setOperationAction(ISD::SHL, MVT::v1i128, Legal);
1103 setOperationAction(ISD::SRL, MVT::v1i128, Legal);
1104 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1105
1106 addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1107 setOperationAction(ISD::FADD, MVT::f128, Legal);
1108 setOperationAction(ISD::FSUB, MVT::f128, Legal);
1109 setOperationAction(ISD::FDIV, MVT::f128, Legal);
1110 setOperationAction(ISD::FMUL, MVT::f128, Legal);
1111 setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
1112 // No extending loads to f128 on PPC.
1113 for (MVT FPT : MVT::fp_valuetypes())
1114 setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
1115 setOperationAction(ISD::FMA, MVT::f128, Legal);
1116 setCondCodeAction(ISD::SETULT, MVT::f128, Expand);
1117 setCondCodeAction(ISD::SETUGT, MVT::f128, Expand);
1118 setCondCodeAction(ISD::SETUEQ, MVT::f128, Expand);
1119 setCondCodeAction(ISD::SETOGE, MVT::f128, Expand);
1120 setCondCodeAction(ISD::SETOLE, MVT::f128, Expand);
1121 setCondCodeAction(ISD::SETONE, MVT::f128, Expand);
1122
1123 setOperationAction(ISD::FTRUNC, MVT::f128, Legal);
1124 setOperationAction(ISD::FRINT, MVT::f128, Legal);
1125 setOperationAction(ISD::FFLOOR, MVT::f128, Legal);
1126 setOperationAction(ISD::FCEIL, MVT::f128, Legal);
1127 setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal);
1128 setOperationAction(ISD::FROUND, MVT::f128, Legal);
1129
1130 setOperationAction(ISD::SELECT, MVT::f128, Expand);
1131 setOperationAction(ISD::FP_ROUND, MVT::f64, Legal);
1132 setOperationAction(ISD::FP_ROUND, MVT::f32, Legal);
1133 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
1134 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
1135 setOperationAction(ISD::BITCAST, MVT::i128, Custom);
1136 // No implementation for these ops for PowerPC.
1137 setOperationAction(ISD::FSIN, MVT::f128, Expand);
1138 setOperationAction(ISD::FCOS, MVT::f128, Expand);
1139 setOperationAction(ISD::FPOW, MVT::f128, Expand);
1140 setOperationAction(ISD::FPOWI, MVT::f128, Expand);
1141 setOperationAction(ISD::FREM, MVT::f128, Expand);
1142
1143 // Handle constrained floating-point operations of fp128
1144 setOperationAction(ISD::STRICT_FADD, MVT::f128, Legal);
1145 setOperationAction(ISD::STRICT_FSUB, MVT::f128, Legal);
1146 setOperationAction(ISD::STRICT_FMUL, MVT::f128, Legal);
1147 setOperationAction(ISD::STRICT_FDIV, MVT::f128, Legal);
1148 setOperationAction(ISD::STRICT_FMA, MVT::f128, Legal);
1149 setOperationAction(ISD::STRICT_FSQRT, MVT::f128, Legal);
1150 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Legal);
1151 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);
1152 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
1153 setOperationAction(ISD::STRICT_FRINT, MVT::f128, Legal);
1154 setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f128, Legal);
1155 setOperationAction(ISD::STRICT_FFLOOR, MVT::f128, Legal);
1156 setOperationAction(ISD::STRICT_FCEIL, MVT::f128, Legal);
1157 setOperationAction(ISD::STRICT_FTRUNC, MVT::f128, Legal);
1158 setOperationAction(ISD::STRICT_FROUND, MVT::f128, Legal);
1159 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
1160 setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
1161 setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
1162 setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
1163 setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
1164 }
1165
1166 if (Subtarget.hasP9Altivec()) {
1167 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
1168 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
1169
1170 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Legal);
1171 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
1172 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
1173 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Legal);
1174 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Legal);
1175 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
1176 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
1177 }
1178 }
1179
1180 if (Subtarget.pairedVectorMemops()) {
1181 addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
1182 setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
1183 setOperationAction(ISD::STORE, MVT::v256i1, Custom);
1184 }
1185 if (Subtarget.hasMMA()) {
1186 addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1187 setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
1188 setOperationAction(ISD::STORE, MVT::v512i1, Custom);
1189 setOperationAction(ISD::BUILD_VECTOR, MVT::v512i1, Custom);
1190 }
1191
1192 if (Subtarget.has64BitSupport())
1193 setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
1194
1195 if (Subtarget.isISA3_1())
1196 setOperationAction(ISD::SRA, MVT::v1i128, Legal);
1197
1198 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1199
1200 if (!isPPC64) {
1201 setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
1202 setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
1203 }
1204
1205 setBooleanContents(ZeroOrOneBooleanContent);
1206
1207 if (Subtarget.hasAltivec()) {
1208 // Altivec instructions set fields to all zeros or all ones.
1209 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
1210 }
1211
1212 if (!isPPC64) {
1213 // These libcalls are not available in 32-bit.
1214 setLibcallName(RTLIB::SHL_I128, nullptr);
1215 setLibcallName(RTLIB::SRL_I128, nullptr);
1216 setLibcallName(RTLIB::SRA_I128, nullptr);
1217 }
1218
1219 if (!isPPC64)
1220 setMaxAtomicSizeInBitsSupported(32);
1221
1222 setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1223
1224 // We have target-specific dag combine patterns for the following nodes:
1225 setTargetDAGCombine(ISD::ADD);
1226 setTargetDAGCombine(ISD::SHL);
1227 setTargetDAGCombine(ISD::SRA);
1228 setTargetDAGCombine(ISD::SRL);
1229 setTargetDAGCombine(ISD::MUL);
1230 setTargetDAGCombine(ISD::FMA);
1231 setTargetDAGCombine(ISD::SINT_TO_FP);
1232 setTargetDAGCombine(ISD::BUILD_VECTOR);
1233 if (Subtarget.hasFPCVT())
1234 setTargetDAGCombine(ISD::UINT_TO_FP);
1235 setTargetDAGCombine(ISD::LOAD);
1236 setTargetDAGCombine(ISD::STORE);
1237 setTargetDAGCombine(ISD::BR_CC);
1238 if (Subtarget.useCRBits())
1239 setTargetDAGCombine(ISD::BRCOND);
1240 setTargetDAGCombine(ISD::BSWAP);
1241 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
1242 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
1243 setTargetDAGCombine(ISD::INTRINSIC_VOID);
1244
1245 setTargetDAGCombine(ISD::SIGN_EXTEND);
1246 setTargetDAGCombine(ISD::ZERO_EXTEND);
1247 setTargetDAGCombine(ISD::ANY_EXTEND);
1248
1249 setTargetDAGCombine(ISD::TRUNCATE);
1250 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
1251
1252
1253 if (Subtarget.useCRBits()) {
1254 setTargetDAGCombine(ISD::TRUNCATE);
1255 setTargetDAGCombine(ISD::SETCC);
1256 setTargetDAGCombine(ISD::SELECT_CC);
1257 }
1258
1259 if (Subtarget.hasP9Altivec()) {
1260 setTargetDAGCombine(ISD::ABS);
1261 setTargetDAGCombine(ISD::VSELECT);
1262 }
1263
1264 setLibcallName(RTLIB::LOG_F128, "logf128");
1265 setLibcallName(RTLIB::LOG2_F128, "log2f128");
1266 setLibcallName(RTLIB::LOG10_F128, "log10f128");
1267 setLibcallName(RTLIB::EXP_F128, "expf128");
1268 setLibcallName(RTLIB::EXP2_F128, "exp2f128");
1269 setLibcallName(RTLIB::SIN_F128, "sinf128");
1270 setLibcallName(RTLIB::COS_F128, "cosf128");
1271 setLibcallName(RTLIB::POW_F128, "powf128");
1272 setLibcallName(RTLIB::FMIN_F128, "fminf128");
1273 setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
1274 setLibcallName(RTLIB::POWI_F128, "__powikf2");
1275 setLibcallName(RTLIB::REM_F128, "fmodf128");
1276
1277 // With 32 condition bits, we don't need to sink (and duplicate) compares
1278 // aggressively in CodeGenPrep.
1279 if (Subtarget.useCRBits()) {
1280 setHasMultipleConditionRegisters();
1281 setJumpIsExpensive();
1282 }
1283
1284 setMinFunctionAlignment(Align(4));
1285
1286 switch (Subtarget.getCPUDirective()) {
1287 default: break;
1288 case PPC::DIR_970:
1289 case PPC::DIR_A2:
1290 case PPC::DIR_E500:
1291 case PPC::DIR_E500mc:
1292 case PPC::DIR_E5500:
1293 case PPC::DIR_PWR4:
1294 case PPC::DIR_PWR5:
1295 case PPC::DIR_PWR5X:
1296 case PPC::DIR_PWR6:
1297 case PPC::DIR_PWR6X:
1298 case PPC::DIR_PWR7:
1299 case PPC::DIR_PWR8:
1300 case PPC::DIR_PWR9:
1301 case PPC::DIR_PWR10:
1302 case PPC::DIR_PWR_FUTURE:
1303 setPrefLoopAlignment(Align(16));
1304 setPrefFunctionAlignment(Align(16));
1305 break;
1306 }
1307
1308 if (Subtarget.enableMachineScheduler())
1309 setSchedulingPreference(Sched::Source);
1310 else
1311 setSchedulingPreference(Sched::Hybrid);
1312
1313 computeRegisterProperties(STI.getRegisterInfo());
1314
1315 // The Freescale cores do better with aggressive inlining of memcpy and
1316  // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
1317 if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
1318 Subtarget.getCPUDirective() == PPC::DIR_E5500) {
1319 MaxStoresPerMemset = 32;
1320 MaxStoresPerMemsetOptSize = 16;
1321 MaxStoresPerMemcpy = 32;
1322 MaxStoresPerMemcpyOptSize = 8;
1323 MaxStoresPerMemmove = 32;
1324 MaxStoresPerMemmoveOptSize = 8;
1325 } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
1326 // The A2 also benefits from (very) aggressive inlining of memcpy and
1327      // friends. The overhead of the function call, even when warm, can be
1328 // over one hundred cycles.
1329 MaxStoresPerMemset = 128;
1330 MaxStoresPerMemcpy = 128;
1331 MaxStoresPerMemmove = 128;
1332 MaxLoadsPerMemcmp = 128;
1333 } else {
1334 MaxLoadsPerMemcmp = 8;
1335 MaxLoadsPerMemcmpOptSize = 4;
1336 }
1337
1338 IsStrictFPEnabled = true;
1339
1340 // Let the subtarget (CPU) decide if a predictable select is more expensive
1341 // than the corresponding branch. This information is used in CGP to decide
1342 // when to convert selects into branches.
1343 PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();
1344}
1345
1346/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1347/// the desired ByVal argument alignment.
1348static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1349 if (MaxAlign == MaxMaxAlign)
1350 return;
1351 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1352 if (MaxMaxAlign >= 32 &&
1353 VTy->getPrimitiveSizeInBits().getFixedSize() >= 256)
1354 MaxAlign = Align(32);
1355 else if (VTy->getPrimitiveSizeInBits().getFixedSize() >= 128 &&
1356 MaxAlign < 16)
1357 MaxAlign = Align(16);
1358 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1359 Align EltAlign;
1360 getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1361 if (EltAlign > MaxAlign)
1362 MaxAlign = EltAlign;
1363 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1364 for (auto *EltTy : STy->elements()) {
1365 Align EltAlign;
1366 getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1367 if (EltAlign > MaxAlign)
1368 MaxAlign = EltAlign;
1369 if (MaxAlign == MaxMaxAlign)
1370 break;
1371 }
1372 }
1373}
1374
1375/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1376/// function arguments in the caller parameter area.
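/// For example (illustrative): a struct containing a <4 x i32> member is given
/// 16-byte alignment when Altivec is available, while a struct of scalars keeps
/// the default 8-byte (PPC64) or 4-byte (PPC32) alignment.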
1377unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
1378 const DataLayout &DL) const {
1379  // 16-byte and wider vectors are passed on a 16-byte boundary.
1380  // The rest are passed on an 8-byte boundary on PPC64 and a 4-byte boundary on PPC32.
1381 Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1382 if (Subtarget.hasAltivec())
1383 getMaxByValAlign(Ty, Alignment, Align(16));
1384 return Alignment.value();
1385}
1386
1387bool PPCTargetLowering::useSoftFloat() const {
1388 return Subtarget.useSoftFloat();
1389}
1390
1391bool PPCTargetLowering::hasSPE() const {
1392 return Subtarget.hasSPE();
1393}
1394
1395bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
1396 return VT.isScalarInteger();
1397}
1398
1399const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1400 switch ((PPCISD::NodeType)Opcode) {
1401 case PPCISD::FIRST_NUMBER: break;
1402 case PPCISD::FSEL: return "PPCISD::FSEL";
1403 case PPCISD::XSMAXCDP: return "PPCISD::XSMAXCDP";
1404 case PPCISD::XSMINCDP: return "PPCISD::XSMINCDP";
1405 case PPCISD::FCFID: return "PPCISD::FCFID";
1406 case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
1407 case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
1408 case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
1409 case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
1410 case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
1411 case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
1412 case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
1413 case PPCISD::FP_TO_UINT_IN_VSR:
1414      return "PPCISD::FP_TO_UINT_IN_VSR";
1415 case PPCISD::FP_TO_SINT_IN_VSR:
1416 return "PPCISD::FP_TO_SINT_IN_VSR";
1417 case PPCISD::FRE: return "PPCISD::FRE";
1418 case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
1419 case PPCISD::STFIWX: return "PPCISD::STFIWX";
1420 case PPCISD::VPERM: return "PPCISD::VPERM";
1421 case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
1422 case PPCISD::XXSPLTI_SP_TO_DP:
1423 return "PPCISD::XXSPLTI_SP_TO_DP";
1424 case PPCISD::XXSPLTI32DX:
1425 return "PPCISD::XXSPLTI32DX";
1426 case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
1427 case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
1428 case PPCISD::VECSHL: return "PPCISD::VECSHL";
1429 case PPCISD::CMPB: return "PPCISD::CMPB";
1430 case PPCISD::Hi: return "PPCISD::Hi";
1431 case PPCISD::Lo: return "PPCISD::Lo";
1432 case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
1433 case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1434 case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
1435 case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
1436 case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
1437 case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA";
1438 case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
1439 case PPCISD::SRL: return "PPCISD::SRL";
1440 case PPCISD::SRA: return "PPCISD::SRA";
1441 case PPCISD::SHL: return "PPCISD::SHL";
1442 case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
1443 case PPCISD::CALL: return "PPCISD::CALL";
1444 case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
1445 case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC";
1446 case PPCISD::MTCTR: return "PPCISD::MTCTR";
1447 case PPCISD::BCTRL: return "PPCISD::BCTRL";
1448 case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
1449 case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
1450 case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
1451 case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
1452 case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1453 case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
1454 case PPCISD::MFVSR: return "PPCISD::MFVSR";
1455 case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
1456 case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
1457 case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
1458 case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
1459 case PPCISD::SCALAR_TO_VECTOR_PERMUTED:
1460 return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
1461 case PPCISD::ANDI_rec_1_EQ_BIT:
1462 return "PPCISD::ANDI_rec_1_EQ_BIT";
1463 case PPCISD::ANDI_rec_1_GT_BIT:
1464 return "PPCISD::ANDI_rec_1_GT_BIT";
1465 case PPCISD::VCMP: return "PPCISD::VCMP";
1466 case PPCISD::VCMPo: return "PPCISD::VCMPo";
1467 case PPCISD::LBRX: return "PPCISD::LBRX";
1468 case PPCISD::STBRX: return "PPCISD::STBRX";
1469 case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
1470 case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
1471 case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
1472 case PPCISD::STXSIX: return "PPCISD::STXSIX";
1473 case PPCISD::VEXTS: return "PPCISD::VEXTS";
1474 case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
1475 case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
1476 case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE";
1477 case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE";
1478 case PPCISD::ST_VSR_SCAL_INT:
1479 return "PPCISD::ST_VSR_SCAL_INT";
1480 case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
1481 case PPCISD::BDNZ: return "PPCISD::BDNZ";
1482 case PPCISD::BDZ: return "PPCISD::BDZ";
1483 case PPCISD::MFFS: return "PPCISD::MFFS";
1484 case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
1485 case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
1486 case PPCISD::CR6SET: return "PPCISD::CR6SET";
1487 case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
1488 case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
1489 case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
1490 case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1491 case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
1492 case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
1493 case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
1494 case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
1495 case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
1496 case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1497 case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
1498 case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
1499 case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
1500 case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1501 case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1502 case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
1503 case PPCISD::PADDI_DTPREL:
1504 return "PPCISD::PADDI_DTPREL";
1505 case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
1506 case PPCISD::SC: return "PPCISD::SC";
1507 case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";
1508 case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";
1509 case PPCISD::RFEBB: return "PPCISD::RFEBB";
1510 case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
1511 case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
1512 case PPCISD::VABSD: return "PPCISD::VABSD";
1513 case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
1514 case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
1515 case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
1516 case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
1517 case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
1518 case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
1519 case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR";
1520 case PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR:
1521 return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
1522 case PPCISD::TLS_LOCAL_EXEC_MAT_ADDR:
1523 return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
1524 case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD";
1525 case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD";
1526 case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
1527 case PPCISD::XXMFACC: return "PPCISD::XXMFACC";
1528 case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
1529 case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
1530 case PPCISD::STRICT_FADDRTZ:
1531 return "PPCISD::STRICT_FADDRTZ";
1532 case PPCISD::STRICT_FCTIDZ:
1533 return "PPCISD::STRICT_FCTIDZ";
1534 case PPCISD::STRICT_FCTIWZ:
1535 return "PPCISD::STRICT_FCTIWZ";
1536 case PPCISD::STRICT_FCTIDUZ:
1537 return "PPCISD::STRICT_FCTIDUZ";
1538 case PPCISD::STRICT_FCTIWUZ:
1539 return "PPCISD::STRICT_FCTIWUZ";
1540 case PPCISD::STRICT_FCFID:
1541 return "PPCISD::STRICT_FCFID";
1542 case PPCISD::STRICT_FCFIDU:
1543 return "PPCISD::STRICT_FCFIDU";
1544 case PPCISD::STRICT_FCFIDS:
1545 return "PPCISD::STRICT_FCFIDS";
1546 case PPCISD::STRICT_FCFIDUS:
1547 return "PPCISD::STRICT_FCFIDUS";
1548 case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
1549 }
1550 return nullptr;
1551}
1552
1553EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
1554 EVT VT) const {
1555 if (!VT.isVector())
1556 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1557
1558 return VT.changeVectorElementTypeToInteger();
1559}
1560
1561bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
1562  assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1563 return true;
1564}
1565
1566//===----------------------------------------------------------------------===//
1567// Node matching predicates, for use by the tblgen matching code.
1568//===----------------------------------------------------------------------===//
1569
1570/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1571static bool isFloatingPointZero(SDValue Op) {
1572 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1573 return CFP->getValueAPF().isZero();
1574 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1575 // Maybe this has already been legalized into the constant pool?
1576 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1577 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1578 return CFP->getValueAPF().isZero();
1579 }
1580 return false;
1581}
1582
1583/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
1584/// true if Op is undef or if it matches the specified value.
1585static bool isConstantOrUndef(int Op, int Val) {
1586 return Op < 0 || Op == Val;
1587}
1588
1589/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1590/// VPKUHUM instruction.
1591/// The ShuffleKind distinguishes between big-endian operations with
1592/// two different inputs (0), either-endian operations with two identical
1593/// inputs (1), and little-endian operations with two different inputs (2).
1594/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
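/// For example (illustrative, derived from the checks below): a big-endian
/// two-input VPKUHUM (ShuffleKind 0) corresponds to the v16i8 mask
/// <1,3,5,...,31> (the odd bytes of the two concatenated inputs), while the
/// little-endian swapped form (ShuffleKind 2) expects <0,2,4,...,30>.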
1595bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1596 SelectionDAG &DAG) {
1597 bool IsLE = DAG.getDataLayout().isLittleEndian();
1598 if (ShuffleKind == 0) {
1599 if (IsLE)
1600 return false;
1601 for (unsigned i = 0; i != 16; ++i)
1602 if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1603 return false;
1604 } else if (ShuffleKind == 2) {
1605 if (!IsLE)
1606 return false;
1607 for (unsigned i = 0; i != 16; ++i)
1608 if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1609 return false;
1610 } else if (ShuffleKind == 1) {
1611 unsigned j = IsLE ? 0 : 1;
1612 for (unsigned i = 0; i != 8; ++i)
1613 if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
1614 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
1615 return false;
1616 }
1617 return true;
1618}
1619
1620/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1621/// VPKUWUM instruction.
1622/// The ShuffleKind distinguishes between big-endian operations with
1623/// two different inputs (0), either-endian operations with two identical
1624/// inputs (1), and little-endian operations with two different inputs (2).
1625/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1626bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1627 SelectionDAG &DAG) {
1628 bool IsLE = DAG.getDataLayout().isLittleEndian();
1629 if (ShuffleKind == 0) {
1630 if (IsLE)
1631 return false;
1632 for (unsigned i = 0; i != 16; i += 2)
1633 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
1634 !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
1635 return false;
1636 } else if (ShuffleKind == 2) {
1637 if (!IsLE)
1638 return false;
1639 for (unsigned i = 0; i != 16; i += 2)
1640 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1641 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
1642 return false;
1643 } else if (ShuffleKind == 1) {
1644 unsigned j = IsLE ? 0 : 2;
1645 for (unsigned i = 0; i != 8; i += 2)
1646 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1647 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1648 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1649 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
1650 return false;
1651 }
1652 return true;
1653}
1654
1655/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1656/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1657/// current subtarget.
1658///
1659/// The ShuffleKind distinguishes between big-endian operations with
1660/// two different inputs (0), either-endian operations with two identical
1661/// inputs (1), and little-endian operations with two different inputs (2).
1662/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1663bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1664 SelectionDAG &DAG) {
1665 const PPCSubtarget& Subtarget =
1666 static_cast<const PPCSubtarget&>(DAG.getSubtarget());
1667 if (!Subtarget.hasP8Vector())
1668 return false;
1669
1670 bool IsLE = DAG.getDataLayout().isLittleEndian();
1671 if (ShuffleKind == 0) {
1672 if (IsLE)
1673 return false;
1674 for (unsigned i = 0; i != 16; i += 4)
1675 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||
1676 !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
1677 !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
1678 !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
1679 return false;
1680 } else if (ShuffleKind == 2) {
1681 if (!IsLE)
1682 return false;
1683 for (unsigned i = 0; i != 16; i += 4)
1684 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1685 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
1686 !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
1687 !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
1688 return false;
1689 } else if (ShuffleKind == 1) {
1690 unsigned j = IsLE ? 0 : 4;
1691 for (unsigned i = 0; i != 8; i += 4)
1692 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1693 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1694 !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
1695 !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
1696 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1697 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
1698 !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1699 !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1700 return false;
1701 }
1702 return true;
1703}
1704
1705/// isVMerge - Common function, used to match vmrg* shuffles.
1706///
1707static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
1708 unsigned LHSStart, unsigned RHSStart) {
1709 if (N->getValueType(0) != MVT::v16i8)
1710 return false;
1711  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
1712         "Unsupported merge size!");
1713
1714 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
1715 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
1716 if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
1717 LHSStart+j+i*UnitSize) ||
1718 !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
1719 RHSStart+j+i*UnitSize))
1720 return false;
1721 }
1722 return true;
1723}
1724
1725/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
1726/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
1727/// The ShuffleKind distinguishes between big-endian merges with two
1728/// different inputs (0), either-endian merges with two identical inputs (1),
1729/// and little-endian merges with two different inputs (2). For the latter,
1730/// the input operands are swapped (see PPCInstrAltivec.td).
1731bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1732 unsigned ShuffleKind, SelectionDAG &DAG) {
1733 if (DAG.getDataLayout().isLittleEndian()) {
1734 if (ShuffleKind == 1) // unary
1735 return isVMerge(N, UnitSize, 0, 0);
1736 else if (ShuffleKind == 2) // swapped
1737 return isVMerge(N, UnitSize, 0, 16);
1738 else
1739 return false;
1740 } else {
1741 if (ShuffleKind == 1) // unary
1742 return isVMerge(N, UnitSize, 8, 8);
1743 else if (ShuffleKind == 0) // normal
1744 return isVMerge(N, UnitSize, 8, 24);
1745 else
1746 return false;
1747 }
1748}
1749
1750/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
1751/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
1752/// The ShuffleKind distinguishes between big-endian merges with two
1753/// different inputs (0), either-endian merges with two identical inputs (1),
1754/// and little-endian merges with two different inputs (2). For the latter,
1755/// the input operands are swapped (see PPCInstrAltivec.td).
1756bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1757 unsigned ShuffleKind, SelectionDAG &DAG) {
1758 if (DAG.getDataLayout().isLittleEndian()) {
1759 if (ShuffleKind == 1) // unary
1760 return isVMerge(N, UnitSize, 8, 8);
1761 else if (ShuffleKind == 2) // swapped
1762 return isVMerge(N, UnitSize, 8, 24);
1763 else
1764 return false;
1765 } else {
1766 if (ShuffleKind == 1) // unary
1767 return isVMerge(N, UnitSize, 0, 0);
1768 else if (ShuffleKind == 0) // normal
1769 return isVMerge(N, UnitSize, 0, 16);
1770 else
1771 return false;
1772 }
1773}
1774
1775/**
1776 * Common function used to match vmrgew and vmrgow shuffles
1777 *
1778 * The indexOffset determines whether to look for even or odd words in
1779 * the shuffle mask. This is based on the endianness of the target
1780 * machine.
1781 * - Little Endian:
1782 * - Use offset of 0 to check for odd elements
1783 * - Use offset of 4 to check for even elements
1784 * - Big Endian:
1785 * - Use offset of 0 to check for even elements
1786 * - Use offset of 4 to check for odd elements
1787 * A detailed description of the vector element ordering for little endian and
1788 * big endian can be found at
1789 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
1790 * Targeting your applications - what little endian and big endian IBM XL C/C++
1791 * compiler differences mean to you
1792 *
1793 * The mask to the shuffle vector instruction specifies the indices of the
1794 * elements from the two input vectors to place in the result. The elements are
1795 * numbered in array-access order, starting with the first vector. These vectors
1796 * are always of type v16i8, thus each vector will contain 16 elements of size
1797 * 8 bits. More info on the shuffle vector can be found in the
1798 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
1799 * Language Reference.
1800 *
1801 * The RHSStartValue indicates whether the same input vectors are used (unary)
1802 * or two different input vectors are used, based on the following:
1803 * - If the instruction uses the same vector for both inputs, the range of the
1804 * indices will be 0 to 15. In this case, the RHSStart value passed should
1805 * be 0.
1806 * - If the instruction has two different vectors then the range of the
1807 * indices will be 0 to 31. In this case, the RHSStart value passed should
1808 * be 16 (indices 0-15 specify elements in the first vector while indices 16
1809 * to 31 specify elements in the second vector).
1810 *
1811 * \param[in] N The shuffle vector SD Node to analyze
1812 * \param[in] IndexOffset Specifies whether to look for even or odd elements
1813 * \param[in] RHSStartValue Specifies the starting index for the righthand input
1814 * vector to the shuffle_vector instruction
1815 * \return true iff this shuffle vector represents an even or odd word merge
1816 */
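// For example (illustrative, derived from the checks in the isVMerge helper
// below): with RHSStartValue == 16 and IndexOffset == 0, the expected two-input
// mask is <0,1,2,3, 16,17,18,19, 8,9,10,11, 24,25,26,27>, i.e. words 0 and 2 of
// each input interleaved; with IndexOffset == 4 it is words 1 and 3 of each input.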
1817static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
1818 unsigned RHSStartValue) {
1819 if (N->getValueType(0) != MVT::v16i8)
1820 return false;
1821
1822 for (unsigned i = 0; i < 2; ++i)
1823 for (unsigned j = 0; j < 4; ++j)
1824 if (!isConstantOrUndef(N->getMaskElt(i*4+j),
1825 i*RHSStartValue+j+IndexOffset) ||
1826 !isConstantOrUndef(N->getMaskElt(i*4+j+8),
1827 i*RHSStartValue+j+IndexOffset+8))
1828 return false;
1829 return true;
1830}
1831
1832/**
1833 * Determine if the specified shuffle mask is suitable for the vmrgew or
1834 * vmrgow instructions.
1835 *
1836 * \param[in] N The shuffle vector SD Node to analyze
1837 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
1838 * \param[in] ShuffleKind Identify the type of merge:
1839 * - 0 = big-endian merge with two different inputs;
1840 * - 1 = either-endian merge with two identical inputs;
1841 * - 2 = little-endian merge with two different inputs (inputs are swapped for
1842 * little-endian merges).
1843 * \param[in] DAG The current SelectionDAG
1844 * \return true iff this shuffle mask is suitable for the vmrgew or vmrgow instruction
1845 */
1846bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
1847 unsigned ShuffleKind, SelectionDAG &DAG) {
1848 if (DAG.getDataLayout().isLittleEndian()) {
1849 unsigned indexOffset = CheckEven ? 4 : 0;
1850 if (ShuffleKind == 1) // Unary
1851 return isVMerge(N, indexOffset, 0);
1852 else if (ShuffleKind == 2) // swapped
1853 return isVMerge(N, indexOffset, 16);
1854 else
1855 return false;
1856 }
1857 else {
1858 unsigned indexOffset = CheckEven ? 0 : 4;
1859 if (ShuffleKind == 1) // Unary
1860 return isVMerge(N, indexOffset, 0);
1861 else if (ShuffleKind == 0) // Normal
1862 return isVMerge(N, indexOffset, 16);
1863 else
1864 return false;
1865 }
1866 return false;
1867}
1868
1869/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
1870/// amount, otherwise return -1.
1871/// The ShuffleKind distinguishes between big-endian operations with two
1872/// different inputs (0), either-endian operations with two identical inputs
1873/// (1), and little-endian operations with two different inputs (2). For the
1874/// latter, the input operands are swapped (see PPCInstrAltivec.td).
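/// For example (illustrative): the mask <4,5,6,...,19> is a vsldoi by 4 bytes;
/// this routine returns 4 for a big-endian two-input shuffle (ShuffleKind 0) and
/// 16 - 4 = 12 for the corresponding little-endian case (ShuffleKind 2), per the
/// final adjustment below.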
1875int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
1876 SelectionDAG &DAG) {
1877 if (N->getValueType(0) != MVT::v16i8)
1878 return -1;
1879
1880 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
1881
1882 // Find the first non-undef value in the shuffle mask.
1883 unsigned i;
1884 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
1885 /*search*/;
1886
1887 if (i == 16) return -1; // all undef.
1888
1889 // Otherwise, check to see if the rest of the elements are consecutively
1890 // numbered from this value.
1891 unsigned ShiftAmt = SVOp->getMaskElt(i);
1892 if (ShiftAmt < i) return -1;
1893
1894 ShiftAmt -= i;
1895 bool isLE = DAG.getDataLayout().isLittleEndian();
1896
1897 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
1898 // Check the rest of the elements to see if they are consecutive.
1899 for (++i; i != 16; ++i)
1900 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
1901 return -1;
1902 } else if (ShuffleKind == 1) {
1903 // Check the rest of the elements to see if they are consecutive.
1904 for (++i; i != 16; ++i)
1905 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
1906 return -1;
1907 } else
1908 return -1;
1909
1910 if (isLE)
1911 ShiftAmt = 16 - ShiftAmt;
1912
1913 return ShiftAmt;
1914}
1915
1916/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
1917/// specifies a splat of a single element that is suitable for input to
1918/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
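/// For example (illustrative): with EltSize == 4, a splat of word 2 of the first
/// input is the mask <8,9,10,11, 8,9,10,11, 8,9,10,11, 8,9,10,11>; the checks
/// below verify that the first EltSize indices are consecutive and that every
/// later EltSize-byte group repeats them.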
1919bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
1920  assert(N->getValueType(0) == MVT::v16i8 && isPowerOf2_32(EltSize) &&
1921         EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
1922
1923 // The consecutive indices need to specify an element, not part of two
1924 // different elements. So abandon ship early if this isn't the case.
1925 if (N->getMaskElt(0) % EltSize != 0)
1926 return false;
1927
1928 // This is a splat operation if each element of the permute is the same, and
1929 // if the value doesn't reference the second vector.
1930 unsigned ElementBase = N->getMaskElt(0);
1931
1932 // FIXME: Handle UNDEF elements too!
1933 if (ElementBase >= 16)
1934 return false;
1935
1936 // Check that the indices are consecutive, in the case of a multi-byte element
1937 // splatted with a v16i8 mask.
1938 for (unsigned i = 1; i != EltSize; ++i)
1939 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
1940 return false;
1941
1942 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
1943 if (N->getMaskElt(i) < 0) continue;
1944 for (unsigned j = 0; j != EltSize; ++j)
1945 if (N->getMaskElt(i+j) != N->getMaskElt(j))
1946 return false;
1947 }
1948 return true;
1949}
1950
1951/// Check that the mask is shuffling N byte elements. Within each N byte
1952/// element of the mask, the indices could be either in increasing or
1953/// decreasing order as long as they are consecutive.
1954/// \param[in] N the shuffle vector SD Node to analyze
1955/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
1956/// Word/DoubleWord/QuadWord).
1957/// \param[in] StepLen the delta indices number among the N byte element, if
1958/// the mask is in increasing/decreasing order then it is 1/-1.
1959/// \return true iff the mask is shuffling N byte elements.
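/// For example (illustrative): with Width == 4 and StepLen == -1 this accepts
/// <3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12> (each word byte-reversed, as the
/// XXBRW mask check below requires), and with Width == 4 and StepLen == 1 it
/// accepts any mask that moves whole words, e.g.
/// <4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11>.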
1960static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
1961 int StepLen) {
1962  assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
1963         "Unexpected element width.");
1964  assert((StepLen == 1 || StepLen == -1) && "Unexpected step length.");
1965
1966 unsigned NumOfElem = 16 / Width;
1967 unsigned MaskVal[16]; // Width is never greater than 16
1968 for (unsigned i = 0; i < NumOfElem; ++i) {
1969 MaskVal[0] = N->getMaskElt(i * Width);
1970 if ((StepLen == 1) && (MaskVal[0] % Width)) {
1971 return false;
1972 } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
1973 return false;
1974 }
1975
1976 for (unsigned int j = 1; j < Width; ++j) {
1977 MaskVal[j] = N->getMaskElt(i * Width + j);
1978 if (MaskVal[j] != MaskVal[j-1] + StepLen) {
1979 return false;
1980 }
1981 }
1982 }
1983
1984 return true;
1985}
1986
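/// isXXINSERTWMask - Return true if this shuffle mask leaves all but one word of
/// the result unchanged from one input and fills the remaining word from the
/// other input (or from another word of the same input when the second operand
/// is undef), i.e. it can be implemented as a word insert. On success, ShiftElts
/// receives the word rotation used to bring the source word into place,
/// InsertAtByte the byte offset of the inserted word, and Swap indicates whether
/// the inputs must be swapped.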
1987bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
1988 unsigned &InsertAtByte, bool &Swap, bool IsLE) {
1989 if (!isNByteElemShuffleMask(N, 4, 1))
1990 return false;
1991
1992 // Now we look at mask elements 0,4,8,12
1993 unsigned M0 = N->getMaskElt(0) / 4;
1994 unsigned M1 = N->getMaskElt(4) / 4;
1995 unsigned M2 = N->getMaskElt(8) / 4;
1996 unsigned M3 = N->getMaskElt(12) / 4;
1997 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
1998 unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
1999
2000 // Below, let H and L be arbitrary elements of the shuffle mask
2001 // where H is in the range [4,7] and L is in the range [0,3].
2002 // H, 1, 2, 3 or L, 5, 6, 7
2003 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
2004 (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
2005 ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
2006 InsertAtByte = IsLE ? 12 : 0;
2007 Swap = M0 < 4;
2008 return true;
2009 }
2010 // 0, H, 2, 3 or 4, L, 6, 7
2011 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
2012 (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
2013 ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
2014 InsertAtByte = IsLE ? 8 : 4;
2015 Swap = M1 < 4;
2016 return true;
2017 }
2018 // 0, 1, H, 3 or 4, 5, L, 7
2019 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
2020 (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
2021 ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
2022 InsertAtByte = IsLE ? 4 : 8;
2023 Swap = M2 < 4;
2024 return true;
2025 }
2026 // 0, 1, 2, H or 4, 5, 6, L
2027 if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
2028 (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
2029 ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
2030 InsertAtByte = IsLE ? 0 : 12;
2031 Swap = M3 < 4;
2032 return true;
2033 }
2034
2035 // If both vector operands for the shuffle are the same vector, the mask will
2036 // contain only elements from the first one and the second one will be undef.
2037 if (N->getOperand(1).isUndef()) {
2038 ShiftElts = 0;
2039 Swap = true;
2040 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2041 if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
2042 InsertAtByte = IsLE ? 12 : 0;
2043 return true;
2044 }
2045 if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2046 InsertAtByte = IsLE ? 8 : 4;
2047 return true;
2048 }
2049 if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2050 InsertAtByte = IsLE ? 4 : 8;
2051 return true;
2052 }
2053 if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2054 InsertAtByte = IsLE ? 0 : 12;
2055 return true;
2056 }
2057 }
2058
2059 return false;
2060}
2061
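/// isXXSLDWIShuffleMask - Return true if this shuffle mask is a rotation of the
/// (possibly swapped) concatenated inputs by a whole number of words, as produced
/// by the XXSLDWI instruction. On success, ShiftElts receives the shift amount in
/// words and Swap indicates whether the inputs must be swapped.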
2062bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2063 bool &Swap, bool IsLE) {
2064  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2065 // Ensure each byte index of the word is consecutive.
2066 if (!isNByteElemShuffleMask(N, 4, 1))
2067 return false;
2068
2069 // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2070 unsigned M0 = N->getMaskElt(0) / 4;
2071 unsigned M1 = N->getMaskElt(4) / 4;
2072 unsigned M2 = N->getMaskElt(8) / 4;
2073 unsigned M3 = N->getMaskElt(12) / 4;
2074
2075 // If both vector operands for the shuffle are the same vector, the mask will
2076 // contain only elements from the first one and the second one will be undef.
2077 if (N->getOperand(1).isUndef()) {
2078  assert(M0 < 4 && "Indexing into an undef vector?");
2079 if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2080 return false;
2081
2082 ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2083 Swap = false;
2084 return true;
2085 }
2086
2087 // Ensure each word index of the ShuffleVector Mask is consecutive.
2088 if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2089 return false;
2090
2091 if (IsLE) {
2092 if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2093 // Input vectors don't need to be swapped if the leading element
2094 // of the result is one of the 3 left elements of the second vector
2095 // (or if there is no shift to be done at all).
2096 Swap = false;
2097 ShiftElts = (8 - M0) % 8;
2098 } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2099 // Input vectors need to be swapped if the leading element
2100 // of the result is one of the 3 left elements of the first vector
2101 // (or if we're shifting by 4 - thereby simply swapping the vectors).
2102 Swap = true;
2103 ShiftElts = (4 - M0) % 4;
2104 }
2105
2106 return true;
2107 } else { // BE
2108 if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
2109 // Input vectors don't need to be swapped if the leading element
2110 // of the result is one of the 4 elements of the first vector.
2111 Swap = false;
2112 ShiftElts = M0;
2113 } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
2114 // Input vectors need to be swapped if the leading element
2115 // of the result is one of the 4 elements of the right vector.
2116 Swap = true;
2117 ShiftElts = M0 - 4;
2118 }
2119
2120 return true;
2121 }
2122}
2123
2124bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
2125  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2126
2127 if (!isNByteElemShuffleMask(N, Width, -1))
2128 return false;
2129
2130 for (int i = 0; i < 16; i += Width)
2131 if (N->getMaskElt(i) != i + Width - 1)
2132 return false;
2133
2134 return true;
2135}
2136
2137bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
2138 return isXXBRShuffleMaskHelper(N, 2);
2139}
2140
2141bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
2142 return isXXBRShuffleMaskHelper(N, 4);
2143}
2144
2145bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
2146 return isXXBRShuffleMaskHelper(N, 8);
2147}
2148
2149bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
2150 return isXXBRShuffleMaskHelper(N, 16);
2151}
2152
2153/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2154/// if the inputs to the instruction should be swapped and set \p DM to the
2155/// value for the immediate.
2156/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2157/// AND element 0 of the result comes from the first input (LE) or second input
2158/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2159/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2160/// mask.
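/// For example (illustrative, following the big-endian branch below): the
/// doubleword-level mask <0, 3> (result element 0 from the first input's
/// doubleword 0, result element 1 from the second input's doubleword 1) gives
/// Swap == false and DM == (0 << 1) + (3 & 1) == 1.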
2161bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
2162 bool &Swap, bool IsLE) {
2163  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2164
2165 // Ensure each byte index of the double word is consecutive.
2166 if (!isNByteElemShuffleMask(N, 8, 1))
2167 return false;
2168
2169 unsigned M0 = N->getMaskElt(0) / 8;
2170 unsigned M1 = N->getMaskElt(8) / 8;
2171  assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2172
2173 // If both vector operands for the shuffle are the same vector, the mask will
2174 // contain only elements from the first one and the second one will be undef.
2175 if (N->getOperand(1).isUndef()) {
2176 if ((M0 | M1) < 2) {
2177 DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2178 Swap = false;
2179 return true;
2180 } else
2181 return false;
2182 }
2183
2184 if (IsLE) {
2185 if (M0 > 1 && M1 < 2) {
2186 Swap = false;
2187 } else if (M0 < 2 && M1 > 1) {
2188 M0 = (M0 + 2) % 4;
2189 M1 = (M1 + 2) % 4;
2190 Swap = true;
2191 } else
2192 return false;
2193
2194    // Note: if control flow reaches here, Swap has already been set above.
2195 DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2196 return true;
2197 } else { // BE
2198 if (M0 < 2 && M1 > 1) {
2199 Swap = false;
2200 } else if (M0 > 1 && M1 < 2) {
2201 M0 = (M0 + 2) % 4;
2202 M1 = (M1 + 2) % 4;
2203 Swap = true;
2204 } else
2205 return false;
2206
2207    // Note: if control flow reaches here, Swap has already been set above.
2208 DM = (M0 << 1) + (M1 & 1);
2209 return true;
2210 }
2211}
2212
2213
2214/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2215/// appropriate for PPC mnemonics (which have a big endian bias - namely
2216/// elements are counted from the left of the vector register).
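/// For example (illustrative): for EltSize == 4 and a mask splatting word 2 in
/// array order (mask element 0 == 8), this returns 2 on big-endian and
/// (16 / 4) - 1 - 2 == 1 on little-endian, since the mnemonics count elements
/// from the most-significant end of the register.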
2217unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2218 SelectionDAG &DAG) {
2219 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2220  assert(isSplatShuffleMask(SVOp, EltSize));
2221 if (DAG.getDataLayout().isLittleEndian())
2222 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2223 else
2224 return SVOp->getMaskElt(0) / EltSize;
2225}
2226
2227/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2228/// by using a vspltis[bhw] instruction of the specified element size, return
2229/// the constant being splatted. The ByteSize field indicates the number of
2230/// bytes of each element [124] -> [bhw].
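/// For example (illustrative): a v8i16 BUILD_VECTOR whose operands are all the
/// constant 5 yields the target constant 5 when ByteSize == 2 (a candidate for
/// vspltish 5), but fails for ByteSize == 1 because 0x0005 is not a repeating
/// byte pattern.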
2231SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2232 SDValue OpVal(nullptr, 0);
2233
2234 // If ByteSize of the splat is bigger than the element size of the
2235 // build_vector, then we have a case where we are checking for a splat where
2236 // multiple elements of the buildvector are folded together into a single
2237  // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
2238 unsigned EltSize = 16/N->getNumOperands();
2239 if (EltSize < ByteSize) {
2240 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2241 SDValue UniquedVals[4];
2242    assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2243
2244 // See if all of the elements in the buildvector agree across.
2245 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2246 if (N->getOperand(i).isUndef()) continue;
2247 // If the element isn't a constant, bail fully out.
2248 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2249
2250 if (!UniquedVals[i&(Multiple-1)].getNode())
2251 UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2252 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2253 return SDValue(); // no match.
2254 }
2255
2256 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2257 // either constant or undef values that are identical for each chunk. See
2258 // if these chunks can form into a larger vspltis*.
2259
2260 // Check to see if all of the leading entries are either 0 or -1. If
2261 // neither, then this won't fit into the immediate field.
2262 bool LeadingZero = true;
2263 bool LeadingOnes = true;
2264 for (unsigned i = 0; i != Multiple-1; ++i) {
2265 if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2266
2267 LeadingZero &= isNullConstant(UniquedVals[i]);
2268 LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2269 }
2270 // Finally, check the least significant entry.
2271 if (LeadingZero) {
2272 if (!UniquedVals[Multiple-1].getNode())
2273 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
2274 int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
2275 if (Val < 16) // 0,0,0,4 -> vspltisw(4)
2276 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2277 }
2278 if (LeadingOnes) {
2279 if (!UniquedVals[Multiple-1].getNode())
2280 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2281 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2282 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2283 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2284 }
2285
2286 return SDValue();
2287 }
2288
2289 // Check to see if this buildvec has a single non-undef value in its elements.
2290 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2291 if (N->getOperand(i).isUndef()) continue;
2292 if (!OpVal.getNode())
2293 OpVal = N->getOperand(i);
2294 else if (OpVal != N->getOperand(i))
2295 return SDValue();
2296 }
2297
2298 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
2299
2300 unsigned ValSizeInBytes = EltSize;
2301 uint64_t Value = 0;
2302 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2303 Value = CN->getZExtValue();
2304 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2305    assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2306 Value = FloatToBits(CN->getValueAPF().convertToFloat());
2307 }
2308
2309 // If the splat value is larger than the element value, then we can never do
2310  // this splat. The only value whose replicated bits could fit in our
2311  // immediate field is zero, and we prefer to use vxor for that.
2312 if (ValSizeInBytes < ByteSize) return SDValue();
2313
2314 // If the element value is larger than the splat value, check if it consists
2315 // of a repeated bit pattern of size ByteSize.
2316 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2317 return SDValue();
2318
2319 // Properly sign extend the value.
2320 int MaskVal = SignExtend32(Value, ByteSize * 8);
2321
2322 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2323 if (MaskVal == 0) return SDValue();
2324
2325 // Finally, if this value fits in a 5 bit sext field, return it
2326 if (SignExtend32<5>(MaskVal) == MaskVal)
2327 return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2328 return SDValue();
2329}
2330
2331/// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift
2332/// amount, otherwise return -1.
2333int PPC::isQVALIGNIShuffleMask(SDNode *N) {
2334 EVT VT = N->getValueType(0);
2335 if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1)
2336 return -1;
2337
2338 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2339
2340 // Find the first non-undef value in the shuffle mask.
2341 unsigned i;
2342 for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i)
2343 /*search*/;
2344
2345 if (i == 4) return -1; // all undef.
2346
2347 // Otherwise, check to see if the rest of the elements are consecutively
2348 // numbered from this value.
2349 unsigned ShiftAmt = SVOp->getMaskElt(i);
2350 if (ShiftAmt < i) return -1;
2351 ShiftAmt -= i;
2352
2353 // Check the rest of the elements to see if they are consecutive.
2354 for (++i; i != 4; ++i)
2355 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2356 return -1;
2357
2358 return ShiftAmt;
2359}
2360
2361//===----------------------------------------------------------------------===//
2362// Addressing Mode Selection
2363//===----------------------------------------------------------------------===//
2364
2365/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2366 /// or 64-bit immediate, and whether the value can be accurately represented as
2367 /// a sign extension from a 16-bit value. If so, it returns true and sets the
2368 /// immediate.
2369bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2370 if (!isa<ConstantSDNode>(N))
2371 return false;
2372
2373 Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue();
2374 if (N->getValueType(0) == MVT::i32)
2375 return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
2376 else
2377 return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2378}
2379bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2380 return isIntS16Immediate(Op.getNode(), Imm);
2381}
2382
2383
2384/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2385/// be represented as an indexed [r+r] operation.
2386bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2387 SDValue &Index,
2388 SelectionDAG &DAG) const {
2389 for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
2390 UI != E; ++UI) {
2391 if (MemSDNode *Memop = dyn_cast<MemSDNode>(*UI)) {
2392 if (Memop->getMemoryVT() == MVT::f64) {
2393 Base = N.getOperand(0);
2394 Index = N.getOperand(1);
2395 return true;
2396 }
2397 }
2398 }
2399 return false;
2400}
2401
2402 /// SelectAddressRegReg - Given the specified address, check to see if it
2403/// can be represented as an indexed [r+r] operation. Returns false if it
2404/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2405/// non-zero and N can be represented by a base register plus a signed 16-bit
2406/// displacement, make a more precise judgement by checking (displacement % \p
2407/// EncodingAlignment).
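/// For example (illustrative): an ADD of a base pointer and the constant 20 with
/// EncodingAlignment == 4 is rejected here (better encoded as [r+imm]), whereas
/// the same ADD with the constant 18 is accepted as [r+r] because 18 is not a
/// multiple of the required encoding alignment.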
2408bool PPCTargetLowering::SelectAddressRegReg(
2409 SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
2410 MaybeAlign EncodingAlignment) const {
2411 // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2412 // a [pc+imm].
2413 if (SelectAddressPCRel(N, Base))
2414 return false;
2415
2416 int16_t Imm = 0;
2417 if (N.getOpcode() == ISD::ADD) {
2418    // Is there any SPE load/store (f64)? It can't handle a 16-bit offset, as
2419    // SPE loads/stores can only handle 8-bit offsets.
2420 if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2421 return true;
2422 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2423 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2424 return false; // r+i
2425 if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2426 return false; // r+i
2427
2428 Base = N.getOperand(0);
2429 Index = N.getOperand(1);
2430 return true;
2431 } else if (N.getOpcode() == ISD::OR) {
2432 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2433 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2434 return false; // r+i can fold it if we can.
2435
2436 // If this is an or of disjoint bitfields, we can codegen this as an add
2437 // (for better address arithmetic) if the LHS and RHS of the OR are provably
2438 // disjoint.
2439 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2440
2441 if (LHSKnown.Zero.getBoolValue()) {
2442 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2443 // If all of the bits are known zero on the LHS or RHS, the add won't
2444 // carry.
2445 if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2446 Base = N.getOperand(0);
2447 Index = N.getOperand(1);
2448 return true;
2449 }
2450 }
2451 }
2452
2453 return false;
2454}
2455
2456// If we happen to be doing an i64 load or store into a stack slot that has
2457// less than a 4-byte alignment, then the frame-index elimination may need to
2458// use an indexed load or store instruction (because the offset may not be a
2459// multiple of 4). The extra register needed to hold the offset comes from the
2460// register scavenger, and it is possible that the scavenger will need to use
2461// an emergency spill slot. As a result, we need to make sure that a spill slot
2462// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2463// stack slot.
2464static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2465 // FIXME: This does not handle the LWA case.
2466 if (VT != MVT::i64)
2467 return;
2468
2469 // NOTE: We'll exclude negative FIs here, which come from argument
2470 // lowering, because there are no known test cases triggering this problem
2471 // using packed structures (or similar). We can remove this exclusion if
2472 // we find such a test case. The reason why this is so test-case driven is
2473 // because this entire 'fixup' is only to prevent crashes (from the
2474 // register scavenger) on not-really-valid inputs. For example, if we have:
2475 // %a = alloca i1
2476 // %b = bitcast i1* %a to i64*
2477  //   store i64 0, i64* %b
2478 // then the store should really be marked as 'align 1', but is not. If it
2479 // were marked as 'align 1' then the indexed form would have been
2480 // instruction-selected initially, and the problem this 'fixup' is preventing
2481 // won't happen regardless.
2482 if (FrameIdx < 0)
2483 return;
2484
2485 MachineFunction &MF = DAG.getMachineFunction();
2486 MachineFrameInfo &MFI = MF.getFrameInfo();
2487
2488 if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2489 return;
2490
2491 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2492 FuncInfo->setHasNonRISpills();
2493}
2494
2495/// Returns true if the address N can be represented by a base register plus
2496/// a signed 16-bit displacement [r+imm], and if it is not better
2497/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2498/// displacements that are multiples of that value.
2499bool PPCTargetLowering::SelectAddressRegImm(
2500 SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2501 MaybeAlign EncodingAlignment) const {
2502 // FIXME dl should come from parent load or store, not from address
2503 SDLoc dl(N);
2504
2505 // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2506 // a [pc+imm].
2507 if (SelectAddressPCRel(N, Base))
2508 return false;
2509
2510 // If this can be more profitably realized as r+r, fail.
2511 if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2512 return false;
2513
2514 if (N.getOpcode() == ISD::ADD) {
2515 int16_t imm = 0;
2516 if (isIntS16Immediate(N.getOperand(1), imm) &&
2517 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2518 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2519 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2520 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2521 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2522 } else {
2523 Base = N.getOperand(0);
2524 }
2525 return true; // [r+i]
2526 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2527 // Match LOAD (ADD (X, Lo(G))).
2528      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
2529             && "Cannot handle constant offsets yet!");
2530 Disp = N.getOperand(1).getOperand(0); // The global address.
2531      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2532             Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2533             Disp.getOpcode() == ISD::TargetConstantPool ||
2534             Disp.getOpcode() == ISD::TargetJumpTable);
2535 Base = N.getOperand(0);
2536 return true; // [&g+r]
2537 }
2538 } else if (N.getOpcode() == ISD::OR) {
2539 int16_t imm = 0;
2540 if (isIntS16Immediate(N.getOperand(1), imm) &&
2541 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2542 // If this is an or of disjoint bitfields, we can codegen this as an add
2543 // (for better address arithmetic) if the LHS and RHS of the OR are
2544 // provably disjoint.
2545 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2546
2547 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2548 // If all of the bits are known zero on the LHS or RHS, the add won't
2549 // carry.
2550 if (FrameIndexSDNode *FI =
2551 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2552 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2553 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2554 } else {
2555 Base = N.getOperand(0);
2556 }
2557 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2558 return true;
2559 }
2560 }
2561 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2562 // Loading from a constant address.
2563
2564 // If this address fits entirely in a 16-bit sext immediate field, codegen
2565 // this as "d, 0"
2566 int16_t Imm;
2567 if (isIntS16Immediate(CN, Imm) &&
2568 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
2569 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2570 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2571 CN->getValueType(0));
2572 return true;
2573 }
2574
2575 // Handle 32-bit sext immediates with LIS + addr mode.
2576 if ((CN->getValueType(0) == MVT::i32 ||
2577 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2578 (!EncodingAlignment ||
2579 isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2580 int Addr = (int)CN->getZExtValue();
2581
2582 // Otherwise, break this down into an LIS + disp.
2583 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2584
2585 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2586 MVT::i32);
2587 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2588 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2589 return true;
2590 }
2591 }
2592
2593 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2594 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2595 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2596 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2597 } else
2598 Base = N;
2599 return true; // [r+0]
2600}
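
The constant-address path above splits a 32-bit address into an LIS immediate
plus a sign-extended 16-bit displacement. A standalone sketch of that
arithmetic (splitLisDisp is an illustrative name, not an LLVM API):

  #include <cstdint>
  static void splitLisDisp(int32_t Addr, int32_t &Hi, int16_t &Disp) {
    Disp = static_cast<int16_t>(Addr);  // low 16 bits, used sign-extended
    Hi = (Addr - Disp) >> 16;           // compensates for a negative Disp
  }
  // Addr = 0x12348000: Disp = -32768, Hi = 0x1235,
  // and (Hi << 16) + Disp == 0x12348000 again.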
2601
2602/// SelectAddressRegRegOnly - Given the specified address, force it to be
2603/// represented as an indexed [r+r] operation.
2604bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2605 SDValue &Index,
2606 SelectionDAG &DAG) const {
2607 // Check to see if we can easily represent this as an [r+r] address. This
2608 // will fail if it thinks that the address is more profitably represented as
2609 // reg+imm, e.g. where imm = 0.
2610 if (SelectAddressRegReg(N, Base, Index, DAG))
2611 return true;
2612
2613 // If the address is the result of an add, we will utilize the fact that the
2614 // address calculation includes an implicit add. However, we can reduce
2615 // register pressure if we do not materialize a constant just for use as the
2616 // index register. We only get rid of the add if it is not an add of a
2617 // value and a 16-bit signed constant and both have a single use.
2618 int16_t imm = 0;
2619 if (N.getOpcode() == ISD::ADD &&
2620 (!isIntS16Immediate(N.getOperand(1), imm) ||
2621 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2622 Base = N.getOperand(0);
2623 Index = N.getOperand(1);
2624 return true;
2625 }
2626
2627 // Otherwise, do it the hard way, using R0 as the base register.
2628 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2629 N.getValueType());
2630 Index = N;
2631 return true;
2632}
2633
2634template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2635 Ty *PCRelCand = dyn_cast<Ty>(N);
2636 return PCRelCand && (PCRelCand->getTargetFlags() & PPCII::MO_PCREL_FLAG);
2637}
2638
2639/// Returns true if this address is a PC Relative address.
2640/// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
2641/// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
2642bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
2643 // This is a materialize PC Relative node. Always select this as PC Relative.
2644 Base = N;
2645 if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
2646 return true;
2647 if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
2648 isValidPCRelNode<GlobalAddressSDNode>(N) ||
2649 isValidPCRelNode<JumpTableSDNode>(N) ||
2650 isValidPCRelNode<BlockAddressSDNode>(N))
2651 return true;
2652 return false;
2653}
2654
2655/// Returns true if we should use a direct load into vector instruction
2656/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
2657static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
2658
2659  // If there are any uses other than scalar_to_vector, then we should
2660 // keep it as a scalar load -> direct move pattern to prevent multiple
2661 // loads.
2662 LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
2663 if (!LD)
2664 return false;
2665
2666 EVT MemVT = LD->getMemoryVT();
2667 if (!MemVT.isSimple())
2668 return false;
2669 switch(MemVT.getSimpleVT().SimpleTy) {
2670 case MVT::i64:
2671 break;
2672 case MVT::i32:
2673 if (!ST.hasP8Vector())
2674 return false;
2675 break;
2676 case MVT::i16:
2677 case MVT::i8:
2678 if (!ST.hasP9Vector())
2679 return false;
2680 break;
2681 default:
2682 return false;
2683 }
2684
2685 SDValue LoadedVal(N, 0);
2686 if (!LoadedVal.hasOneUse())
2687 return false;
2688
2689 for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
2690 UI != UE; ++UI)
2691 if (UI.getUse().get().getResNo() == 0 &&
2692 UI->getOpcode() != ISD::SCALAR_TO_VECTOR &&
2693 UI->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
2694 return false;
2695
2696 return true;
2697}
2698
2699/// getPreIndexedAddressParts - Returns true, and sets the base pointer,
2700/// offset pointer, and addressing mode by reference, if the node's address
2701/// can be legally represented as a pre-indexed load/store address.
2702bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
2703 SDValue &Offset,
2704 ISD::MemIndexedMode &AM,
2705 SelectionDAG &DAG) const {
2706 if (DisablePPCPreinc) return false;
2707
2708 bool isLoad = true;
2709 SDValue Ptr;
2710 EVT VT;
2711 unsigned Alignment;
2712 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2713 Ptr = LD->getBasePtr();
2714 VT = LD->getMemoryVT();
2715 Alignment = LD->getAlignment();
2716 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
2717 Ptr = ST->getBasePtr();
2718 VT = ST->getMemoryVT();
2719 Alignment = ST->getAlignment();
2720 isLoad = false;
2721 } else
2722 return false;
2723
2724  // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
2725  // instructions, because we can instead fold these into a more efficient
2726  // instruction (such as LXSD).
2727 if (isLoad && usePartialVectorLoads(N, Subtarget)) {
2728 return false;
2729 }
2730
2731 // PowerPC doesn't have preinc load/store instructions for vectors
2732 if (VT.isVector())
2733 return false;
2734
2735 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
2736 // Common code will reject creating a pre-inc form if the base pointer
2737 // is a frame index, or if N is a store and the base pointer is either
2738 // the same as or a predecessor of the value being stored. Check for
2739 // those situations here, and try with swapped Base/Offset instead.
2740 bool Swap = false;
2741
2742 if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
2743 Swap = true;
2744 else if (!isLoad) {
2745 SDValue Val = cast<StoreSDNode>(N)->getValue();
2746 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
2747 Swap = true;
2748 }
2749
2750 if (Swap)
2751 std::swap(Base, Offset);
2752
2753 AM = ISD::PRE_INC;
2754 return true;
2755 }
2756
2757 // LDU/STU can only handle immediates that are a multiple of 4.
2758 if (VT != MVT::i64) {
2759 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, None))
2760 return false;
2761 } else {
2762 // LDU/STU need an address with at least 4-byte alignment.
2763 if (Alignment < 4)
2764 return false;
2765
2766 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
2767 return false;
2768 }
2769
2770 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2771 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
2772 // sext i32 to i64 when addr mode is r+i.
2773 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
2774 LD->getExtensionType() == ISD::SEXTLOAD &&
2775 isa<ConstantSDNode>(Offset))
2776 return false;
2777 }
2778
2779 AM = ISD::PRE_INC;
2780 return true;
2781}
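
A small sketch of the displacement constraint checked above for the 64-bit
update forms (validDSFormOffset is an illustrative helper, not an LLVM API):
ldu/stdu are DS-form instructions whose encoding drops the low two bits of
the offset, so the immediate must be a multiple of 4 that fits in 16 bits.

  #include <cstdint>
  static bool validDSFormOffset(int32_t Off) {
    return Off >= -32768 && Off <= 32767 && (Off & 3) == 0;
  }
  // validDSFormOffset(8) == true, validDSFormOffset(6) == false.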
2782
2783//===----------------------------------------------------------------------===//
2784// LowerOperation implementation
2785//===----------------------------------------------------------------------===//
2786
2787/// Set HiOpFlags and LoOpFlags to the target MO flags used when referencing
2788/// labels, adding the PIC flag when labels are referenced via a PICBase.
2789static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
2790 unsigned &HiOpFlags, unsigned &LoOpFlags,
2791 const GlobalValue *GV = nullptr) {
2792 HiOpFlags = PPCII::MO_HA;
2793 LoOpFlags = PPCII::MO_LO;
2794
2795 // Don't use the pic base if not in PIC relocation model.
2796 if (IsPIC) {
2797 HiOpFlags |= PPCII::MO_PIC_FLAG;
2798 LoOpFlags |= PPCII::MO_PIC_FLAG;
2799 }
2800}
2801
2802static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
2803 SelectionDAG &DAG) {
2804 SDLoc DL(HiPart);
2805 EVT PtrVT = HiPart.getValueType();
2806 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
2807
2808 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
2809 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
2810
2811 // With PIC, the first instruction is actually "GR+hi(&G)".
2812 if (isPIC)
2813 Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
2814 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
2815
2816 // Generate non-pic code that has direct accesses to the constant pool.
2817 // The address of the global is just (hi(&g)+lo(&g)).
2818 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
2819}
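
LowerLabelRef materializes an address as Hi(&g) + Lo(&g), where the MO_HA
flag requests the "high-adjusted" half. A hedged sketch of the usual @ha/@l
arithmetic (ha16/lo16 are illustrative helpers, not LLVM functions): the low
16 bits are used sign-extended, so the high half is pre-biased by 0x8000.

  #include <cstdint>
  static uint32_t ha16(uint32_t Addr) { return (Addr + 0x8000u) >> 16; }
  static int16_t  lo16(uint32_t Addr) { return static_cast<int16_t>(Addr); }
  // For any Addr: (ha16(Addr) << 16) + lo16(Addr) == Addr (mod 2^32).
  // e.g. Addr = 0x1000FFFF: ha16 = 0x1001, lo16 = -1, 0x10010000 - 1 = 0x1000FFFF.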
2820
2821static void setUsesTOCBasePtr(MachineFunction &MF) {
2822 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2823 FuncInfo->setUsesTOCBasePtr();
2824}
2825
2826static void setUsesTOCBasePtr(SelectionDAG &DAG) {
2827 setUsesTOCBasePtr(DAG.getMachineFunction());
2828}
2829
2830SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
2831 SDValue GA) const {
2832 const bool Is64Bit = Subtarget.isPPC64();
2833 EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
2834 SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT)
2835 : Subtarget.isAIXABI()
2836 ? DAG.getRegister(PPC::R2, VT)
2837 : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
2838 SDValue Ops[] = { GA, Reg };
2839 return DAG.getMemIntrinsicNode(
2840 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
2841 MachinePointerInfo::getGOT(DAG.getMachineFunction()), None,
2842 MachineMemOperand::MOLoad);
2843}
2844
2845SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
2846 SelectionDAG &DAG) const {
2847 EVT PtrVT = Op.getValueType();
2848 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2849 const Constant *C = CP->getConstVal();
2850
2851 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
2852 // The actual address of the GlobalValue is stored in the TOC.
2853 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
2854 if (Subtarget.isUsingPCRelativeCalls()) {
2855 SDLoc DL(CP);
2856 EVT Ty = getPointerTy(DAG.getDataLayout());
2857 SDValue ConstPool = DAG.getTargetConstantPool(
2858 C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
2859 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
2860 }
2861 setUsesTOCBasePtr(DAG);
2862 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
2863 return getTOCEntry(DAG, SDLoc(CP), GA);
2864 }
2865
2866 unsigned MOHiFlag, MOLoFlag;
2867 bool IsPIC = isPositionIndependent();
2868 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2869
2870 if (IsPIC && Subtarget.isSVR4ABI()) {
2871 SDValue GA =
2872 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
2873 return getTOCEntry(DAG, SDLoc(CP), GA);
2874 }
2875
2876 SDValue CPIHi =
2877 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
2878 SDValue CPILo =
2879 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
2880 return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
2881}
2882
2883// For 64-bit PowerPC, prefer the more compact relative encodings.
2884// This trades 32 bits per jump table entry for one or two instructions
2885// at the jump site.
2886unsigned PPCTargetLowering::getJumpTableEncoding() const {
2887 if (isJumpTableRelative())
2888 return MachineJumpTableInfo::EK_LabelDifference32;
2889
2890 return TargetLowering::getJumpTableEncoding();
2891}
2892
2893bool PPCTargetLowering::isJumpTableRelative() const {
2894 if (UseAbsoluteJumpTables)
2895 return false;
2896 if (Subtarget.isPPC64() || Subtarget.isAIXABI())
2897 return true;
2898 return TargetLowering::isJumpTableRelative();
2899}
2900
2901SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
2902 SelectionDAG &DAG) const {
2903 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
2904 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2905
2906 switch (getTargetMachine().getCodeModel()) {
2907 case CodeModel::Small:
2908 case CodeModel::Medium:
2909 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2910 default:
2911 return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
2912 getPointerTy(DAG.getDataLayout()));
2913 }
2914}
2915
2916const MCExpr *
2917PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
2918 unsigned JTI,
2919 MCContext &Ctx) const {
2920 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
2921 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2922
2923 switch (getTargetMachine().getCodeModel()) {
2924 case CodeModel::Small:
2925 case CodeModel::Medium:
2926 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2927 default:
2928 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2929 }
2930}
2931
2932SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
2933 EVT PtrVT = Op.getValueType();
2934 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2935
2936 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
2937 if (Subtarget.isUsingPCRelativeCalls()) {
2938 SDLoc DL(JT);
2939 EVT Ty = getPointerTy(DAG.getDataLayout());
2940 SDValue GA =
2941 DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
2942 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
2943 return MatAddr;
2944 }
2945
2946 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
2947 // The actual address of the GlobalValue is stored in the TOC.
2948 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
2949 setUsesTOCBasePtr(DAG);
2950 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
2951 return getTOCEntry(DAG, SDLoc(JT), GA);
2952 }
2953
2954 unsigned MOHiFlag, MOLoFlag;
2955 bool IsPIC = isPositionIndependent();
2956 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2957
2958 if (IsPIC && Subtarget.isSVR4ABI()) {
2959 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
2960 PPCII::MO_PIC_FLAG);
2961 return getTOCEntry(DAG, SDLoc(GA), GA);
2962 }
2963
2964 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
2965 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
2966 return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
2967}
2968
2969SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
2970 SelectionDAG &DAG) const {
2971 EVT PtrVT = Op.getValueType();
2972 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
2973 const BlockAddress *BA = BASDN->getBlockAddress();
2974
2975 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
2976 if (Subtarget.isUsingPCRelativeCalls()) {
2977 SDLoc DL(BASDN);
2978 EVT Ty = getPointerTy(DAG.getDataLayout());
2979 SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
2980 PPCII::MO_PCREL_FLAG);
2981 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
2982 return MatAddr;
2983 }
2984
2985 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
2986 // The actual BlockAddress is stored in the TOC.
2987 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
2988 setUsesTOCBasePtr(DAG);
2989 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
2990 return getTOCEntry(DAG, SDLoc(BASDN), GA);
2991 }
2992
2993 // 32-bit position-independent ELF stores the BlockAddress in the .got.
2994 if (Subtarget.is32BitELFABI() && isPositionIndependent())
2995 return getTOCEntry(
2996 DAG, SDLoc(BASDN),
2997 DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
2998
2999 unsigned MOHiFlag, MOLoFlag;
3000 bool IsPIC = isPositionIndependent();
3001 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3002 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
3003 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
3004 return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3005}
3006
3007SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3008 SelectionDAG &DAG) const {
3009 // FIXME: TLS addresses currently use medium model code sequences,
3010 // which is the most useful form. Eventually support for small and
3011 // large models could be added if users need it, at the cost of
3012 // additional complexity.
3013 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3014 if (DAG.getTarget().useEmulatedTLS())
3015 return LowerToTLSEmulatedModel(GA, DAG);
3016
3017 SDLoc dl(GA);
3018 const GlobalValue *GV = GA->getGlobal();
3019 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3020 bool is64bit = Subtarget.isPPC64();
3021 const Module *M = DAG.getMachineFunction().getFunction().getParent();
3022 PICLevel::Level picLevel = M->getPICLevel();
3023
3024 const TargetMachine &TM = getTargetMachine();
3025 TLSModel::Model Model = TM.getTLSModel(GV);
3026
3027 if (Model == TLSModel::LocalExec) {
3028 if (Subtarget.isUsingPCRelativeCalls()) {
3029 SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3030 SDValue TGA = DAG.getTargetGlobalAddress(
3031 GV, dl, PtrVT, 0, (PPCII::MO_PCREL_FLAG | PPCII::MO_TPREL_FLAG));
3032 SDValue MatAddr =
3033 DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
3034 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3035 }
3036
3037 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3038 PPCII::MO_TPREL_HA);
3039 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3040 PPCII::MO_TPREL_LO);
3041 SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3042 : DAG.getRegister(PPC::R2, MVT::i32);
3043
3044 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3045 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3046 }
3047
3048 if (Model == TLSModel::InitialExec) {
3049 bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3050 SDValue TGA = DAG.getTargetGlobalAddress(
3051 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3052 SDValue TGATLS = DAG.getTargetGlobalAddress(
3053 GV, dl, PtrVT, 0,
3054 IsPCRel ? (PPCII::MO_TLS | PPCII::MO_PCREL_FLAG) : PPCII::MO_TLS);
3055 SDValue TPOffset;
3056 if (IsPCRel) {
3057 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
3058 TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3059 MachinePointerInfo());
3060 } else {
3061 SDValue GOTPtr;
3062 if (is64bit) {
3063 setUsesTOCBasePtr(DAG);
3064 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3065 GOTPtr =
3066 DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
3067 } else {
3068 if (!TM.isPositionIndependent())
3069 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3070 else if (picLevel == PICLevel::SmallPIC)
3071 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3072 else
3073 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3074 }
3075 TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3076 }
3077 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3078 }
3079
3080 if (Model == TLSModel::GeneralDynamic) {
3081 if (Subtarget.isUsingPCRelativeCalls()) {
3082 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3083 PPCII::MO_GOT_TLSGD_PCREL_FLAG);
3084 return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3085 }
3086
3087 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3088 SDValue GOTPtr;
3089 if (is64bit) {
3090 setUsesTOCBasePtr(DAG);
3091 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3092 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3093 GOTReg, TGA);
3094 } else {
3095 if (picLevel == PICLevel::SmallPIC)
3096 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3097 else
3098 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3099 }
3100 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3101 GOTPtr, TGA, TGA);
3102 }
3103
3104 if (Model == TLSModel::LocalDynamic) {
3105 if (Subtarget.isUsingPCRelativeCalls()) {
3106 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3107 PPCII::MO_GOT_TLSLD_PCREL_FLAG);
3108 SDValue MatPCRel =
3109 DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3110 return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3111 }
3112
3113 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3114 SDValue GOTPtr;
3115 if (is64bit) {
3116 setUsesTOCBasePtr(DAG);
3117 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3118 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3119 GOTReg, TGA);
3120 } else {
3121 if (picLevel == PICLevel::SmallPIC)
3122 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3123 else
3124 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3125 }
3126 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3127 PtrVT, GOTPtr, TGA, TGA);
3128 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3129 PtrVT, TLSAddr, TGA);
3130 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3131 }
3132
3133  llvm_unreachable("Unknown TLS model!");
3134}
3135
3136SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3137 SelectionDAG &DAG) const {
3138 EVT PtrVT = Op.getValueType();
3139 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3140 SDLoc DL(GSDN);
3141 const GlobalValue *GV = GSDN->getGlobal();
3142
3143 // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3144 // The actual address of the GlobalValue is stored in the TOC.
3145 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3146 if (Subtarget.isUsingPCRelativeCalls()) {
3147 EVT Ty = getPointerTy(DAG.getDataLayout());
3148 if (isAccessedAsGotIndirect(Op)) {
3149 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3150 PPCII::MO_PCREL_FLAG |
3151 PPCII::MO_GOT_FLAG);
3152 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3153 SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3154 MachinePointerInfo());
3155 return Load;
3156 } else {
3157 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3158 PPCII::MO_PCREL_FLAG);
3159 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3160 }
3161 }
3162 setUsesTOCBasePtr(DAG);
3163 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3164 return getTOCEntry(DAG, DL, GA);
3165 }
3166
3167 unsigned MOHiFlag, MOLoFlag;
3168 bool IsPIC = isPositionIndependent();
3169 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3170
3171 if (IsPIC && Subtarget.isSVR4ABI()) {
3172 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3173 GSDN->getOffset(),
3174 PPCII::MO_PIC_FLAG);
3175 return getTOCEntry(DAG, DL, GA);
3176 }
3177
3178 SDValue GAHi =
3179 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3180 SDValue GALo =
3181 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3182
3183 return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3184}
3185
3186SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3187 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3188 SDLoc dl(Op);
3189
3190 if (Op.getValueType() == MVT::v2i64) {
3191 // When the operands themselves are v2i64 values, we need to do something
3192 // special because VSX has no underlying comparison operations for these.
3193 if (Op.getOperand(0).getValueType() == MVT::v2i64) {
3194 // Equality can be handled by casting to the legal type for Altivec
3195      // comparisons; everything else needs to be expanded.
3196 if (CC == ISD::SETEQ || CC == ISD::SETNE) {
3197 return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
3198 DAG.getSetCC(dl, MVT::v4i32,
3199 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
3200 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
3201 CC));
3202 }
3203
3204 return SDValue();
3205 }
3206
3207 // We handle most of these in the usual way.
3208 return Op;
3209 }
3210
3211 // If we're comparing for equality to zero, expose the fact that this is
3212 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3213 // fold the new nodes.
3214 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3215 return V;
3216
3217 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
3218 // Leave comparisons against 0 and -1 alone for now, since they're usually
3219 // optimized. FIXME: revisit this when we can custom lower all setcc
3220 // optimizations.
3221 if (C->isAllOnesValue() || C->isNullValue())
3222 return SDValue();
3223 }
3224
3225 // If we have an integer seteq/setne, turn it into a compare against zero
3226 // by xor'ing the rhs with the lhs, which is faster than setting a
3227 // condition register, reading it back out, and masking the correct bit. The
3228 // normal approach here uses sub to do this instead of xor. Using xor exposes
3229 // the result to other bit-twiddling opportunities.
3230 EVT LHSVT = Op.getOperand(0).getValueType();
3231 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3232 EVT VT = Op.getValueType();
3233 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
3234 Op.getOperand(1));
3235 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3236 }
3237 return SDValue();
3238}
3239
3240SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3241 SDNode *Node = Op.getNode();
3242 EVT VT = Node->getValueType(0);
3243 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3244 SDValue InChain = Node->getOperand(0);
3245 SDValue VAListPtr = Node->getOperand(1);
3246 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3247 SDLoc dl(Node);
3248
3249  assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3250
3251 // gpr_index
3252 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3253 VAListPtr, MachinePointerInfo(SV), MVT::i8);
3254 InChain = GprIndex.getValue(1);
3255
3256 if (VT == MVT::i64) {
3257 // Check if GprIndex is even
3258 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3259 DAG.getConstant(1, dl, MVT::i32));
3260 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3261 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3262 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3263 DAG.getConstant(1, dl, MVT::i32));
3264 // Align GprIndex to be even if it isn't
3265 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3266 GprIndex);
3267 }
3268
3269 // fpr index is 1 byte after gpr
3270 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3271 DAG.getConstant(1, dl, MVT::i32));
3272
3273 // fpr
3274 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3275 FprPtr, MachinePointerInfo(SV), MVT::i8);
3276 InChain = FprIndex.getValue(1);
3277
3278 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3279 DAG.getConstant(8, dl, MVT::i32));
3280
3281 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3282 DAG.getConstant(4, dl, MVT::i32));
3283
3284 // areas
3285 SDValue OverflowArea =
3286 DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3287 InChain = OverflowArea.getValue(1);
3288
3289 SDValue RegSaveArea =
3290 DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3291 InChain = RegSaveArea.getValue(1);
3292
3293  // select overflow_area if index >= 8
3294 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3295 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3296
3297 // adjustment constant gpr_index * 4/8
3298 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3299 VT.isInteger() ? GprIndex : FprIndex,
3300 DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3301 MVT::i32));
3302
3303 // OurReg = RegSaveArea + RegConstant
3304 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3305 RegConstant);
3306
3307 // Floating types are 32 bytes into RegSaveArea
3308 if (VT.isFloatingPoint())
3309 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3310 DAG.getConstant(32, dl, MVT::i32));
3311
3312 // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3313 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3314 VT.isInteger() ? GprIndex : FprIndex,
3315 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3316 MVT::i32));
3317
3318 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3319 VT.isInteger() ? VAListPtr : FprPtr,
3320 MachinePointerInfo(SV), MVT::i8);
3321
3322 // determine if we should load from reg_save_area or overflow_area
3323 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3324
3325  // increase overflow_area by 4/8 if gpr/fpr index >= 8
3326 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3327 DAG.getConstant(VT.isInteger() ? 4 : 8,
3328 dl, MVT::i32));
3329
3330 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3331 OverflowAreaPlusN);
3332
3333 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3334 MachinePointerInfo(), MVT::i32);
3335
3336 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3337}
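
A minimal sketch of the gpr index handling above (nextGprIndex is an
illustrative helper, not LLVM code): an i64 argument is passed in an even/odd
register pair, so an odd index is first rounded up, and once the index
reaches 8 the value is read from overflow_arg_area instead of the register
save area.

  static unsigned nextGprIndex(unsigned GprIndex, bool IsI64) {
    if (IsI64 && (GprIndex & 1))
      ++GprIndex;               // align to the next even register pair
    return GprIndex;
  }
  // nextGprIndex(3, /*IsI64=*/true) == 4; an index of 8 or more selects overflow_arg_area.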
3338
3339SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3340  assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3341
3342 // We have to copy the entire va_list struct:
3343  // 2*sizeof(char) + 2 bytes of alignment + 2*sizeof(char*) = 12 bytes
3344 return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3345 DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3346 false, true, false, MachinePointerInfo(),
3347 MachinePointerInfo());
3348}
3349
3350SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3351 SelectionDAG &DAG) const {
3352 if (Subtarget.isAIXABI())
3353 report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");
3354
3355 return Op.getOperand(0);
3356}
3357
3358SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3359 SelectionDAG &DAG) const {
3360 if (Subtarget.isAIXABI())
3361 report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");
3362
3363 SDValue Chain = Op.getOperand(0);
3364 SDValue Trmp = Op.getOperand(1); // trampoline
3365 SDValue FPtr = Op.getOperand(2); // nested function
3366 SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3367 SDLoc dl(Op);
3368
3369 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3370 bool isPPC64 = (PtrVT == MVT::i64);
3371 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3372
3373 TargetLowering::ArgListTy Args;
3374 TargetLowering::ArgListEntry Entry;
3375
3376 Entry.Ty = IntPtrTy;
3377 Entry.Node = Trmp; Args.push_back(Entry);
3378
3379 // TrampSize == (isPPC64 ? 48 : 40);
3380 Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
3381 isPPC64 ? MVT::i64 : MVT::i32);
3382 Args.push_back(Entry);
3383
3384 Entry.Node = FPtr; Args.push_back(Entry);
3385 Entry.Node = Nest; Args.push_back(Entry);
3386
3387 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
3388 TargetLowering::CallLoweringInfo CLI(DAG);
3389 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3390 CallingConv::C, Type::getVoidTy(*DAG.getContext()),
3391 DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
3392
3393 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3394 return CallResult.second;
3395}
3396
3397SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3398 MachineFunction &MF = DAG.getMachineFunction();
3399 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3400 EVT PtrVT = getPointerTy(MF.getDataLayout());
3401
3402 SDLoc dl(Op);
3403
3404 if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
3405 // vastart just stores the address of the VarArgsFrameIndex slot into the
3406 // memory location argument.
3407 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3408 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3409 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3410 MachinePointerInfo(SV));
3411 }
3412
3413 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
3414  // We assume the given va_list has already been allocated.
3415 //
3416 // typedef struct {
3417 // char gpr; /* index into the array of 8 GPRs
3418 // * stored in the register save area
3419 // * gpr=0 corresponds to r3,
3420 // * gpr=1 to r4, etc.
3421 // */
3422 // char fpr; /* index into the array of 8 FPRs
3423 // * stored in the register save area
3424 // * fpr=0 corresponds to f1,
3425 // * fpr=1 to f2, etc.
3426 // */
3427 // char *overflow_arg_area;
3428 // /* location on stack that holds
3429 // * the next overflow argument
3430 // */
3431 // char *reg_save_area;
3432 // /* where r3:r10 and f1:f8 (if saved)
3433 // * are stored
3434 // */
3435 // } va_list[1];
3436
3437 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
3438 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
3439 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
3440 PtrVT);
3441 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3442 PtrVT);
3443
3444 uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
3445 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
3446
3447 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
3448 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
3449
3450 uint64_t FPROffset = 1;
3451 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
3452
3453 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3454
3455 // Store first byte : number of int regs
3456 SDValue firstStore =
3457 DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
3458 MachinePointerInfo(SV), MVT::i8);
3459 uint64_t nextOffset = FPROffset;
3460 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
3461 ConstFPROffset);
3462
3463 // Store second byte : number of float regs
3464 SDValue secondStore =
3465 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
3466 MachinePointerInfo(SV, nextOffset), MVT::i8);
3467 nextOffset += StackOffset;
3468 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
3469
3470 // Store second word : arguments given on stack
3471 SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
3472 MachinePointerInfo(SV, nextOffset));
3473 nextOffset += FrameOffset;
3474 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
3475
3476 // Store third word : arguments given in registers
3477 return DAG.getStore(thirdStore, dl, FR, nextPtr,
3478 MachinePointerInfo(SV, nextOffset));
3479}
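
The stores above populate the 32-bit SVR4 va_list exactly as described in the
comment; a hedged sketch of the resulting byte layout (pointers are modeled
as uint32_t because this is the 32-bit ABI; the struct is illustrative, not
an LLVM definition):

  #include <cstdint>
  struct VAList32 {               // byte offset
    uint8_t  gpr;                 //  0  first byte written (number of int regs used)
    uint8_t  fpr;                 //  1  second byte written (number of FP regs used)
    uint16_t pad;                 //  2  alignment padding
    uint32_t overflow_arg_area;   //  4  third store (arguments passed on the stack)
    uint32_t reg_save_area;       //  8  fourth store (where r3:r10 / f1:f8 are saved)
  };
  static_assert(sizeof(VAList32) == 12,
                "matches the 12-byte copy performed by LowerVACOPY");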
3480
3481/// FPR - The set of FP registers that should be allocated for arguments
3482/// on Darwin and AIX.
3483static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
3484 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
3485 PPC::F11, PPC::F12, PPC::F13};
3486
3487/// CalculateStackSlotSize - Calculates the size reserved for this argument on
3488/// the stack.
3489static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
3490 unsigned PtrByteSize) {
3491 unsigned ArgSize = ArgVT.getStoreSize();
3492 if (Flags.isByVal())
3493 ArgSize = Flags.getByValSize();
3494
3495 // Round up to multiples of the pointer size, except for array members,
3496 // which are always packed.
3497 if (!Flags.isInConsecutiveRegs())
3498 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3499
3500 return ArgSize;
3501}
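
A worked example of the round-up above (roundToPtrSize is an illustrative
helper, not LLVM code): argument sizes are padded to a multiple of the
pointer size unless the argument is an array member passed in consecutive
registers.

  static unsigned roundToPtrSize(unsigned ArgSize, unsigned PtrByteSize) {
    return ((ArgSize + PtrByteSize - 1) / PtrByteSize) * PtrByteSize;
  }
  // A 5-byte byval argument on a 64-bit target: roundToPtrSize(5, 8) == 8.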
3502
3503/// CalculateStackSlotAlignment - Calculates the alignment of this argument
3504/// on the stack.
3505static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
3506 ISD::ArgFlagsTy Flags,
3507 unsigned PtrByteSize) {
3508 Align Alignment(PtrByteSize);
3509
3510 // Altivec parameters are padded to a 16 byte boundary.
3511 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3512 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3513 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3514 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3515 Alignment = Align(16);
3516
3517 // ByVal parameters are aligned as requested.
3518 if (Flags.isByVal()) {
3519 auto BVAlign = Flags.getNonZeroByValAlign();
3520 if (BVAlign > PtrByteSize) {
3521 if (BVAlign.value() % PtrByteSize != 0)
3522        llvm_unreachable(
3523            "ByVal alignment is not a multiple of the pointer size");
3524
3525 Alignment = BVAlign;
3526 }
3527 }
3528
3529 // Array members are always packed to their original alignment.
3530 if (Flags.isInConsecutiveRegs()) {
3531 // If the array member was split into multiple registers, the first
3532 // needs to be aligned to the size of the full type. (Except for
3533 // ppcf128, which is only aligned as its f64 components.)
3534 if (Flags.isSplit() && OrigVT != MVT::ppcf128)
3535 Alignment = Align(OrigVT.getStoreSize());
3536 else
3537 Alignment = Align(ArgVT.getStoreSize());
3538 }
3539
3540 return Alignment;
3541}
3542
3543/// CalculateStackSlotUsed - Return whether this argument will use its
3544/// stack slot (instead of being passed in registers). ArgOffset,
3545/// AvailableFPRs, and AvailableVRs must hold the current argument
3546/// position, and will be updated to account for this argument.
3547static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
3548 unsigned PtrByteSize, unsigned LinkageSize,
3549 unsigned ParamAreaSize, unsigned &ArgOffset,
3550 unsigned &AvailableFPRs,
3551 unsigned &AvailableVRs) {
3552 bool UseMemory = false;
3553
3554 // Respect alignment of argument on the stack.
3555 Align Alignment =
3556 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
3557 ArgOffset = alignTo(ArgOffset, Alignment);
3558 // If there's no space left in the argument save area, we must
3559 // use memory (this check also catches zero-sized arguments).
3560 if (ArgOffset >= LinkageSize + ParamAreaSize)
3561 UseMemory = true;
3562
3563 // Allocate argument on the stack.
3564 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
3565 if (Flags.isInConsecutiveRegsLast())
3566 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3567 // If we overran the argument save area, we must use memory
3568 // (this check catches arguments passed partially in memory)
3569 if (ArgOffset > LinkageSize + ParamAreaSize)
3570 UseMemory = true;
3571
3572 // However, if the argument is actually passed in an FPR or a VR,
3573 // we don't use memory after all.
3574 if (!Flags.isByVal()) {
3575 if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
3576 if (AvailableFPRs > 0) {
3577 --AvailableFPRs;
3578 return false;
3579 }
3580 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3581 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3582 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3583 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3584 if (AvailableVRs > 0) {
3585 --AvailableVRs;
3586 return false;
3587 }
3588 }
3589
3590 return UseMemory;
3591}
3592
3593/// EnsureStackAlignment - Round stack frame size up from NumBytes to
3594/// ensure minimum alignment required for target.
3595static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
3596 unsigned NumBytes) {
3597 return alignTo(NumBytes, Lowering->getStackAlign());
3598}
3599
3600SDValue PPCTargetLowering::LowerFormalArguments(
3601 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3602 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3603 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3604 if (Subtarget.isAIXABI())
3605 return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
3606 InVals);
3607 if (Subtarget.is64BitELFABI())
3608 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
3609 InVals);
3610 if (Subtarget.is32BitELFABI())
3611 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
3612 InVals);
3613
3614 return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, dl, DAG,
3615 InVals);
3616}
3617
3618SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
3619 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3620 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3621 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3622
3623 // 32-bit SVR4 ABI Stack Frame Layout:
3624 // +-----------------------------------+
3625 // +--> | Back chain |
3626 // | +-----------------------------------+
3627 // | | Floating-point register save area |
3628 // | +-----------------------------------+
3629 // | | General register save area |
3630 // | +-----------------------------------+
3631 // | | CR save word |
3632 // | +-----------------------------------+
3633 // | | VRSAVE save word |
3634 // | +-----------------------------------+
3635 // | | Alignment padding |
3636 // | +-----------------------------------+
3637 // | | Vector register save area |
3638 // | +-----------------------------------+
3639 // | | Local variable space |
3640 // | +-----------------------------------+
3641 // | | Parameter list area |
3642 // | +-----------------------------------+
3643 // | | LR save word |
3644 // | +-----------------------------------+
3645 // SP--> +--- | Back chain |
3646 // +-----------------------------------+
3647 //
3648 // Specifications:
3649 // System V Application Binary Interface PowerPC Processor Supplement
3650 // AltiVec Technology Programming Interface Manual
3651
3652 MachineFunction &MF = DAG.getMachineFunction();
3653 MachineFrameInfo &MFI = MF.getFrameInfo();
3654 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3655
3656 EVT PtrVT = getPointerTy(MF.getDataLayout());
3657 // Potential tail calls could cause overwriting of argument stack slots.
3658 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3659 (CallConv == CallingConv::Fast));
3660 const Align PtrAlign(4);
3661
3662 // Assign locations to all of the incoming arguments.
3663 SmallVector<CCValAssign, 16> ArgLocs;
3664 PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3665 *DAG.getContext());
3666
3667 // Reserve space for the linkage area on the stack.
3668 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3669 CCInfo.AllocateStack(LinkageSize, PtrAlign);
3670 if (useSoftFloat())
3671 CCInfo.PreAnalyzeFormalArguments(Ins);
3672
3673 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
3674 CCInfo.clearWasPPCF128();
3675
3676 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3677 CCValAssign &VA = ArgLocs[i];
3678
3679 // Arguments stored in registers.
3680 if (VA.isRegLoc()) {
3681 const TargetRegisterClass *RC;
3682 EVT ValVT = VA.getValVT();
3683
3684 switch (ValVT.getSimpleVT().SimpleTy) {
3685 default:
3686        llvm_unreachable("ValVT not supported by formal arguments Lowering");
3687 case MVT::i1:
3688 case MVT::i32:
3689 RC = &PPC::GPRCRegClass;
3690 break;
3691 case MVT::f32:
3692 if (Subtarget.hasP8Vector())
3693 RC = &PPC::VSSRCRegClass;
3694 else if (Subtarget.hasSPE())
3695 RC = &PPC::GPRCRegClass;
3696 else
3697 RC = &PPC::F4RCRegClass;
3698 break;
3699 case MVT::f64:
3700 if (Subtarget.hasVSX())
3701 RC = &PPC::VSFRCRegClass;
3702 else if (Subtarget.hasSPE())
3703 // SPE passes doubles in GPR pairs.
3704 RC = &PPC::GPRCRegClass;
3705 else
3706 RC = &PPC::F8RCRegClass;
3707 break;
3708 case MVT::v16i8:
3709 case MVT::v8i16:
3710 case MVT::v4i32:
3711 RC = &PPC::VRRCRegClass;
3712 break;
3713 case MVT::v4f32:
3714 RC = &PPC::VRRCRegClass;
3715 break;
3716 case MVT::v2f64:
3717 case MVT::v2i64:
3718 RC = &PPC::VRRCRegClass;
3719 break;
3720 }
3721
3722 SDValue ArgValue;
3723 // Transform the arguments stored in physical registers into
3724 // virtual ones.
3725 if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
3726        assert(i + 1 < e && "No second half of double precision argument");
3727 unsigned RegLo = MF.addLiveIn(VA.getLocReg(), RC);
3728 unsigned RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
3729 SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
3730 SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
3731 if (!Subtarget.isLittleEndian())
3732 std::swap (ArgValueLo, ArgValueHi);
3733 ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
3734 ArgValueHi);
3735 } else {
3736 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3737 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
3738 ValVT == MVT::i1 ? MVT::i32 : ValVT);
3739 if (ValVT == MVT::i1)
3740 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
3741 }
3742
3743 InVals.push_back(ArgValue);
3744 } else {
3745 // Argument stored in memory.
3746      assert(VA.isMemLoc());
3747
3748      // Get the extended size of the argument type on the stack.
3749 unsigned ArgSize = VA.getLocVT().getStoreSize();
3750 // Get the actual size of the argument type
3751 unsigned ObjSize = VA.getValVT().getStoreSize();
3752 unsigned ArgOffset = VA.getLocMemOffset();
3753 // Stack objects in PPC32 are right justified.
3754 ArgOffset += ArgSize - ObjSize;
3755 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
3756
3757 // Create load nodes to retrieve arguments from the stack.
3758 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3759 InVals.push_back(
3760 DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
3761 }
3762 }
3763
3764 // Assign locations to all of the incoming aggregate by value arguments.
3765 // Aggregates passed by value are stored in the local variable space of the
3766 // caller's stack frame, right above the parameter list area.
3767 SmallVector<CCValAssign, 16> ByValArgLocs;
3768 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
3769 ByValArgLocs, *DAG.getContext());
3770
3771 // Reserve stack space for the allocations in CCInfo.
3772 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);
3773
3774 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
3775
3776 // Area that is at least reserved in the caller of this function.
3777 unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
3778 MinReservedArea = std::max(MinReservedArea, LinkageSize);
3779
3780 // Set the size that is at least reserved in caller of this function. Tail
3781 // call optimized function's reserved stack space needs to be aligned so that
3782 // taking the difference between two stack areas will result in an aligned
3783 // stack.
3784 MinReservedArea =
3785 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
3786 FuncInfo->setMinReservedArea(MinReservedArea);
3787
3788 SmallVector<SDValue, 8> MemOps;
3789
3790 // If the function takes variable number of arguments, make a frame index for
3791 // the start of the first vararg value... for expansion of llvm.va_start.
3792 if (isVarArg) {
3793 static const MCPhysReg GPArgRegs[] = {
3794 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
3795 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
3796 };
3797 const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
3798
3799 static const MCPhysReg FPArgRegs[] = {
3800 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
3801 PPC::F8
3802 };
3803 unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
3804
3805 if (useSoftFloat() || hasSPE())
3806 NumFPArgRegs = 0;
3807
3808 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
3809 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
3810
3811 // Make room for NumGPArgRegs and NumFPArgRegs.
3812 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
3813 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
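     // Illustrative note (not in the original source): on 32-bit SVR4 this
     // works out to 8 GPRs * 4 bytes + 8 FPRs * 8 bytes = 96 bytes of vararg
     // save area, or just 32 bytes when soft-float/SPE zeroes NumFPArgRegs.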
3814
3815 FuncInfo->setVarArgsStackOffset(
3816 MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
3817 CCInfo.getNextStackOffset(), true));
3818
3819 FuncInfo->setVarArgsFrameIndex(
3820 MFI.CreateStackObject(Depth, Align(8), false));
3821 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3822
3823 // The fixed integer arguments of a variadic function are stored to the
3824 // VarArgsFrameIndex on the stack so that they may be loaded by
3825 // dereferencing the result of va_next.
3826 for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
3827 // Get an existing live-in vreg, or add a new one.
3828 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
3829 if (!VReg)
3830 VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
3831
3832 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3833 SDValue Store =
3834 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3835 MemOps.push_back(Store);
3836 // Increment the address by four for the next argument to store
3837 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
3838 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3839 }
3840
3841 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
3842 // is set.
3843 // The double arguments are stored to the VarArgsFrameIndex
3844 // on the stack.
3845 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
3846 // Get an existing live-in vreg, or add a new one.
3847 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
3848 if (!VReg)
3849 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
3850
3851 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
3852 SDValue Store =
3853 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3854 MemOps.push_back(Store);
3855 // Increment the address by eight for the next argument to store
3856 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
3857 PtrVT);
3858 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3859 }
3860 }
3861
3862 if (!MemOps.empty())
3863 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3864
3865 return Chain;
3866}
3867
3868// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3869// value to MVT::i64 and then truncate to the correct register size.
3870SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
3871 EVT ObjectVT, SelectionDAG &DAG,
3872 SDValue ArgVal,
3873 const SDLoc &dl) const {
3874 if (Flags.isSExt())
3875 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
3876 DAG.getValueType(ObjectVT));
3877 else if (Flags.isZExt())
3878 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
3879 DAG.getValueType(ObjectVT));
3880
3881 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
3882}
3883
3884SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
3885 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3886 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3887 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3888 // TODO: add description of PPC stack frame format, or at least some docs.
3889 //
3890 bool isELFv2ABI = Subtarget.isELFv2ABI();
3891 bool isLittleEndian = Subtarget.isLittleEndian();
3892 MachineFunction &MF = DAG.getMachineFunction();
3893 MachineFrameInfo &MFI = MF.getFrameInfo();
3894 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3895
3896 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
3897 "fastcc not supported on varargs functions");
3898
3899 EVT PtrVT = getPointerTy(MF.getDataLayout());
3900 // Potential tail calls could cause overwriting of argument stack slots.
3901 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3902 (CallConv == CallingConv::Fast));
3903 unsigned PtrByteSize = 8;
3904 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3905
3906 static const MCPhysReg GPR[] = {
3907 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
3908 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
3909 };
3910 static const MCPhysReg VR[] = {
3911 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
3912 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
3913 };
3914
3915 const unsigned Num_GPR_Regs = array_lengthof(GPR);
3916 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
3917 const unsigned Num_VR_Regs = array_lengthof(VR);
3918
3919 // Do a first pass over the arguments to determine whether the ABI
3920 // guarantees that our caller has allocated the parameter save area
3921 // on its stack frame. In the ELFv1 ABI, this is always the case;
3922 // in the ELFv2 ABI, it is true if this is a vararg function or if
3923 // any parameter is located in a stack slot.
3924
3925 bool HasParameterArea = !isELFv2ABI || isVarArg;
3926 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
3927 unsigned NumBytes = LinkageSize;
3928 unsigned AvailableFPRs = Num_FPR_Regs;
3929 unsigned AvailableVRs = Num_VR_Regs;
3930 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
3931 if (Ins[i].Flags.isNest())
3932 continue;
3933
3934 if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
3935 PtrByteSize, LinkageSize, ParamAreaSize,
3936 NumBytes, AvailableFPRs, AvailableVRs))
3937 HasParameterArea = true;
3938 }
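  // Illustrative note (not in the original source): ParamAreaSize shadows the
  // 8 GPRs, i.e. 8 * 8 = 64 bytes; an argument that spills past the register
  // set (for example a ninth integer argument) would typically make
  // CalculateStackSlotUsed return true and force HasParameterArea here.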
3939
3940 // Add DAG nodes to load the arguments or copy them out of registers. On
3941 // entry to a function on PPC, the arguments start after the linkage area,
3942 // although the first ones are often in registers.
3943
3944 unsigned ArgOffset = LinkageSize;
3945 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
3946 SmallVector<SDValue, 8> MemOps;
3947 Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
3948 unsigned CurArgIdx = 0;
3949 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
3950 SDValue ArgVal;
3951 bool needsLoad = false;
3952 EVT ObjectVT = Ins[ArgNo].VT;
3953 EVT OrigVT = Ins[ArgNo].ArgVT;
3954 unsigned ObjSize = ObjectVT.getStoreSize();
3955 unsigned ArgSize = ObjSize;
3956 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
3957 if (Ins[ArgNo].isOrigArg()) {
3958 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
3959 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
3960 }
3961 // We re-align the argument offset for each argument, except when using the
3962 // fast calling convention, in which case we only do so when the argument
3963 // will actually use a stack slot.
3964 unsigned CurArgOffset;
3965 Align Alignment;
3966 auto ComputeArgOffset = [&]() {
3967 /* Respect alignment of argument on the stack. */
3968 Alignment =
3969 CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
3970 ArgOffset = alignTo(ArgOffset, Alignment);
3971 CurArgOffset = ArgOffset;
3972 };
3973
3974 if (CallConv != CallingConv::Fast) {
3975 ComputeArgOffset();
3976
3977 /* Compute GPR index associated with argument offset. */
3978 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
3979 GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
3980 }
3981
3982 // FIXME the codegen can be much improved in some cases.
3983 // We do not have to keep everything in memory.
3984 if (Flags.isByVal()) {
3985 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
3986
3987 if (CallConv == CallingConv::Fast)
3988 ComputeArgOffset();
3989
3990 // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of the register size.
3991 ObjSize = Flags.getByValSize();
3992 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
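      // Illustrative note (not in the original source): a 13-byte byval
      // aggregate has ObjSize = 13 and ArgSize = ((13 + 7) / 8) * 8 = 16, so
      // it occupies two doublewords of the parameter save area.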
3993 // Empty aggregate parameters do not take up registers. Examples:
3994 // struct { } a;
3995 // union { } b;
3996 // int c[0];
3997 // etc. However, we have to provide a place-holder in InVals, so
3998 // pretend we have an 8-byte item at the current address for that
3999 // purpose.
4000 if (!ObjSize) {
4001 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4002 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4003 InVals.push_back(FIN);
4004 continue;
4005 }
4006
4007 // Create a stack object covering all stack doublewords occupied
4008 // by the argument. If the argument is (fully or partially) on
4009 // the stack, or if the argument is fully in registers but the
4010 // caller has allocated the parameter save anyway, we can refer
4011 // directly to the caller's stack frame. Otherwise, create a
4012 // local copy in our own frame.
4013 int FI;
4014 if (HasParameterArea ||
4015 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4016 FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4017 else
4018 FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4019 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4020
4021 // Handle aggregates smaller than 8 bytes.
4022 if (ObjSize < PtrByteSize) {
4023 // The value of the object is its address, which differs from the
4024 // address of the enclosing doubleword on big-endian systems.
4025 SDValue Arg = FIN;
4026 if (!isLittleEndian) {
4027 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4028 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4029 }
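        // Illustrative note (not in the original source): for ObjSize == 3 on
        // a big-endian target, the value's address within its enclosing
        // doubleword is FIN + (8 - 3) = FIN + 5 after this adjustment.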
4030 InVals.push_back(Arg);
4031
4032 if (GPR_idx != Num_GPR_Regs) {
4033 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4034 FuncInfo->addLiveInAttr(VReg, Flags);
4035 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4036 SDValue Store;
4037
4038 if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
4039 EVT ObjType = (ObjSize == 1 ? MVT::i8 :
4040 (ObjSize == 2 ? MVT::i16 : MVT::i32));
4041 Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4042 MachinePointerInfo(&*FuncArg), ObjType);
4043 } else {
4044 // For sizes that don't fit a truncating store (3, 5, 6, 7),
4045 // store the whole register as-is to the parameter save area
4046 // slot.
4047 Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4048 MachinePointerInfo(&*FuncArg));
4049 }
4050
4051 MemOps.push_back(Store);
4052 }
4053 // Whether we copied from a register or not, advance the offset
4054 // into the parameter save area by a full doubleword.
4055 ArgOffset += PtrByteSize;
4056 continue;
4057 }
4058
4059 // The value of the object is its address, which is the address of
4060 // its first stack doubleword.
4061 InVals.push_back(FIN);
4062
4063 // Store whatever pieces of the object are in registers to memory.
4064 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4065 if (GPR_idx == Num_GPR_Regs)
4066 break;
4067
4068 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4069 FuncInfo->addLiveInAttr(VReg, Flags);
4070 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4071 SDValue Addr = FIN;
4072 if (j) {
4073 SDValue Off = DAG.getConstant(j, dl, PtrVT);
4074 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4075 }
4076 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
4077 MachinePointerInfo(&*FuncArg, j));
4078 MemOps.push_back(Store);
4079 ++GPR_idx;
4080 }
4081 ArgOffset += ArgSize;
4082 continue;
4083 }
4084
4085 switch (ObjectVT.getSimpleVT().SimpleTy) {
4086 default: llvm_unreachable("Unhandled argument type!");
4087 case MVT::i1:
4088 case MVT::i32:
4089 case MVT::i64:
4090 if (Flags.isNest()) {
4091 // The 'nest' parameter, if any, is passed in R11.
4092 unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4093 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4094
4095 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4096 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4097
4098 break;
4099 }
4100
4101 // These can be scalar arguments or elements of an integer array type
4102 // passed directly. Clang may use those instead of "byval" aggregate
4103 // types to avoid forcing arguments to memory unnecessarily.
4104 if (GPR_idx != Num_GPR_Regs) {
4105 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4106 FuncInfo->addLiveInAttr(VReg, Flags);
4107 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4108
4109 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4110 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4111 // value to MVT::i64 and then truncate to the correct register size.
4112 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4113 } else {
4114 if (CallConv == CallingConv::Fast)
4115 ComputeArgOffset();
4116
4117 needsLoad = true;
4118 ArgSize = PtrByteSize;
4119 }
4120 if (CallConv != CallingConv::Fast || needsLoad)
4121 ArgOffset += 8;
4122 break;
4123
4124 case MVT::f32:
4125 case MVT::f64:
4126 // These can be scalar arguments or elements of a float array type
4127 // passed directly. The latter are used to implement ELFv2 homogenous
4128 // float aggregates.
4129 if (FPR_idx != Num_FPR_Regs) {
4130 unsigned VReg;
4131
4132 if (ObjectVT == MVT::f32)
4133 VReg = MF.addLiveIn(FPR[FPR_idx],
4134 Subtarget.hasP8Vector()
4135 ? &PPC::VSSRCRegClass
4136 : &PPC::F4RCRegClass);
4137 else
4138 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4139 ? &PPC::VSFRCRegClass
4140 : &PPC::F8RCRegClass);
4141
4142 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4143 ++FPR_idx;
4144 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4145 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4146 // once we support fp <-> gpr moves.
4147
4148 // This can only ever happen in the presence of f32 array types,
4149 // since otherwise we never run out of FPRs before running out
4150 // of GPRs.
4151 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4152 FuncInfo->addLiveInAttr(VReg, Flags);
4153 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4154
4155 if (ObjectVT == MVT::f32) {
4156 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4157 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4158 DAG.getConstant(32, dl, MVT::i32));
4159 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4160 }
4161
4162 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4163 } else {
4164 if (CallConv == CallingConv::Fast)
4165 ComputeArgOffset();
4166
4167 needsLoad = true;
4168 }
4169
4170 // When passing an array of floats, the array occupies consecutive
4171 // space in the argument area; only round up to the next doubleword
4172 // at the end of the array. Otherwise, each float takes 8 bytes.
4173 if (CallConv != CallingConv::Fast || needsLoad) {
4174 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4175 ArgOffset += ArgSize;
4176 if (Flags.isInConsecutiveRegsLast())
4177 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4178 }
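      // Illustrative note (not in the original source): assuming the elements
      // are marked InConsecutiveRegs, a homogeneous aggregate such as
      // struct { float a, b, c; } advances ArgOffset by 4 per element and the
      // last element rounds the total up to the next doubleword: 12 -> 16.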
4179 break;
4180 case MVT::v4f32:
4181 case MVT::v4i32:
4182 case MVT::v8i16:
4183 case MVT::v16i8:
4184 case MVT::v2f64:
4185 case MVT::v2i64:
4186 case MVT::v1i128:
4187 case MVT::f128:
4188 // These can be scalar arguments or elements of a vector array type
4189 // passed directly. The latter are used to implement ELFv2 homogenous
4190 // vector aggregates.
4191 if (VR_idx != Num_VR_Regs) {
4192 unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4193 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4194 ++VR_idx;
4195 } else {
4196 if (CallConv == CallingConv::Fast)
4197 ComputeArgOffset();
4198 needsLoad = true;
4199 }
4200 if (CallConv != CallingConv::Fast || needsLoad)
4201 ArgOffset += 16;
4202 break;
4203 }
4204
4205 // We need to load the argument to a virtual register if we determined
4206 // above that we ran out of physical registers of the appropriate type.
4207 if (needsLoad) {
4208 if (ObjSize < ArgSize && !isLittleEndian)
4209 CurArgOffset += ArgSize - ObjSize;
4210 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4211 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4212 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4213 }
4214
4215 InVals.push_back(ArgVal);
4216 }
4217
4218 // Area that is at least reserved in the caller of this function.
4219 unsigned MinReservedArea;
4220 if (HasParameterArea)
4221 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4222 else
4223 MinReservedArea = LinkageSize;
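  // Illustrative note (not in the original source): when a parameter area is
  // present, the caller must reserve at least LinkageSize + 8 * 8 bytes,
  // e.g. 32 + 64 = 96 bytes assuming the usual 32-byte ELFv2 linkage area.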
4224
4225 // Set the size that is at least reserved in caller of this function. Tail
4226 // call optimized functions' reserved stack space needs to be aligned so that
4227 // taking the difference between two stack areas will result in an aligned
4228 // stack.
4229 MinReservedArea =
4230 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4231 FuncInfo->setMinReservedArea(MinReservedArea);
4232
4233 // If the function takes variable number of arguments, make a frame index for
4234 // the start of the first vararg value... for expansion of llvm.va_start.
4235 // The ELFv2 ABI spec states:
4236 // C programs that are intended to be *portable* across different compilers
4237 // and architectures must use the header file <stdarg.h> to deal with variable
4238 // argument lists.
4239 if (isVarArg && MFI.hasVAStart()) {
4240 int Depth = ArgOffset;
4241
4242 FuncInfo->setVarArgsFrameIndex(
4243 MFI.CreateFixedObject(PtrByteSize, Depth, true));
4244 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4245
4246 // If this function is vararg, store any remaining integer argument regs
4247 // to their spots on the stack so that they may be loaded by dereferencing
4248 // the result of va_next.
4249 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4250 GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4251 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4252 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4253 SDValue Store =
4254 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4255 MemOps.push_back(Store);
4257 // Increment the address by PtrByteSize for the next argument to store
4257 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4258 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4259 }
4260 }
4261
4262 if (!MemOps.empty())
4263 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4264
4265 return Chain;
4266}
4267
4268SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
4269 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4270 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4271 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4272 // TODO: add description of PPC stack frame format, or at least some docs.
4273 //
4274 MachineFunction &MF = DAG.getMachineFunction();
4275 MachineFrameInfo &MFI = MF.getFrameInfo();
4276 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4277
4278 EVT PtrVT = getPointerTy(MF.getDataLayout());
4279 bool isPPC64 = PtrVT == MVT::i64;
4280 // Potential tail calls could cause overwriting of argument stack slots.
4281 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4282 (CallConv == CallingConv::Fast));
4283 unsigned PtrByteSize = isPPC64 ? 8 : 4;
4284 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4285 unsigned ArgOffset = LinkageSize;
4286 // Area that is at least reserved in caller of this function.
4287 unsigned MinReservedArea = ArgOffset;
4288
4289 static const MCPhysReg GPR_32[] = { // 32-bit registers.
4290 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4291 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4292 };
4293 static const MCPhysReg GPR_64[] = { // 64-bit registers.
4294 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4295 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4296 };
4297 static const MCPhysReg VR[] = {
4298 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4299 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4300 };
4301
4302 const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
4303 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4304 const unsigned Num_VR_Regs = array_lengthof( VR);
4305
4306 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4307
4308 const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
4309
4310 // In 32-bit non-varargs functions, the stack space for vectors is after the
4311 // stack space for non-vectors. We do not use this space unless we have
4312 // too many vectors to fit in registers, something that only occurs in
4313 // constructed examples:), but we have to walk the arglist to figure
4314 // that out...for the pathological case, compute VecArgOffset as the
4315 // start of the vector parameter area. Computing VecArgOffset is the
4316 // entire point of the following loop.
4317 unsigned VecArgOffset = ArgOffset;
4318 if (!isVarArg && !isPPC64) {
4319 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
4320 ++ArgNo) {
4321 EVT ObjectVT = Ins[ArgNo].VT;
4322 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4323
4324 if (Flags.isByVal()) {
4325 // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of the register size.
4326 unsigned ObjSize = Flags.getByValSize();
4327 unsigned ArgSize =
4328 ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4329 VecArgOffset += ArgSize;
4330 continue;
4331 }
4332
4333 switch(ObjectVT.getSimpleVT().SimpleTy) {
4334 default: llvm_unreachable("Unhandled argument type!");
4335 case MVT::i1:
4336 case MVT::i32:
4337 case MVT::f32:
4338 VecArgOffset += 4;
4339 break;
4340 case MVT::i64: // PPC64
4341 case MVT::f64:
4342 // FIXME: We are guaranteed to be !isPPC64 at this point.
4343 // Does MVT::i64 apply?
4344 VecArgOffset += 8;
4345 break;
4346 case MVT::v4f32:
4347 case MVT::v4i32:
4348 case MVT::v8i16:
4349 case MVT::v16i8:
4350 // Nothing to do, we're only looking at Nonvector args here.
4351 break;
4352 }
4353 }
4354 }
4355 // We've found where the vector parameter area in memory is. Skip the
4356 // first 12 parameters; these don't use that memory.
4357 VecArgOffset = ((VecArgOffset+15)/16)*16;
4358 VecArgOffset += 12*16;
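  // Illustrative note (not in the original source): the 12 * 16 bytes skipped
  // here correspond to the 12 vector argument registers V2..V13; only vector
  // arguments beyond those registers are actually loaded from this area.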
4359
4360 // Add DAG nodes to load the arguments or copy them out of registers. On
4361 // entry to a function on PPC, the arguments start after the linkage area,
4362 // although the first ones are often in registers.
4363
4364 SmallVector<SDValue, 8> MemOps;
4365 unsigned nAltivecParamsAtEnd = 0;
4366 Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4367 unsigned CurArgIdx = 0;
4368 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4369 SDValue ArgVal;
4370 bool needsLoad = false;
4371 EVT ObjectVT = Ins[ArgNo].VT;
4372 unsigned ObjSize = ObjectVT.getSizeInBits()/8;
4373 unsigned ArgSize = ObjSize;
4374 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4375 if (Ins[ArgNo].isOrigArg()) {
4376 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4377 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4378 }
4379 unsigned CurArgOffset = ArgOffset;
4380
4381 // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
4382 if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
4383 ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
4384 if (isVarArg || isPPC64) {
4385 MinReservedArea = ((MinReservedArea+15)/16)*16;
4386 MinReservedArea += CalculateStackSlotSize(ObjectVT,
4387 Flags,
4388 PtrByteSize);
4389 } else nAltivecParamsAtEnd++;
4390 } else
4391 // Calculate min reserved area.
4392 MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
4393 Flags,
4394 PtrByteSize);
4395
4396 // FIXME the codegen can be much improved in some cases.
4397 // We do not have to keep everything in memory.
4398 if (Flags.isByVal()) {
4399 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4400
4401 // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of the register size.
4402 ObjSize = Flags.getByValSize();
4403 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4404 // Objects of size 1 and 2 are right justified, everything else is
4405 // left justified. This means the memory address is adjusted forwards.
4406 if (ObjSize==1 || ObjSize==2) {
4407 CurArgOffset = CurArgOffset + (4 - ObjSize);
4408 }
4409 // The value of the object is its address.
4410 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, false, true);
4411 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4412 InVals.push_back(FIN);
4413 if (ObjSize==1 || ObjSize==2) {
4414 if (GPR_idx != Num_GPR_Regs) {
4415 unsigned VReg;
4416 if (isPPC64)
4417 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4418 else
4419 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4420 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4421 EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
4422 SDValue Store =
4423 DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
4424 MachinePointerInfo(&*FuncArg), ObjType);
4425 MemOps.push_back(Store);
4426 ++GPR_idx;
4427 }
4428
4429 ArgOffset += PtrByteSize;
4430
4431 continue;
4432 }
4433 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4434 // Store whatever pieces of the object are in registers
4435 // to memory. ArgOffset will be the address of the beginning
4436 // of the object.
4437 if (GPR_idx != Num_GPR_Regs) {
4438 unsigned VReg;
4439 if (isPPC64)
4440 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4441 else
4442 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4443 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4444 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4445 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4446 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4447 MachinePointerInfo(&*FuncArg, j));
4448 MemOps.push_back(Store);
4449 ++GPR_idx;
4450 ArgOffset += PtrByteSize;
4451 } else {
4452 ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
4453 break;
4454 }
4455 }
4456 continue;
4457 }
4458
4459 switch (ObjectVT.getSimpleVT().SimpleTy) {
4460 default: llvm_unreachable("Unhandled argument type!");
4461 case MVT::i1:
4462 case MVT::i32:
4463 if (!isPPC64) {
4464 if (GPR_idx != Num_GPR_Regs) {
4465 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4466 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
4467
4468 if (ObjectVT == MVT::i1)
4469 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);
4470
4471 ++GPR_idx;
4472 } else {
4473 needsLoad = true;
4474 ArgSize = PtrByteSize;
4475 }
4476 // All int arguments reserve stack space in the Darwin ABI.
4477 ArgOffset += PtrByteSize;
4478 break;
4479 }
4480 LLVM_FALLTHROUGH;
4481 case MVT::i64: // PPC64
4482 if (GPR_idx != Num_GPR_Regs) {
4483 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4484 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4485
4486 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4487 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4488 // value to MVT::i64 and then truncate to the correct register size.
4489 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4490
4491 ++GPR_idx;
4492 } else {
4493 needsLoad = true;
4494 ArgSize = PtrByteSize;
4495 }
4496 // All int arguments reserve stack space in the Darwin ABI.
4497 ArgOffset += 8;
4498 break;
4499
4500 case MVT::f32:
4501 case MVT::f64:
4502 // Every 4 bytes of argument space consumes one of the GPRs available for
4503 // argument passing.
4504 if (GPR_idx != Num_GPR_Regs) {
4505 ++GPR_idx;
4506 if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
4507 ++GPR_idx;
4508 }
4509 if (FPR_idx != Num_FPR_Regs) {
4510 unsigned VReg;
4511
4512 if (ObjectVT == MVT::f32)
4513 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
4514 else
4515 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
4516
4517 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4518 ++FPR_idx;
4519 } else {
4520 needsLoad = true;
4521 }
4522
4523 // All FP arguments reserve stack space in the Darwin ABI.
4524 ArgOffset += isPPC64 ? 8 : ObjSize;
4525 break;
4526 case MVT::v4f32:
4527 case MVT::v4i32:
4528 case MVT::v8i16:
4529 case MVT::v16i8:
4530 // Note that vector arguments in registers don't reserve stack space,
4531 // except in varargs functions.
4532 if (VR_idx != Num_VR_Regs) {
4533 unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4534 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4535 if (isVarArg) {
4536 while ((ArgOffset % 16) != 0) {
4537 ArgOffset += PtrByteSize;
4538 if (GPR_idx != Num_GPR_Regs)
4539 GPR_idx++;
4540 }
4541 ArgOffset += 16;
4542 GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
4543 }
4544 ++VR_idx;
4545 } else {
4546 if (!isVarArg && !isPPC64) {
4547 // Vectors go after all the nonvectors.
4548 CurArgOffset = VecArgOffset;
4549 VecArgOffset += 16;
4550 } else {
4551 // Vectors are aligned.
4552 ArgOffset = ((ArgOffset+15)/16)*16;
4553 CurArgOffset = ArgOffset;
4554 ArgOffset += 16;
4555 }
4556 needsLoad = true;
4557 }
4558 break;
4559 }
4560
4561 // We need to load the argument to a virtual register if we determined above
4562 // that we ran out of physical registers of the appropriate type.
4563 if (needsLoad) {
4564 int FI = MFI.CreateFixedObject(ObjSize,
4565 CurArgOffset + (ArgSize - ObjSize),
4566 isImmutable);
4567 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4568 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4569 }
4570
4571 InVals.push_back(ArgVal);
4572 }
4573
4574 // Allow for Altivec parameters at the end, if needed.
4575 if (nAltivecParamsAtEnd) {
4576 MinReservedArea = ((MinReservedArea+15)/16)*16;
4577 MinReservedArea += 16*nAltivecParamsAtEnd;
4578 }
4579
4580 // Area that is at least reserved in the caller of this function.
4581 MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);
4582
4583 // Set the size that is at least reserved in caller of this function. Tail
4584 // call optimized functions' reserved stack space needs to be aligned so that
4585 // taking the difference between two stack areas will result in an aligned
4586 // stack.
4587 MinReservedArea =
4588 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4589 FuncInfo->setMinReservedArea(MinReservedArea);
4590
4591 // If the function takes variable number of arguments, make a frame index for
4592 // the start of the first vararg value... for expansion of llvm.va_start.
4593 if (isVarArg) {
4594 int Depth = ArgOffset;
4595
4596 FuncInfo->setVarArgsFrameIndex(
4597 MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
4598 Depth, true));
4599 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4600
4601 // If this function is vararg, store any remaining integer argument regs
4602 // to their spots on the stack so that they may be loaded by dereferencing
4603 // the result of va_next.
4604 for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
4605 unsigned VReg;
4606
4607 if (isPPC64)
4608 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4609 else
4610 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4611
4612 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4613 SDValue Store =
4614 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4615 MemOps.push_back(Store);
4616 // Increment the address by the pointer size for the next argument to store
4617 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4618 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4619 }
4620 }
4621
4622 if (!MemOps.empty())
4623 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4624
4625 return Chain;
4626}
4627
4628/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4629/// adjusted to accommodate the arguments for the tailcall.
4630static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4631 unsigned ParamSize) {
4632
4633 if (!isTailCall) return 0;
4634
4635 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4636 unsigned CallerMinReservedArea = FI->getMinReservedArea();
4637 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4638 // Remember only if the new adjustment is bigger (i.e. SPDiff is more negative).
4639 if (SPDiff < FI->getTailCallSPDelta())
4640 FI->setTailCallSPDelta(SPDiff);
4641
4642 return SPDiff;
4643}
4644
4645static bool isFunctionGlobalAddress(SDValue Callee);
4646
4647static bool callsShareTOCBase(const Function *Caller, SDValue Callee,
4648 const TargetMachine &TM) {
4649 // It does not make sense to call callsShareTOCBase() with a caller that
4650 // is PC Relative since PC Relative callers do not have a TOC.
4651#ifndef NDEBUG
4652 const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4653 assert(!STICaller->isUsingPCRelativeCalls() &&
4654 "PC Relative callers do not have a TOC and cannot share a TOC Base");
4656
4657 // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4658 // don't have enough information to determine if the caller and callee share
4659 // the same TOC base, so we have to pessimistically assume they don't for
4660 // correctness.
4661 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4662 if (!G)
4663 return false;
4664
4665 const GlobalValue *GV = G->getGlobal();
4666
4667 // If the callee is preemptable, then the static linker will use a plt-stub
4668 // which saves the toc to the stack, and needs a nop after the call
4669 // instruction to convert to a toc-restore.
4670 if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
4671 return false;
4672
4673 // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4674 // We may need a TOC restore in the situation where the caller requires a
4675 // valid TOC but the callee is PC Relative and does not.
4676 const Function *F = dyn_cast<Function>(GV);
4677 const GlobalAlias *Alias = dyn_cast<GlobalAlias>(GV);
4678
4679 // If we have an Alias we can try to get the function from there.
4680 if (Alias) {
4681 const GlobalObject *GlobalObj = Alias->getBaseObject();
4682 F = dyn_cast<Function>(GlobalObj);
4683 }
4684
4685 // If we still have no valid function pointer we do not have enough
4686 // information to determine if the callee uses PC Relative calls so we must
4687 // assume that it does.
4688 if (!F)
4689 return false;
4690
4691 // If the callee uses PC Relative we cannot guarantee that the callee won't
4692 // clobber the TOC of the caller and so we must assume that the two
4693 // functions do not share a TOC base.
4694 const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4695 if (STICallee->isUsingPCRelativeCalls())
4696 return false;
4697
4698 // The medium and large code models are expected to provide a sufficiently
4699 // large TOC to provide all data addressing needs of a module with a
4700 // single TOC.
4701 if (CodeModel::Medium == TM.getCodeModel() ||
4702 CodeModel::Large == TM.getCodeModel())
4703 return true;
4704
4705 // Otherwise we need to ensure callee and caller are in the same section,
4706 // since the linker may allocate multiple TOCs, and we don't know which
4707 // sections will belong to the same TOC base.
4708 if (!GV->isStrongDefinitionForLinker())
4709 return false;
4710
4711 // Any explicitly-specified sections and section prefixes must also match.
4712 // Also, if we're using -ffunction-sections, then each function is always in
4713 // a different section (the same is true for COMDAT functions).
4714 if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
4715 GV->getSection() != Caller->getSection())
4716 return false;
4717 if (const auto *F = dyn_cast<Function>(GV)) {
4718 if (F->getSectionPrefix() != Caller->getSectionPrefix())
4719 return false;
4720 }
4721
4722 return true;
4723}
4724
4725static bool
4726needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4727 const SmallVectorImpl<ISD::OutputArg> &Outs) {
4728 assert(Subtarget.is64BitELFABI());
4729
4730 const unsigned PtrByteSize = 8;
4731 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4732
4733 static const MCPhysReg GPR[] = {
4734 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4735 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4736 };
4737 static const MCPhysReg VR[] = {
4738 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4739 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4740 };
4741
4742 const unsigned NumGPRs = array_lengthof(GPR);
4743 const unsigned NumFPRs = 13;
4744 const unsigned NumVRs = array_lengthof(VR);
4745 const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4746
4747 unsigned NumBytes = LinkageSize;
4748 unsigned AvailableFPRs = NumFPRs;
4749 unsigned AvailableVRs = NumVRs;
4750
4751 for (const ISD::OutputArg& Param : Outs) {
4752 if (Param.Flags.isNest()) continue;
4753
4754 if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
4755 LinkageSize, ParamAreaSize, NumBytes,
4756 AvailableFPRs, AvailableVRs))
4757 return true;
4758 }
4759 return false;
4760}
4761
4762static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
4763 if (CB.arg_size() != CallerFn->arg_size())
4764 return false;
4765
4766 auto CalleeArgIter = CB.arg_begin();
4767 auto CalleeArgEnd = CB.arg_end();
4768 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4769
4770 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4771 const Value* CalleeArg = *CalleeArgIter;
4772 const Value* CallerArg = &(*CallerArgIter);
4773 if (CalleeArg == CallerArg)
4774 continue;
4775
4776 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4777 // tail call @callee([4 x i64] undef, [4 x i64] %b)
4778 // }
4779 // 1st argument of callee is undef and has the same type as caller.
4780 if (CalleeArg->getType() == CallerArg->getType() &&
4781 isa<UndefValue>(CalleeArg))
4782 continue;
4783
4784 return false;
4785 }
4786
4787 return true;
4788}
4789
4790// Returns true if TCO is possible between the callers and callees
4791// calling conventions.
4792static bool
4793areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
4794 CallingConv::ID CalleeCC) {
4795 // Tail calls are possible with fastcc and ccc.
4796 auto isTailCallableCC = [] (CallingConv::ID CC){
4797 return CC == CallingConv::C || CC == CallingConv::Fast;
4798 };
4799 if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
4800 return false;
4801
4802 // We can safely tail call both fastcc and ccc callees from a c calling
4803 // convention caller. If the caller is fastcc, we may have less stack space
4804 // than a non-fastcc caller with the same signature so disable tail-calls in
4805 // that case.
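  // Illustrative note (not in the original source): with this rule, ccc -> ccc,
  // ccc -> fastcc and fastcc -> fastcc calls remain eligible, while a fastcc
  // caller invoking a ccc callee is rejected.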
4806 return CallerCC == CallingConv::C || CallerCC == CalleeCC;
4807}
4808
4809bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
4810 SDValue Callee, CallingConv::ID CalleeCC, const CallBase *CB, bool isVarArg,
4811 const SmallVectorImpl<ISD::OutputArg> &Outs,
4812 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
4813 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
4814
4815 if (DisableSCO && !TailCallOpt) return false;
4816
4817 // Variadic argument functions are not supported.
4818 if (isVarArg) return false;
4819
4820 auto &Caller = DAG.getMachineFunction().getFunction();
4821 // Check that the calling conventions are compatible for tco.
4822 if (!areCallingConvEligibleForTCO_64SVR4(Caller.getCallingConv(), CalleeCC))
4823 return false;
4824
4825 // A caller that contains any byval parameter is not supported.
4826 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
4827 return false;
4828
4829 // A callee that contains any byval parameter is not supported either.
4830 // Note: This is a quick workaround, because in some cases, e.g.
4831 // caller's stack size > callee's stack size, we are still able to apply
4832 // sibling call optimization. For example, gcc is able to do SCO for caller1
4833 // in the following example, but not for caller2.
4834 // struct test {
4835 // long int a;
4836 // char ary[56];
4837 // } gTest;
4838 // __attribute__((noinline)) int callee(struct test v, struct test *b) {
4839 // b->a = v.a;
4840 // return 0;
4841 // }
4842 // void caller1(struct test a, struct test c, struct test *b) {
4843 // callee(gTest, b); }
4844 // void caller2(struct test *b) { callee(gTest, b); }
4845 if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
4846 return false;
4847
4848 // If callee and caller use different calling conventions, we cannot pass
4849 // parameters on stack since offsets for the parameter area may be different.
4850 if (Caller.getCallingConv() != CalleeCC &&
4851 needStackSlotPassParameters(Subtarget, Outs))
4852 return false;
4853
4854 // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
4855 // the caller and callee share the same TOC for TCO/SCO. If the caller and
4856 // callee potentially have different TOC bases then we cannot tail call since
4857 // we need to restore the TOC pointer after the call.
4858 // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
4859 // We cannot guarantee this for indirect calls or calls to external functions.
4860 // When PC-Relative addressing is used, the concept of the TOC is no longer
4861 // applicable so this check is not required.
4862 // Check first for indirect calls.
4863 if (!Subtarget.isUsingPCRelativeCalls() &&
4864 !isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee))
4865 return false;
4866
4867 // Check if we share the TOC base.
4868 if (!Subtarget.isUsingPCRelativeCalls() &&
4869 !callsShareTOCBase(&Caller, Callee, getTargetMachine()))
4870 return false;
4871
4872 // TCO allows altering callee ABI, so we don't have to check further.
4873 if (CalleeCC == CallingConv::Fast && TailCallOpt)
4874 return true;
4875
4876 if (DisableSCO) return false;
4877
4878 // If the callee uses the same argument list that the caller is using, then
4879 // we can apply SCO in this case. If not, then we need to check whether the
4880 // callee needs stack for passing arguments.
4881 // PC Relative tail calls may not have a CallBase.
4882 // If there is no CallBase we cannot verify if we have the same argument
4883 // list so assume that we don't have the same argument list.
4884 if (CB && !hasSameArgumentList(&Caller, *CB) &&
4885 needStackSlotPassParameters(Subtarget, Outs))
4886 return false;
4887 else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
4888 return false;
4889
4890 return true;
4891}
4892
4893/// IsEligibleForTailCallOptimization - Check whether the call is eligible
4894/// for tail call optimization. Targets which want to do tail call
4895/// optimization should implement this function.
4896bool
4897PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
4898 CallingConv::ID CalleeCC,
4899 bool isVarArg,
4900 const SmallVectorImpl<ISD::InputArg> &Ins,
4901 SelectionDAG& DAG) const {
4902 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
4903 return false;
4904
4905 // Variable argument functions are not supported.
4906 if (isVarArg)
4907 return false;
4908
4909 MachineFunction &MF = DAG.getMachineFunction();
4910 CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
4911 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
4912 // Functions containing by val parameters are not supported.
4913 for (unsigned i = 0; i != Ins.size(); i++) {
4914 ISD::ArgFlagsTy Flags = Ins[i].Flags;
4915 if (Flags.isByVal()) return false;
4916 }
4917
4918 // Non-PIC/GOT tail calls are supported.
4919 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
4920 return true;
4921
4922 // At the moment we can only do local tail calls (in same module, hidden
4923 // or protected) if we are generating PIC.
4924 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
4925 return G->getGlobal()->hasHiddenVisibility()
4926 || G->getGlobal()->hasProtectedVisibility();
4927 }
4928
4929 return false;
4930}
4931
4932 /// isBLACompatibleAddress - Return the immediate to use if the specified
4933/// 32-bit value is representable in the immediate field of a BxA instruction.
4934static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
4935 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
4936 if (!C) return nullptr;
4937
4938 int Addr = C->getZExtValue();
4939 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
4940 SignExtend32<26>(Addr) != Addr)
4941 return nullptr; // Top 6 bits have to be sext of immediate.
4942
4943 return DAG
4944 .getConstant(
4945 (int)C->getZExtValue() >> 2, SDLoc(Op),
4946 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
4947 .getNode();
4948}
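// Illustrative note (not in the original source): an absolute callee address
// such as 0x1000 is 4-byte aligned and fits in the sign-extended 26-bit field,
// so the returned node holds 0x1000 >> 2 = 0x400; an unaligned or out-of-range
// address yields nullptr instead.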
4949
4950namespace {
4951
4952struct TailCallArgumentInfo {
4953 SDValue Arg;
4954 SDValue FrameIdxOp;
4955 int FrameIdx = 0;
4956
4957 TailCallArgumentInfo() = default;
4958};
4959
4960} // end anonymous namespace
4961
4962/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
4963static void StoreTailCallArgumentsToStackSlot(
4964 SelectionDAG &DAG, SDValue Chain,
4965 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
4966 SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
4967 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
4968 SDValue Arg = TailCallArgs[i].Arg;
4969 SDValue FIN = TailCallArgs[i].FrameIdxOp;
4970 int FI = TailCallArgs[i].FrameIdx;
4971 // Store relative to framepointer.
4972 MemOpChains.push_back(DAG.getStore(
4973 Chain, dl, Arg, FIN,
4974 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
4975 }
4976}
4977
4978/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
4979/// the appropriate stack slot for the tail call optimized function call.
4980static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
4981 SDValue OldRetAddr, SDValue OldFP,
4982 int SPDiff, const SDLoc &dl) {
4983 if (SPDiff) {
4984 // Calculate the new stack slot for the return address.
4985 MachineFunction &MF = DAG.getMachineFunction();
4986 const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
4987 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
4988 bool isPPC64 = Subtarget.isPPC64();
4989 int SlotSize = isPPC64 ? 8 : 4;
4990 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
4991 int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
4992 NewRetAddrLoc, true);
4993 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4994 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
4995 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
4996 MachinePointerInfo::getFixedStack(MF, NewRetAddr));
4997 }
4998 return Chain;
4999}
5000
5001/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
5002/// the position of the argument.
5003static void
5004CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
5005 SDValue Arg, int SPDiff, unsigned ArgOffset,
5006 SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
5007 int Offset = ArgOffset + SPDiff;
5008 uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
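  // Illustrative note (not in the original source): OpSize rounds the value
  // size up to whole bytes, e.g. an i1 argument occupies 1 byte and a v4i32
  // argument occupies 16 bytes in its fixed stack slot.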
5009 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
5010 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5011 SDValue FIN = DAG.getFrameIndex(FI, VT);
5012 TailCallArgumentInfo Info;
5013 Info.Arg = Arg;
5014 Info.FrameIdxOp = FIN;
5015 Info.FrameIdx = FI;
5016 TailCallArguments.push_back(Info);
5017}
5018
5019 /// EmitTailCallLoadFPAndRetAddr - Emit loads from the frame pointer and return
5020 /// address stack slots. Returns the chain as result and the loaded values in
5021 /// LROpOut/FPOpOut. Used when tail calling.
5022SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5023 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5024 SDValue &FPOpOut, const SDLoc &dl) const {
5025 if (SPDiff) {
5026 // Load the LR and FP stack slot for later adjusting.
5027 EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5028 LROpOut = getReturnAddrFrameIndex(DAG);
5029 LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
5030 Chain = SDValue(LROpOut.getNode(), 1);
5031 }
5032 return Chain;
5033}
5034
5035/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
5036/// by "Src" to address "Dst" of size "Size". Alignment information is
5037/// specified by the specific parameter attribute. The copy will be passed as
5038/// a byval function parameter.
5039/// Sometimes what we are copying is the end of a larger object, the part that
5040/// does not fit in registers.
5041static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
5042 SDValue Chain, ISD::ArgFlagsTy Flags,
5043 SelectionDAG &DAG, const SDLoc &dl) {
5044 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
5045 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
5046 Flags.getNonZeroByValAlign(), false, false, false,
5047 MachinePointerInfo(), MachinePointerInfo());
5048}
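// Illustrative note (not in the original source): for a byval parameter whose
// flags report a size of 24 bytes and 8-byte alignment, this emits a single
// 24-byte memcpy node from Src to Dst on the given chain.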
5049
5050/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5051/// tail calls.
5052static void LowerMemOpCallTo(
5053 SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5054 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5055 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5056 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5057 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5058 if (!isTailCall) {
5059 if (isVector) {
5060 SDValue StackPtr;
5061 if (isPPC64)
5062 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5063 else
5064 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5065 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5066 DAG.getConstant(ArgOffset, dl, PtrVT));
5067 }
5068 MemOpChains.push_back(
5069 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5070 // Calculate and remember argument location.
5071 } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
5072 TailCallArguments);
5073}
5074
5075static void
5076PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
5077 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
5078 SDValue FPOp,
5079 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5080 // Emit a sequence of copyto/copyfrom virtual registers for arguments that
5081 // might overwrite each other in case of tail call optimization.
5082 SmallVector<SDValue, 8> MemOpChains2;
5083 // Do not flag preceding copytoreg stuff together with the following stuff.
5084 InFlag = SDValue();
5085 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
5086 MemOpChains2, dl);
5087 if (!MemOpChains2.empty())
5088 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
5089
5090 // Store the return address to the appropriate stack slot.
5091 Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
5092
5093 // Emit callseq_end just before tailcall node.
5094 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
5095 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
5096 InFlag = Chain.getValue(1);
5097}
5098
5099// Is this global address that of a function that can be called by name? (as
5100// opposed to something that must hold a descriptor for an indirect call).
5101static bool isFunctionGlobalAddress(SDValue Callee) {
5102 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
5103 if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
5104 Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
5105 return false;
5106
5107 return G->getGlobal()->getValueType()->isFunctionTy();
5108 }
5109
5110 return false;
5111}
5112
5113SDValue PPCTargetLowering::LowerCallResult(
5114 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
5115 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5116 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5117 SmallVector<CCValAssign, 16> RVLocs;
5118 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5119 *DAG.getContext());
5120
5121 CCRetInfo.AnalyzeCallResult(
5122 Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5123 ? RetCC_PPC_Cold
5124 : RetCC_PPC);
5125
5126 // Copy all of the result registers out of their specified physreg.
5127 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5128 CCValAssign &VA = RVLocs[i];
5129 assert(VA.isRegLoc() && "Can only return in registers!");
5130
5131 SDValue Val;
5132
5133 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5134 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5135 InFlag);
5136 Chain = Lo.getValue(1);
5137 InFlag = Lo.getValue(2);
5138 VA = RVLocs[++i]; // skip ahead to next loc
5139 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5140 InFlag);
5141 Chain = Hi.getValue(1);
5142 InFlag = Hi.getValue(2);
5143 if (!Subtarget.isLittleEndian())
5144 std::swap (Lo, Hi);
5145 Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
5146 } else {
5147 Val = DAG.getCopyFromReg(Chain, dl,
5148 VA.getLocReg(), VA.getLocVT(), InFlag);
5149 Chain = Val.getValue(1);
5150 InFlag = Val.getValue(2);
5151 }
5152
5153 switch (VA.getLocInfo()) {
5154 default: llvm_unreachable("Unknown loc info!");
5155 case CCValAssign::Full: break;
5156 case CCValAssign::AExt:
5157 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5158 break;
5159 case CCValAssign::ZExt:
5160 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5161 DAG.getValueType(VA.getValVT()));
5162 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5163 break;
5164 case CCValAssign::SExt:
5165 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5166 DAG.getValueType(VA.getValVT()));
5167 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5168 break;
5169 }
5170
5171 InVals.push_back(Val);
5172 }
5173
5174 return Chain;
5175}
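// Note on the result-copy loop above: when the subtarget uses SPE, an f64
// return value occupies two consecutive i32 register locations. Both halves
// are copied out, Lo and Hi are swapped on big-endian targets, and the f64 is
// rebuilt with PPCISD::BUILD_SPE64 before the usual LocInfo (AExt/ZExt/SExt)
// truncation handling is applied.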
5176
5177static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5178 const PPCSubtarget &Subtarget, bool isPatchPoint) {
5179 // PatchPoint calls are not indirect.
5180 if (isPatchPoint)
5181 return false;
5182
5183 if (isFunctionGlobalAddress(Callee) || dyn_cast<ExternalSymbolSDNode>(Callee))
5184 return false;
5185
5186 // Darwin and 32-bit ELF can use a BLA. The descriptor-based ABIs cannot
5187 // because the immediate function pointer points to a descriptor instead of
5188 // a function entry point. The ELFv2 ABI cannot use a BLA because the function
5189 // pointer immediate points to the global entry point, while the BLA would
5190 // need to jump to the local entry point (see rL211174).
5191 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5192 isBLACompatibleAddress(Callee, DAG))
5193 return false;
5194
5195 return true;
5196}
5197
5198// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5199static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5200 return Subtarget.isAIXABI() ||
5201 (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5202}
5203
5204static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
5205 const Function &Caller,
5206 const SDValue &Callee,
5207 const PPCSubtarget &Subtarget,
5208 const TargetMachine &TM) {
5209 if (CFlags.IsTailCall)
5210 return PPCISD::TC_RETURN;
5211
5212 // This is a call through a function pointer.
5213 if (CFlags.IsIndirect) {
5214 // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
5215 // indirect calls. The save of the caller's TOC pointer to the stack will be
5216 // inserted into the DAG as part of call lowering. The restore of the TOC
5217 // pointer is modeled by using a pseudo instruction for the call opcode that
5218 // represents the 2-instruction sequence of an indirect branch and link,
5219 // immediately followed by a load of the TOC pointer from the stack save
5220 // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
5221 // as it is not saved or used.
5222 return isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
5223 : PPCISD::BCTRL;
5224 }
5225
5226 if (Subtarget.isUsingPCRelativeCalls()) {
5227 assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5228 return PPCISD::CALL_NOTOC;
5229 }
5230
5231 // The ABIs that maintain a TOC pointer across calls need to have a nop
5232 // immediately following the call instruction if the caller and callee may
5233 // have different TOC bases. At link time, if the linker determines that the
5234 // calls may not share a TOC base, the call is redirected to a trampoline
5235 // inserted by the linker. The trampoline will (among other things) save the
5236 // caller's TOC pointer at an ABI-designated offset in the linkage area and
5237 // the linker will rewrite the nop to be a load of the TOC pointer from the
5238 // linkage area into gpr2.
5239 if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())
5240 return callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL
5241 : PPCISD::CALL_NOP;
5242
5243 return PPCISD::CALL;
5244}
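// Summary of the opcode selection above (a reading aid, not new logic):
//   TC_RETURN      - any tail call.
//   BCTRL_LOAD_TOC - indirect call on an ABI that must save/restore the TOC.
//   BCTRL          - any other indirect call.
//   CALL_NOTOC     - direct call when PC-relative calls are in use.
//   CALL_NOP       - direct call on a TOC-based ABI when the caller and
//                    callee may not share a TOC base.
//   CALL           - any other direct call.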
5245
5246static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
5247 const SDLoc &dl, const PPCSubtarget &Subtarget) {
5248 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
5249 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
5250 return SDValue(Dest, 0);
5251
5252 // Returns true if the callee is local, and false otherwise.
5253 auto isLocalCallee = [&]() {
5254 const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
5255 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5256 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5257
5258 return DAG.getTarget().shouldAssumeDSOLocal(*Mod, GV) &&
5259 !dyn_cast_or_null<GlobalIFunc>(GV);
5260 };
5261
5262 // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
5263 // a static relocation model causes some versions of GNU LD (2.17.50, at
5264 // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5265 // built with secure-PLT.
5266 bool UsePlt =
5267 Subtarget.is32BitELFABI() && !isLocalCallee() &&
5268 Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;
5269
5270 const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5271 const TargetMachine &TM = Subtarget.getTargetMachine();
5272 const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
5273 MCSymbolXCOFF *S =
5274 cast<MCSymbolXCOFF>(TLOF->getFunctionEntryPointSymbol(GV, TM));
5275
5276 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5277 return DAG.getMCSymbol(S, PtrVT);
5278 };
5279
5280 if (isFunctionGlobalAddress(Callee)) {
5281 const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5282
5283 if (Subtarget.isAIXABI()) {
5284 assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
5285 return getAIXFuncEntryPointSymbolSDNode(GV);
5286 }
5287 return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
5288 UsePlt ? PPCII::MO_PLT : 0);
5289 }
5290
5291 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5292 const char *SymName = S->getSymbol();
5293 if (Subtarget.isAIXABI()) {
5294 // If there exists a user-declared function whose name is the same as the
5295 // ExternalSymbol's, then we pick up the user-declared version.
5296 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5297 if (const Function *F =
5298 dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
5299 return getAIXFuncEntryPointSymbolSDNode(F);
5300
5301 // On AIX, direct function calls reference the symbol for the function's
5302 // entry point, which is named by prepending a "." before the function's
5303 // C-linkage name. A QualName symbol is returned here because an external
5304 // function entry point is a csect with the XTY_ER property.
5305 const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5306 auto &Context = DAG.getMachineFunction().getMMI().getContext();
5307 MCSectionXCOFF *Sec = Context.getXCOFFSection(
5308 (Twine(".") + Twine(SymName)).str(), XCOFF::XMC_PR, XCOFF::XTY_ER,
5309 SectionKind::getMetadata());
5310 return Sec->getQualNameSymbol();
5311 };
5312
5313 SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
5314 }
5315 return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
5316 UsePlt ? PPCII::MO_PLT : 0);
5317 }
5318
5319 // No transformation needed.
5320 assert(Callee.getNode() && "What no callee?");
5321 return Callee;
5322}
5323
5324static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
5325 assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5326 "Expected a CALLSEQ_STARTSDNode.");
5327
5328 // The last operand is the chain, except when the node has glue. If the node
5329 // has glue, then the last operand is the glue, and the chain is the second
5330 // last operand.
5331 SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5332 if (LastValue.getValueType() != MVT::Glue)
5333 return LastValue;
5334
5335 return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5336}
5337
5338// Creates the node that moves a function's address into the count register
5339// to prepare for an indirect call instruction.
5340static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5341 SDValue &Glue, SDValue &Chain,
5342 const SDLoc &dl) {
5343 SDValue MTCTROps[] = {Chain, Callee, Glue};
5344 EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5345 Chain = DAG.getNode(PPCISD::MTCTR, dl, makeArrayRef(ReturnTypes, 2),
5346 makeArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5347 // The glue is the second value produced.
5348 Glue = Chain.getValue(1);
5349}
5350
5351static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5352 SDValue &Glue, SDValue &Chain,
5353 SDValue CallSeqStart,
5354 const CallBase *CB, const SDLoc &dl,
5355 bool hasNest,
5356 const PPCSubtarget &Subtarget) {
5357 // Function pointers in the 64-bit SVR4 ABI do not point to the function
5358 // entry point, but to the function descriptor (the function entry point
5359 // address is part of the function descriptor though).
5360 // The function descriptor is a three-doubleword structure with the
5361 // following fields: function entry point, TOC base address and
5362 // environment pointer.
5363 // Thus for a call through a function pointer, the following actions need
5364 // to be performed:
5365 // 1. Save the TOC of the caller in the TOC save area of its stack
5366 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
5367 // 2. Load the address of the function entry point from the function
5368 // descriptor.
5369 // 3. Load the TOC of the callee from the function descriptor into r2.
5370 // 4. Load the environment pointer from the function descriptor into
5371 // r11.
5372 // 5. Branch to the function entry point address.
5373 // 6. On return of the callee, the TOC of the caller needs to be
5374 // restored (this is done in FinishCall()).
5375 //
5376 // The loads are scheduled at the beginning of the call sequence, and the
5377 // register copies are flagged together to ensure that no other
5378 // operations can be scheduled in between. E.g. without flagging the
5379 // copies together, a TOC access in the caller could be scheduled between
5380 // the assignment of the callee TOC and the branch to the callee, which leads
5381 // to incorrect code.
5382
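 // For illustration only (this struct does not appear in the file): on the
 // 64-bit ELFv1 ABI the descriptor read below can be pictured as
 //   struct FunctionDescriptor {
 //     uint64_t EntryPoint; // offset 0
 //     uint64_t TOCBase;    // offset descriptorTOCAnchorOffset()
 //     uint64_t EnvPointer; // offset descriptorEnvironmentPointerOffset()
 //   };
 // and the three loads that follow fetch exactly these members.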
5383 // Start by loading the function address from the descriptor.
5384 SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
5385 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5386 ? (MachineMemOperand::MODereferenceable |
5387 MachineMemOperand::MOInvariant)
5388 : MachineMemOperand::MONone;
5389
5390 MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5391
5392 // Registers used in building the DAG.
5393 const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5394 const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5395
5396 // Offsets of descriptor members.
5397 const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5398 const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5399
5400 const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5401 const unsigned Alignment = Subtarget.isPPC64() ? 8 : 4;
5402
5403 // One load for the function's entry point address.
5404 SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
5405 Alignment, MMOFlags);
5406
5407 // One for loading the TOC anchor for the module that contains the called
5408 // function.
5409 SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
5410 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
5411 SDValue TOCPtr =
5412 DAG.getLoad(RegVT, dl, LDChain, AddTOC,
5413 MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
5414
5415 // One for loading the environment pointer.
5416 SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
5417 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
5418 SDValue LoadEnvPtr =
5419 DAG.getLoad(RegVT, dl, LDChain, AddPtr,
5420 MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
5421
5422
5423 // Then copy the newly loaded TOC anchor to the TOC pointer.
5424 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
5425 Chain = TOCVal.getValue(0);
5426 Glue = TOCVal.getValue(1);
5427
5428 // If the function call has an explicit 'nest' parameter, it takes the
5429 // place of the environment pointer.
5430 assert((!hasNest || !Subtarget.isAIXABI()) &&
5431 "Nest parameter is not supported on AIX.");
5432 if (!hasNest) {
5433 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
5434 Chain = EnvVal.getValue(0);
5435 Glue = EnvVal.getValue(1);
5436 }
5437
5438 // The rest of the indirect call sequence is the same as the non-descriptor
5439 // DAG.
5440 prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
5441}
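// Taken together, the node sequence built above mirrors steps 2-5 of the
// comment at the top of this function: load the entry point, TOC anchor and
// environment pointer from the descriptor, copy the TOC anchor into the TOC
// register and (absent a 'nest' argument) the environment pointer into its
// register, then move the entry point into CTR for the branch-and-link.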
5442
5443static void
5444buildCallOperands(SmallVectorImpl<SDValue> &Ops,
5445 PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5446 SelectionDAG &DAG,
5447 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5448 SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5449 const PPCSubtarget &Subtarget) {
5450 const bool IsPPC64 = Subtarget.isPPC64();
5451 // MVT for a general purpose register.
5452 const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
5453
5454 // First operand is always the chain.
5455 Ops.push_back(Chain);
5456
5457 // If it's a direct call pass the callee as the second operand.
5458 if (!CFlags.IsIndirect)
5459 Ops.push_back(Callee);
5460 else {
5461 assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5462
5463 // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5464 // on the stack (this would have been done in `LowerCall_64SVR4` or
5465 // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5466 // represents both the indirect branch and a load that restores the TOC
5467 // pointer from the linkage area. The operand for the TOC restore is an add
5468 // of the TOC save offset to the stack pointer. This must be the second
5469 // operand: after the chain input but before any other variadic arguments.
5470 // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5471 // saved or used.
5472 if (isTOCSaveRestoreRequired(Subtarget)) {
5473 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5474
5475 SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
5476 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5477 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5478 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
5479 Ops.push_back(AddTOC);
5480 }
5481
5482 // Add the register used for the environment pointer.
5483 if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5484 Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
5485 RegVT));
5486
5487
5488 // Add CTR register as callee so a bctr can be emitted later.
5489 if (CFlags.IsTailCall)
5490 Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5491 }
5492
5493 // If this is a tail call add stack pointer delta.
5494 if (CFlags.IsTailCall)
5495 Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5496
5497 // Add argument registers to the end of the list so that they are known live
5498 // into the call.
5499 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
5500 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
5501 RegsToPass[i].second.getValueType()));
5502
5503 // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5504 // no way to mark dependencies as implicit here.
5505 // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5506 if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5507 !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5508 Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
5509
5510 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5511 if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5512 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5513
5514 // Add a register mask operand representing the call-preserved registers.
5515 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5516 const uint32_t *Mask =
5517 TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
5518 assert(Mask && "Missing call preserved mask for calling convention");
5519 Ops.push_back(DAG.getRegisterMask(Mask));
5520
5521 // If the glue is valid, it is the last operand.
5522 if (Glue.getNode())
5523 Ops.push_back(Glue);
5524}
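// The resulting operand order is: chain; the callee (direct calls) or, for
// indirect calls, the TOC-restore address (when TOC save/restore is
// required), the environment-pointer register (descriptor ABIs without a
// 'nest' argument) and CTR (indirect tail calls); the SP delta (tail calls);
// the argument registers; the TOC register (TOC-based ABIs without
// PC-relative calls); CR1EQ (32-bit SVR4 vararg calls); the register mask;
// and the glue, if present.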
5525
5526SDValue PPCTargetLowering::FinishCall(
5527 CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5528 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5529 SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5530 unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5531 SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5532
5533 if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5534 Subtarget.isAIXABI())
5535 setUsesTOCBasePtr(DAG);
5536
5537 unsigned CallOpc =
5538 getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
5539 Subtarget, DAG.getTarget());
5540
5541 if (!CFlags.IsIndirect)
5542 Callee = transformCallee(Callee, DAG, dl, Subtarget);
5543 else if (Subtarget.usesFunctionDescriptors())
5544 prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5545 dl, CFlags.HasNest, Subtarget);
5546 else
5547 prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5548
5549 // Build the operand list for the call instruction.
5550 SmallVector<SDValue, 8> Ops;
5551 buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5552 SPDiff, Subtarget);
5553
5554 // Emit tail call.
5555 if (CFlags.IsTailCall) {
5556 // Indirect tail calls when using PC Relative calls do not have the same
5557 // constraints.
5558 assert(((Callee.getOpcode() == ISD::Register &&
5559 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5560 Callee.getOpcode() == ISD::TargetExternalSymbol ||
5561 Callee.getOpcode() == ISD::TargetGlobalAddress ||
5562 isa<ConstantSDNode>(Callee) ||
5563 (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5564 "Expecting a global address, external symbol, absolute value, "
5565 "register or an indirect tail call when PC Relative calls are "
5566 "used.");
5567 // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5568 assert(CallOpc == PPCISD::TC_RETURN &&
5569 "Unexpected call opcode for a tail call.");
5570 DAG.getMachineFunction().getFrameInfo().setHasTailCall();
5571 return DAG.getNode(CallOpc, dl, MVT::Other, Ops);
5572 }
5573
5574 std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5575 Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
5576 DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
5577 Glue = Chain.getValue(1);
5578
5579 // When performing tail call optimization the callee pops its arguments off
5580 // the stack. Account for this here so these bytes can be pushed back on in
5581 // PPCFrameLowering::eliminateCallFramePseudoInstr.
5582 int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5583 getTargetMachine().Options.GuaranteedTailCallOpt)
5584 ? NumBytes
5585 : 0;
5586
5587 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
5588 DAG.getIntPtrConstant(BytesCalleePops, dl, true),
5589 Glue, dl);
5590 Glue = Chain.getValue(1);
5591
5592 return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
5593 DAG, InVals);
5594}
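// Worked example for the CALLSEQ_END above: for a fastcc call compiled with
// GuaranteedTailCallOpt, the callee pops its own arguments, so BytesCalleePops
// equals NumBytes and PPCFrameLowering::eliminateCallFramePseudoInstr pushes
// those bytes back on; for every other call BytesCalleePops is 0 and the
// caller's stack adjustment is symmetric.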
5595
5596SDValue
5597PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5598 SmallVectorImpl<SDValue> &InVals) const {
5599 SelectionDAG &DAG = CLI.DAG;
5600 SDLoc &dl = CLI.DL;
5601 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5602 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5603 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
5604 SDValue Chain = CLI.Chain;
5605 SDValue Callee = CLI.Callee;
5606 bool &isTailCall = CLI.IsTailCall;
5607 CallingConv::ID CallConv = CLI.CallConv;
5608 bool isVarArg = CLI.IsVarArg;
5609 bool isPatchPoint = CLI.IsPatchPoint;
5610 const CallBase *CB = CLI.CB;
5611
5612 if (isTailCall) {
5613 if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5614 isTailCall = false;
5615 else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5616 isTailCall = IsEligibleForTailCallOptimization_64SVR4(
5617 Callee, CallConv, CB, isVarArg, Outs, Ins, DAG);
5618 else
5619 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
5620 Ins, DAG);
5621 if (isTailCall) {
5622 ++NumTailCalls;
5623 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5624 ++NumSiblingCalls;
5625
5626 // PC Relative calls no longer guarantee that the callee is a Global
5627 // Address Node. The callee could be an indirect tail call in which
5628 // case the SDValue for the callee could be a load (to load the address
5629 // of a function pointer) or it may be a register copy (to move the
5630 // address of the callee from a function parameter into a virtual
5631 // register). It may also be an ExternalSymbolSDNode (e.g. memcpy).
5632 assert((Subtarget.isUsingPCRelativeCalls() ||
5633 isa<GlobalAddressSDNode>(Callee)) &&
5634 "Callee should be an llvm::Function object.");
5635
5636 LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5637 << "\nTCO callee: ");
5638 LLVM_DEBUG(Callee.dump());
5639 }
5640 }
5641
5642 if (!isTailCall && CB && CB->isMustTailCall())
5643 report_fatal_error("failed to perform tail call elimination on a call "
5644 "site marked musttail");
5645
5646 // When long calls (i.e. indirect calls) are always used, calls are always
5647 // made via function pointer. If we have a function name, first translate it
5648 // into a pointer.
5649 if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5650 !isTailCall)
5651 Callee = LowerGlobalAddress(Callee, DAG);
5652
5653 CallFlags CFlags(
5654 CallConv, isTailCall, isVarArg, isPatchPoint,
5655 isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
5656 // hasNest
5657 Subtarget.is64BitELFABI() &&
5658 any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
5659 CLI.NoMerge);
5660
5661 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5662 return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5663 InVals, CB);
5664
5665 if (Subtarget.isSVR4ABI())
5666 return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5667 InVals, CB);
5668
5669 if (Subtarget.isAIXABI())
5670 return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5671 InVals, CB);
5672
5673 return LowerCall_Darwin(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5674 InVals, CB);
5675}
5676
5677SDValue PPCTargetLowering::LowerCall_32SVR4(
5678 SDValue Chain, SDValue Callee, CallFlags CFlags,
5679 const SmallVectorImpl<ISD::OutputArg> &Outs,
5680 const SmallVectorImpl<SDValue> &OutVals,
5681 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5682 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5683 const CallBase *CB) const {
5684 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5685 // of the 32-bit SVR4 ABI stack frame layout.
5686
5687 const CallingConv::ID CallConv = CFlags.CallConv;
5688 const bool IsVarArg = CFlags.IsVarArg;
5689 const bool IsTailCall = CFlags.IsTailCall;
5690
5691 assert((CallConv == CallingConv::C ||
5692 CallConv == CallingConv::Cold ||
5693 CallConv == CallingConv::Fast) && "Unknown calling convention!");
5694
5695 const Align PtrAlign(4);
5696
5697 MachineFunction &MF = DAG.getMachineFunction();
5698
5699 // Mark this function as potentially containing a tail call. As a
5700 // consequence, the frame pointer will be used for dynamic stack allocation
5701 // and for restoring the caller's stack pointer in this function's epilogue.
5702 // This is done because a tail call might overwrite the value in this
5703 // function's (MF) stack pointer stack slot 0(SP).
5704 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5705 CallConv == CallingConv::Fast)
5706 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5707
5708 // Count how many bytes are to be pushed on the stack, including the linkage
5709 // area, parameter list area and the part of the local variable space which
5710 // contains copies of aggregates which are passed by value.
5711
5712 // Assign locations to all of the outgoing arguments.
5713 SmallVector<CCValAssign, 16> ArgLocs;
5714 PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5715
5716 // Reserve space for the linkage area on the stack.
5717 CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
5718 PtrAlign);
5719 if (useSoftFloat())
5720 CCInfo.PreAnalyzeCallOperands(Outs);
5721
5722 if (IsVarArg) {
5723 // Handle fixed and variable vector arguments differently.
5724 // Fixed vector arguments go into registers as long as registers are
5725 // available. Variable vector arguments always go into memory.
5726 unsigned NumArgs = Outs.size();
5727
5728 for (unsigned i = 0; i != NumArgs; ++i) {
5729 MVT ArgVT = Outs[i].VT;
5730 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5731 bool Result;
5732
5733 if (Outs[i].IsFixed) {
5734 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
5735 CCInfo);
5736 } else {
5737 Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
5738 ArgFlags, CCInfo);
5739 }
5740
5741 if (Result) {
5742#ifndef NDEBUG
5743 errs() << "Call operand #" << i << " has unhandled type "
5744 << EVT(ArgVT).getEVTString() << "\n";
5745#endif
5746 llvm_unreachable(nullptr);
5747 }
5748 }
5749 } else {
5750 // All arguments are treated the same.
5751 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
5752 }
5753 CCInfo.clearWasPPCF128();
5754
5755 // Assign locations to all of the outgoing aggregate by value arguments.
5756 SmallVector<CCValAssign, 16> ByValArgLocs;
5757 CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
5758
5759 // Reserve stack space for the allocations in CCInfo.
5760 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);
5761
5762 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
5763
5764 // Size of the linkage area, parameter list area and the part of the local
5765 // variable space where copies of aggregates which are passed by value are
5766 // stored.
5767 unsigned NumBytes = CCByValInfo.getNextStackOffset();
5768
5769 // Calculate by how many bytes the stack has to be adjusted in case of tail
5770 // call optimization.
5771 int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
5772
5773 // Adjust the stack pointer for the new arguments...
5774 // These operations are automatically eliminated by the prolog/epilog pass
5775 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5776 SDValue CallSeqStart = Chain;
5777
5778 // Load the return address and frame pointer so they can be moved somewhere
5779 // else later.
5780 SDValue LROp, FPOp;
5781 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5782
5783 // Set up a copy of the stack pointer for use loading and storing any
5784 // arguments that may not fit in the registers available for argument
5785 // passing.
5786 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5787
5788 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5789 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5790 SmallVector<SDValue, 8> MemOpChains;
5791
5792 bool seenFloatArg = false;
5793 // Walk the register/memloc assignments, inserting copies/loads.
5794 // i - Tracks the index into the list of registers allocated for the call
5795 // RealArgIdx - Tracks the index into the list of actual function arguments
5796 // j - Tracks the index into the list of byval arguments
5797 for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
5798 i != e;
5799 ++i, ++RealArgIdx) {
5800 CCValAssign &VA = ArgLocs[i];
5801 SDValue Arg = OutVals[RealArgIdx];
5802 ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
5803
5804 if (Flags.isByVal()) {
5805 // Argument is an aggregate which is passed by value, thus we need to
5806 // create a copy of it in the local variable space of the current stack
5807 // frame (which is the stack frame of the caller) and pass the address of
5808 // this copy to the callee.
5809 assert((j < ByValArgLocs.size()) && "Index out of bounds!");
5810 CCValAssign &ByValVA = ByValArgLocs[j++];
5811 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
5812
5813 // Memory reserved in the local variable space of the caller's stack frame.
5814 unsigned LocMemOffset = ByValVA.getLocMemOffset();
5815
5816 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5817 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5818 StackPtr, PtrOff);
5819
5820 // Create a copy of the argument in the local area of the current
5821 // stack frame.
5822 SDValue MemcpyCall =
5823 CreateCopyOfByValArgument(Arg, PtrOff,
5824 CallSeqStart.getNode()->getOperand(0),
5825 Flags, DAG, dl);
5826
5827 // This must go outside the CALLSEQ_START..END.
5828 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
5829 SDLoc(MemcpyCall));
5830 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5831 NewCallSeqStart.getNode());
5832 Chain = CallSeqStart = NewCallSeqStart;
5833
5834 // Pass the address of the aggregate copy on the stack either in a
5835 // physical register or in the parameter list area of the current stack
5836 // frame to the callee.
5837 Arg = PtrOff;
5838 }
5839
5840 // When useCRBits() is true, there can be i1 arguments.
5841 // It is because getRegisterType(MVT::i1) => MVT::i1,
5842 // and for other integer types getRegisterType() => MVT::i32.
5843 // Extend i1 and ensure callee will get i32.
5844 if (Arg.getValueType() == MVT::i1)
5845 Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
5846 dl, MVT::i32, Arg);
5847
5848 if (VA.isRegLoc()) {
5849 seenFloatArg |= VA.getLocVT().isFloatingPoint();
5850 // Put argument in a physical register.
5851 if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
5852 bool IsLE = Subtarget.isLittleEndian();
5853 SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5854 DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
5855 RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
5856 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5857 DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
5858 RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
5859 SVal.getValue(0)));
5860 } else
5861 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
5862 } else {
5863 // Put argument in the parameter list area of the current stack frame.
5864 assert(VA.isMemLoc());
5865 unsigned LocMemOffset = VA.getLocMemOffset();
5866
5867 if (!IsTailCall) {
5868 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5869 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5870 StackPtr, PtrOff);
5871
5872 MemOpChains.push_back(
5873 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5874 } else {
5875 // Calculate and remember argument location.
5876 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
5877 TailCallArguments);
5878 }
5879 }
5880 }
5881
5882 if (!MemOpChains.empty())
5883 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
5884
5885 // Build a sequence of copy-to-reg nodes chained together with token chain
5886 // and flag operands which copy the outgoing args into the appropriate regs.
5887 SDValue InFlag;
5888 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5889 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5890 RegsToPass[i].second, InFlag);
5891 InFlag = Chain.getValue(1);
5892 }
5893
5894 // Set CR bit 6 to true if this is a vararg call with floating args passed in
5895 // registers.
5896 if (IsVarArg) {
5897 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
5898 SDValue Ops[] = { Chain, InFlag };
5899
5900 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
5901 dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));
5902
5903 InFlag = Chain.getValue(1);
5904 }
5905
5906 if (IsTailCall)
5907 PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
5908 TailCallArguments);
5909
5910 return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
5911 Callee, SPDiff, NumBytes, Ins, InVals, CB);
5912}
5913
5914// Copy an argument into memory, being careful to do this outside the
5915// call sequence for the call to which the argument belongs.
5916SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
5917 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
5918 SelectionDAG &DAG, const SDLoc &dl) const {
5919 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
5920 CallSeqStart.getNode()->getOperand(0),
5921 Flags, DAG, dl);
5922 // The MEMCPY must go outside the CALLSEQ_START..END.
5923 int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
5924 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,