Bug Summary

File: lib/Target/PowerPC/PPCISelLowering.cpp
Warning: line 8832, column 31
1st function call argument is an uninitialized value
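
This class of warning means the analyzer found at least one execution path on which a variable is passed to a function before it has been assigned a value. As a minimal sketch only (hypothetical code, not taken from PPCISelLowering.cpp), the pattern it reports looks like this:

    void consume(int v);   // any callee

    void example(bool flag) {
      int x;               // x starts out uninitialized
      if (flag)
        x = 1;             // assigned on only one branch
      consume(x);          // 1st function call argument is an uninitialized
                           // value -- reported on the path where flag is false
    }

The annotated source that follows is the file in which the analyzer found such a path; the flagged call site is at line 8832, column 31.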

Annotated Source Code

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name PPCISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -mframe-pointer=none -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-10/lib/clang/10.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-10~svn372087/build-llvm/lib/Target/PowerPC -I /build/llvm-toolchain-snapshot-10~svn372087/lib/Target/PowerPC -I /build/llvm-toolchain-snapshot-10~svn372087/build-llvm/include -I /build/llvm-toolchain-snapshot-10~svn372087/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-10/lib/clang/10.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-10~svn372087/build-llvm/lib/Target/PowerPC -fdebug-prefix-map=/build/llvm-toolchain-snapshot-10~svn372087=. -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -stack-protector 2 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2019-09-17-145504-7198-1 -x c++ /build/llvm-toolchain-snapshot-10~svn372087/lib/Target/PowerPC/PPCISelLowering.cpp

/build/llvm-toolchain-snapshot-10~svn372087/lib/Target/PowerPC/PPCISelLowering.cpp

1//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the PPCISelLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCISelLowering.h"
14#include "MCTargetDesc/PPCPredicates.h"
15#include "PPC.h"
16#include "PPCCCState.h"
17#include "PPCCallingConv.h"
18#include "PPCFrameLowering.h"
19#include "PPCInstrInfo.h"
20#include "PPCMachineFunctionInfo.h"
21#include "PPCPerfectShuffle.h"
22#include "PPCRegisterInfo.h"
23#include "PPCSubtarget.h"
24#include "PPCTargetMachine.h"
25#include "llvm/ADT/APFloat.h"
26#include "llvm/ADT/APInt.h"
27#include "llvm/ADT/ArrayRef.h"
28#include "llvm/ADT/DenseMap.h"
29#include "llvm/ADT/None.h"
30#include "llvm/ADT/STLExtras.h"
31#include "llvm/ADT/SmallPtrSet.h"
32#include "llvm/ADT/SmallSet.h"
33#include "llvm/ADT/SmallVector.h"
34#include "llvm/ADT/Statistic.h"
35#include "llvm/ADT/StringRef.h"
36#include "llvm/ADT/StringSwitch.h"
37#include "llvm/CodeGen/CallingConvLower.h"
38#include "llvm/CodeGen/ISDOpcodes.h"
39#include "llvm/CodeGen/MachineBasicBlock.h"
40#include "llvm/CodeGen/MachineFrameInfo.h"
41#include "llvm/CodeGen/MachineFunction.h"
42#include "llvm/CodeGen/MachineInstr.h"
43#include "llvm/CodeGen/MachineInstrBuilder.h"
44#include "llvm/CodeGen/MachineJumpTableInfo.h"
45#include "llvm/CodeGen/MachineLoopInfo.h"
46#include "llvm/CodeGen/MachineMemOperand.h"
47#include "llvm/CodeGen/MachineModuleInfo.h"
48#include "llvm/CodeGen/MachineOperand.h"
49#include "llvm/CodeGen/MachineRegisterInfo.h"
50#include "llvm/CodeGen/RuntimeLibcalls.h"
51#include "llvm/CodeGen/SelectionDAG.h"
52#include "llvm/CodeGen/SelectionDAGNodes.h"
53#include "llvm/CodeGen/TargetInstrInfo.h"
54#include "llvm/CodeGen/TargetLowering.h"
55#include "llvm/CodeGen/TargetRegisterInfo.h"
56#include "llvm/CodeGen/ValueTypes.h"
57#include "llvm/IR/CallSite.h"
58#include "llvm/IR/CallingConv.h"
59#include "llvm/IR/Constant.h"
60#include "llvm/IR/Constants.h"
61#include "llvm/IR/DataLayout.h"
62#include "llvm/IR/DebugLoc.h"
63#include "llvm/IR/DerivedTypes.h"
64#include "llvm/IR/Function.h"
65#include "llvm/IR/GlobalValue.h"
66#include "llvm/IR/IRBuilder.h"
67#include "llvm/IR/Instructions.h"
68#include "llvm/IR/Intrinsics.h"
69#include "llvm/IR/Module.h"
70#include "llvm/IR/Type.h"
71#include "llvm/IR/Use.h"
72#include "llvm/IR/Value.h"
73#include "llvm/MC/MCContext.h"
74#include "llvm/MC/MCExpr.h"
75#include "llvm/MC/MCRegisterInfo.h"
76#include "llvm/MC/MCSymbolXCOFF.h"
77#include "llvm/Support/AtomicOrdering.h"
78#include "llvm/Support/BranchProbability.h"
79#include "llvm/Support/Casting.h"
80#include "llvm/Support/CodeGen.h"
81#include "llvm/Support/CommandLine.h"
82#include "llvm/Support/Compiler.h"
83#include "llvm/Support/Debug.h"
84#include "llvm/Support/ErrorHandling.h"
85#include "llvm/Support/Format.h"
86#include "llvm/Support/KnownBits.h"
87#include "llvm/Support/MachineValueType.h"
88#include "llvm/Support/MathExtras.h"
89#include "llvm/Support/raw_ostream.h"
90#include "llvm/Target/TargetMachine.h"
91#include "llvm/Target/TargetOptions.h"
92#include <algorithm>
93#include <cassert>
94#include <cstdint>
95#include <iterator>
96#include <list>
97#include <utility>
98#include <vector>
99
100using namespace llvm;
101
102#define DEBUG_TYPE "ppc-lowering"
103
104static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
105cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
106
107static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
108cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
109
110static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
111cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
112
113static cl::opt<bool> DisableSCO("disable-ppc-sco",
114cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
115
116static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
117cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
118
119static cl::opt<bool> EnableQuadPrecision("enable-ppc-quad-precision",
120cl::desc("enable quad precision float support on ppc"), cl::Hidden);
121
122STATISTIC(NumTailCalls, "Number of tail calls");
123STATISTIC(NumSiblingCalls, "Number of sibling calls");
124
125static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
126
127static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
128
129// FIXME: Remove this once the bug has been fixed!
130extern cl::opt<bool> ANDIGlueBug;
131
132PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
133 const PPCSubtarget &STI)
134 : TargetLowering(TM), Subtarget(STI) {
135 // Use _setjmp/_longjmp instead of setjmp/longjmp.
136 setUseUnderscoreSetJmp(true);
137 setUseUnderscoreLongJmp(true);
138
139 // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
140 // arguments are at least 4/8 bytes aligned.
141 bool isPPC64 = Subtarget.isPPC64();
142 setMinStackArgumentAlignment(isPPC64 ? llvm::Align(8) : llvm::Align(4));
143
144 // Set up the register classes.
145 addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
146 if (!useSoftFloat()) {
147 if (hasSPE()) {
148 addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
149 addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
150 } else {
151 addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
152 addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
153 }
154 }
155
156 // Match BITREVERSE to customized fast code sequence in the td file.
157 setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
158 setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
159
160 // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
161 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
162
163 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
164 for (MVT VT : MVT::integer_valuetypes()) {
165 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
166 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
167 }
168
169 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
170
 171 // PowerPC has pre-inc loads and stores.
172 setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
173 setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
174 setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
175 setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
176 setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
177 setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
178 setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
179 setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
180 setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
181 setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
182 if (!Subtarget.hasSPE()) {
183 setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
184 setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
185 setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
186 setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
187 }
188
189 // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
190 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
191 for (MVT VT : ScalarIntVTs) {
192 setOperationAction(ISD::ADDC, VT, Legal);
193 setOperationAction(ISD::ADDE, VT, Legal);
194 setOperationAction(ISD::SUBC, VT, Legal);
195 setOperationAction(ISD::SUBE, VT, Legal);
196 }
197
198 if (Subtarget.useCRBits()) {
199 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
200
201 if (isPPC64 || Subtarget.hasFPCVT()) {
202 setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
203 AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
204 isPPC64 ? MVT::i64 : MVT::i32);
205 setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
206 AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
207 isPPC64 ? MVT::i64 : MVT::i32);
208 } else {
209 setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
210 setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
211 }
212
213 // PowerPC does not support direct load/store of condition registers.
214 setOperationAction(ISD::LOAD, MVT::i1, Custom);
215 setOperationAction(ISD::STORE, MVT::i1, Custom);
216
217 // FIXME: Remove this once the ANDI glue bug is fixed:
218 if (ANDIGlueBug)
219 setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
220
221 for (MVT VT : MVT::integer_valuetypes()) {
222 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
223 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
224 setTruncStoreAction(VT, MVT::i1, Expand);
225 }
226
227 addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
228 }
229
230 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
231 // PPC (the libcall is not available).
232 setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom);
233 setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom);
234
235 // We do not currently implement these libm ops for PowerPC.
236 setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
237 setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
238 setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
239 setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
240 setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
241 setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
242
243 // PowerPC has no SREM/UREM instructions unless we are on P9
244 // On P9 we may use a hardware instruction to compute the remainder.
245 // The instructions are not legalized directly because in the cases where the
246 // result of both the remainder and the division is required it is more
247 // efficient to compute the remainder from the result of the division rather
248 // than use the remainder instruction.
249 if (Subtarget.isISA3_0()) {
250 setOperationAction(ISD::SREM, MVT::i32, Custom);
251 setOperationAction(ISD::UREM, MVT::i32, Custom);
252 setOperationAction(ISD::SREM, MVT::i64, Custom);
253 setOperationAction(ISD::UREM, MVT::i64, Custom);
254 } else {
255 setOperationAction(ISD::SREM, MVT::i32, Expand);
256 setOperationAction(ISD::UREM, MVT::i32, Expand);
257 setOperationAction(ISD::SREM, MVT::i64, Expand);
258 setOperationAction(ISD::UREM, MVT::i64, Expand);
259 }
260
261 // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
262 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
263 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
264 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
265 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
266 setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
267 setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
268 setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
269 setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
270
271 // We don't support sin/cos/sqrt/fmod/pow
272 setOperationAction(ISD::FSIN , MVT::f64, Expand);
273 setOperationAction(ISD::FCOS , MVT::f64, Expand);
274 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
275 setOperationAction(ISD::FREM , MVT::f64, Expand);
276 setOperationAction(ISD::FPOW , MVT::f64, Expand);
277 setOperationAction(ISD::FSIN , MVT::f32, Expand);
278 setOperationAction(ISD::FCOS , MVT::f32, Expand);
279 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
280 setOperationAction(ISD::FREM , MVT::f32, Expand);
281 setOperationAction(ISD::FPOW , MVT::f32, Expand);
282 if (Subtarget.hasSPE()) {
283 setOperationAction(ISD::FMA , MVT::f64, Expand);
284 setOperationAction(ISD::FMA , MVT::f32, Expand);
285 } else {
286 setOperationAction(ISD::FMA , MVT::f64, Legal);
287 setOperationAction(ISD::FMA , MVT::f32, Legal);
288 }
289
290 setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
291
292 // If we're enabling GP optimizations, use hardware square root
293 if (!Subtarget.hasFSQRT() &&
294 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
295 Subtarget.hasFRE()))
296 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
297
298 if (!Subtarget.hasFSQRT() &&
299 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
300 Subtarget.hasFRES()))
301 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
302
303 if (Subtarget.hasFCPSGN()) {
304 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
305 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
306 } else {
307 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
308 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
309 }
310
311 if (Subtarget.hasFPRND()) {
312 setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
313 setOperationAction(ISD::FCEIL, MVT::f64, Legal);
314 setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
315 setOperationAction(ISD::FROUND, MVT::f64, Legal);
316
317 setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
318 setOperationAction(ISD::FCEIL, MVT::f32, Legal);
319 setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
320 setOperationAction(ISD::FROUND, MVT::f32, Legal);
321 }
322
323 // PowerPC does not have BSWAP, but we can use vector BSWAP instruction xxbrd
324 // to speed up scalar BSWAP64.
325 // CTPOP or CTTZ were introduced in P8/P9 respectively
326 setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
327 if (Subtarget.hasP9Vector())
328 setOperationAction(ISD::BSWAP, MVT::i64 , Custom);
329 else
330 setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
331 if (Subtarget.isISA3_0()) {
332 setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
333 setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
334 } else {
335 setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
336 setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
337 }
338
339 if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
340 setOperationAction(ISD::CTPOP, MVT::i32 , Legal);
341 setOperationAction(ISD::CTPOP, MVT::i64 , Legal);
342 } else {
343 setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
344 setOperationAction(ISD::CTPOP, MVT::i64 , Expand);
345 }
346
347 // PowerPC does not have ROTR
348 setOperationAction(ISD::ROTR, MVT::i32 , Expand);
349 setOperationAction(ISD::ROTR, MVT::i64 , Expand);
350
351 if (!Subtarget.useCRBits()) {
352 // PowerPC does not have Select
353 setOperationAction(ISD::SELECT, MVT::i32, Expand);
354 setOperationAction(ISD::SELECT, MVT::i64, Expand);
355 setOperationAction(ISD::SELECT, MVT::f32, Expand);
356 setOperationAction(ISD::SELECT, MVT::f64, Expand);
357 }
358
359 // PowerPC wants to turn select_cc of FP into fsel when possible.
360 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
361 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
362
363 // PowerPC wants to optimize integer setcc a bit
364 if (!Subtarget.useCRBits())
365 setOperationAction(ISD::SETCC, MVT::i32, Custom);
366
367 // PowerPC does not have BRCOND which requires SetCC
368 if (!Subtarget.useCRBits())
369 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
370
371 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
372
373 if (Subtarget.hasSPE()) {
374 // SPE has built-in conversions
375 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
376 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
377 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
378 } else {
379 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
380 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
381
382 // PowerPC does not have [U|S]INT_TO_FP
383 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
384 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
385 }
386
387 if (Subtarget.hasDirectMove() && isPPC64) {
388 setOperationAction(ISD::BITCAST, MVT::f32, Legal);
389 setOperationAction(ISD::BITCAST, MVT::i32, Legal);
390 setOperationAction(ISD::BITCAST, MVT::i64, Legal);
391 setOperationAction(ISD::BITCAST, MVT::f64, Legal);
392 } else {
393 setOperationAction(ISD::BITCAST, MVT::f32, Expand);
394 setOperationAction(ISD::BITCAST, MVT::i32, Expand);
395 setOperationAction(ISD::BITCAST, MVT::i64, Expand);
396 setOperationAction(ISD::BITCAST, MVT::f64, Expand);
397 }
398
399 // We cannot sextinreg(i1). Expand to shifts.
400 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
401
402 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
403 // SjLj exception handling but a light-weight setjmp/longjmp replacement to
 404 // support continuation, user-level threading, and so on. As a result, no
405 // other SjLj exception interfaces are implemented and please don't build
406 // your own exception handling based on them.
407 // LLVM/Clang supports zero-cost DWARF exception handling.
408 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
409 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
410
411 // We want to legalize GlobalAddress and ConstantPool nodes into the
412 // appropriate instructions to materialize the address.
413 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
414 setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
415 setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
416 setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
417 setOperationAction(ISD::JumpTable, MVT::i32, Custom);
418 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
419 setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
420 setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
421 setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
422 setOperationAction(ISD::JumpTable, MVT::i64, Custom);
423
424 // TRAP is legal.
425 setOperationAction(ISD::TRAP, MVT::Other, Legal);
426
427 // TRAMPOLINE is custom lowered.
428 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
429 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
430
431 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
432 setOperationAction(ISD::VASTART , MVT::Other, Custom);
433
434 if (Subtarget.is64BitELFABI()) {
435 // VAARG always uses double-word chunks, so promote anything smaller.
436 setOperationAction(ISD::VAARG, MVT::i1, Promote);
437 AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
438 setOperationAction(ISD::VAARG, MVT::i8, Promote);
439 AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
440 setOperationAction(ISD::VAARG, MVT::i16, Promote);
441 AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
442 setOperationAction(ISD::VAARG, MVT::i32, Promote);
443 AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
444 setOperationAction(ISD::VAARG, MVT::Other, Expand);
445 } else if (Subtarget.is32BitELFABI()) {
446 // VAARG is custom lowered with the 32-bit SVR4 ABI.
447 setOperationAction(ISD::VAARG, MVT::Other, Custom);
448 setOperationAction(ISD::VAARG, MVT::i64, Custom);
449 } else
450 setOperationAction(ISD::VAARG, MVT::Other, Expand);
451
452 // VACOPY is custom lowered with the 32-bit SVR4 ABI.
453 if (Subtarget.is32BitELFABI())
454 setOperationAction(ISD::VACOPY , MVT::Other, Custom);
455 else
456 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
457
458 // Use the default implementation.
459 setOperationAction(ISD::VAEND , MVT::Other, Expand);
460 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
461 setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom);
462 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
463 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom);
464 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
465 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
466 setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
467 setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
468
469 // We want to custom lower some of our intrinsics.
470 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
471
472 // To handle counter-based loop conditions.
473 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
474
475 setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
476 setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
477 setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
478 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
479
480 // Comparisons that require checking two conditions.
481 if (Subtarget.hasSPE()) {
482 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
483 setCondCodeAction(ISD::SETO, MVT::f64, Expand);
484 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
485 setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
486 }
487 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
488 setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
489 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
490 setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
491 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
492 setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
493 setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
494 setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
495 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
496 setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
497 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
498 setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
499
500 if (Subtarget.has64BitSupport()) {
501 // They also have instructions for converting between i64 and fp.
502 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
503 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
504 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
505 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
506 // This is just the low 32 bits of a (signed) fp->i64 conversion.
507 // We cannot do this with Promote because i64 is not a legal type.
508 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
509
510 if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
511 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
512 } else {
513 // PowerPC does not have FP_TO_UINT on 32-bit implementations.
514 if (Subtarget.hasSPE())
515 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
516 else
517 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
518 }
519
520 // With the instructions enabled under FPCVT, we can do everything.
521 if (Subtarget.hasFPCVT()) {
522 if (Subtarget.has64BitSupport()) {
523 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
524 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
525 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
526 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
527 }
528
529 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
530 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
531 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
532 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
533 }
534
535 if (Subtarget.use64BitRegs()) {
536 // 64-bit PowerPC implementations can support i64 types directly
537 addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
538 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
539 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
540 // 64-bit PowerPC wants to expand i128 shifts itself.
541 setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
542 setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
543 setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
544 } else {
545 // 32-bit PowerPC wants to expand i64 shifts itself.
546 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
547 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
548 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
549 }
550
551 if (Subtarget.hasAltivec()) {
552 // First set operation action for all vector types to expand. Then we
553 // will selectively turn on ones that can be effectively codegen'd.
554 for (MVT VT : MVT::vector_valuetypes()) {
555 // add/sub are legal for all supported vector VT's.
556 setOperationAction(ISD::ADD, VT, Legal);
557 setOperationAction(ISD::SUB, VT, Legal);
558
559 // For v2i64, these are only valid with P8Vector. This is corrected after
560 // the loop.
561 if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
562 setOperationAction(ISD::SMAX, VT, Legal);
563 setOperationAction(ISD::SMIN, VT, Legal);
564 setOperationAction(ISD::UMAX, VT, Legal);
565 setOperationAction(ISD::UMIN, VT, Legal);
566 }
567 else {
568 setOperationAction(ISD::SMAX, VT, Expand);
569 setOperationAction(ISD::SMIN, VT, Expand);
570 setOperationAction(ISD::UMAX, VT, Expand);
571 setOperationAction(ISD::UMIN, VT, Expand);
572 }
573
574 if (Subtarget.hasVSX()) {
575 setOperationAction(ISD::FMAXNUM, VT, Legal);
576 setOperationAction(ISD::FMINNUM, VT, Legal);
577 }
578
579 // Vector instructions introduced in P8
580 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
581 setOperationAction(ISD::CTPOP, VT, Legal);
582 setOperationAction(ISD::CTLZ, VT, Legal);
583 }
584 else {
585 setOperationAction(ISD::CTPOP, VT, Expand);
586 setOperationAction(ISD::CTLZ, VT, Expand);
587 }
588
589 // Vector instructions introduced in P9
590 if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
591 setOperationAction(ISD::CTTZ, VT, Legal);
592 else
593 setOperationAction(ISD::CTTZ, VT, Expand);
594
595 // We promote all shuffles to v16i8.
596 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
597 AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
598
599 // We promote all non-typed operations to v4i32.
600 setOperationAction(ISD::AND , VT, Promote);
601 AddPromotedToType (ISD::AND , VT, MVT::v4i32);
602 setOperationAction(ISD::OR , VT, Promote);
603 AddPromotedToType (ISD::OR , VT, MVT::v4i32);
604 setOperationAction(ISD::XOR , VT, Promote);
605 AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
606 setOperationAction(ISD::LOAD , VT, Promote);
607 AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
608 setOperationAction(ISD::SELECT, VT, Promote);
609 AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
610 setOperationAction(ISD::VSELECT, VT, Legal);
611 setOperationAction(ISD::SELECT_CC, VT, Promote);
612 AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
613 setOperationAction(ISD::STORE, VT, Promote);
614 AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
615
616 // No other operations are legal.
617 setOperationAction(ISD::MUL , VT, Expand);
618 setOperationAction(ISD::SDIV, VT, Expand);
619 setOperationAction(ISD::SREM, VT, Expand);
620 setOperationAction(ISD::UDIV, VT, Expand);
621 setOperationAction(ISD::UREM, VT, Expand);
622 setOperationAction(ISD::FDIV, VT, Expand);
623 setOperationAction(ISD::FREM, VT, Expand);
624 setOperationAction(ISD::FNEG, VT, Expand);
625 setOperationAction(ISD::FSQRT, VT, Expand);
626 setOperationAction(ISD::FLOG, VT, Expand);
627 setOperationAction(ISD::FLOG10, VT, Expand);
628 setOperationAction(ISD::FLOG2, VT, Expand);
629 setOperationAction(ISD::FEXP, VT, Expand);
630 setOperationAction(ISD::FEXP2, VT, Expand);
631 setOperationAction(ISD::FSIN, VT, Expand);
632 setOperationAction(ISD::FCOS, VT, Expand);
633 setOperationAction(ISD::FABS, VT, Expand);
634 setOperationAction(ISD::FFLOOR, VT, Expand);
635 setOperationAction(ISD::FCEIL, VT, Expand);
636 setOperationAction(ISD::FTRUNC, VT, Expand);
637 setOperationAction(ISD::FRINT, VT, Expand);
638 setOperationAction(ISD::FNEARBYINT, VT, Expand);
639 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
640 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
641 setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
642 setOperationAction(ISD::MULHU, VT, Expand);
643 setOperationAction(ISD::MULHS, VT, Expand);
644 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
645 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
646 setOperationAction(ISD::UDIVREM, VT, Expand);
647 setOperationAction(ISD::SDIVREM, VT, Expand);
648 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
649 setOperationAction(ISD::FPOW, VT, Expand);
650 setOperationAction(ISD::BSWAP, VT, Expand);
651 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
652 setOperationAction(ISD::ROTL, VT, Expand);
653 setOperationAction(ISD::ROTR, VT, Expand);
654
655 for (MVT InnerVT : MVT::vector_valuetypes()) {
656 setTruncStoreAction(VT, InnerVT, Expand);
657 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
658 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
659 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
660 }
661 }
662 if (!Subtarget.hasP8Vector()) {
663 setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
664 setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
665 setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
666 setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
667 }
668
669 for (auto VT : {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8})
670 setOperationAction(ISD::ABS, VT, Custom);
671
672 // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
673 // with merges, splats, etc.
674 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
675
676 // Vector truncates to sub-word integer that fit in an Altivec/VSX register
677 // are cheap, so handle them before they get expanded to scalar.
678 setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
679 setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
680 setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
681 setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
682 setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
683
684 setOperationAction(ISD::AND , MVT::v4i32, Legal);
685 setOperationAction(ISD::OR , MVT::v4i32, Legal);
686 setOperationAction(ISD::XOR , MVT::v4i32, Legal);
687 setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
688 setOperationAction(ISD::SELECT, MVT::v4i32,
689 Subtarget.useCRBits() ? Legal : Expand);
690 setOperationAction(ISD::STORE , MVT::v4i32, Legal);
691 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
692 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
693 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
694 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
695 setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
696 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
697 setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
698 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
699
700 // Without hasP8Altivec set, v2i64 SMAX isn't available.
701 // But ABS custom lowering requires SMAX support.
702 if (!Subtarget.hasP8Altivec())
703 setOperationAction(ISD::ABS, MVT::v2i64, Expand);
704
705 addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
706 addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
707 addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
708 addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
709
710 setOperationAction(ISD::MUL, MVT::v4f32, Legal);
711 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
712
713 if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) {
714 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
715 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
716 }
717
718 if (Subtarget.hasP8Altivec())
719 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
720 else
721 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
722
723 setOperationAction(ISD::MUL, MVT::v8i16, Custom);
724 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
725
726 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
727 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
728
729 setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
730 setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
731 setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
732 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
733
734 // Altivec does not contain unordered floating-point compare instructions
735 setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
736 setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
737 setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
738 setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
739
740 if (Subtarget.hasVSX()) {
741 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
742 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
743 if (Subtarget.hasP8Vector()) {
744 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
745 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
746 }
747 if (Subtarget.hasDirectMove() && isPPC64) {
748 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
749 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
750 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
751 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
752 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
753 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
754 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
755 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
756 }
757 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
758
759 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
760 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
761 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
762 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
763 setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
764
765 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
766
767 setOperationAction(ISD::MUL, MVT::v2f64, Legal);
768 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
769
770 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
771 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
772
773 // Share the Altivec comparison restrictions.
774 setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
775 setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
776 setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
777 setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
778
779 setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
780 setOperationAction(ISD::STORE, MVT::v2f64, Legal);
781
782 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);
783
784 if (Subtarget.hasP8Vector())
785 addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
786
787 addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
788
789 addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
790 addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
791 addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
792
793 if (Subtarget.hasP8Altivec()) {
794 setOperationAction(ISD::SHL, MVT::v2i64, Legal);
795 setOperationAction(ISD::SRA, MVT::v2i64, Legal);
796 setOperationAction(ISD::SRL, MVT::v2i64, Legal);
797
798 // 128 bit shifts can be accomplished via 3 instructions for SHL and
799 // SRL, but not for SRA because of the instructions available:
800 // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth
801 // doing
802 setOperationAction(ISD::SHL, MVT::v1i128, Expand);
803 setOperationAction(ISD::SRL, MVT::v1i128, Expand);
804 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
805
806 setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
807 }
808 else {
809 setOperationAction(ISD::SHL, MVT::v2i64, Expand);
810 setOperationAction(ISD::SRA, MVT::v2i64, Expand);
811 setOperationAction(ISD::SRL, MVT::v2i64, Expand);
812
813 setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
814
815 // VSX v2i64 only supports non-arithmetic operations.
816 setOperationAction(ISD::ADD, MVT::v2i64, Expand);
817 setOperationAction(ISD::SUB, MVT::v2i64, Expand);
818 }
819
820 setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
821 AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
822 setOperationAction(ISD::STORE, MVT::v2i64, Promote);
823 AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
824
825 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);
826
827 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
828 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
829 setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
830 setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
831
832 // Custom handling for partial vectors of integers converted to
833 // floating point. We already have optimal handling for v2i32 through
834 // the DAG combine, so those aren't necessary.
835 setOperationAction(ISD::UINT_TO_FP, MVT::v2i8, Custom);
836 setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
837 setOperationAction(ISD::UINT_TO_FP, MVT::v2i16, Custom);
838 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
839 setOperationAction(ISD::SINT_TO_FP, MVT::v2i8, Custom);
840 setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Custom);
841 setOperationAction(ISD::SINT_TO_FP, MVT::v2i16, Custom);
842 setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
843
844 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
845 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
846 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
847 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
848 setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
849 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Legal);
850
851 if (Subtarget.hasDirectMove())
852 setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
853 setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
854
855 addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
856 }
857
858 if (Subtarget.hasP8Altivec()) {
859 addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
860 addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
861 }
862
863 if (Subtarget.hasP9Vector()) {
864 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
865 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
866
867 // 128 bit shifts can be accomplished via 3 instructions for SHL and
868 // SRL, but not for SRA because of the instructions available:
869 // VS{RL} and VS{RL}O.
870 setOperationAction(ISD::SHL, MVT::v1i128, Legal);
871 setOperationAction(ISD::SRL, MVT::v1i128, Legal);
872 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
873
874 if (EnableQuadPrecision) {
875 addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
876 setOperationAction(ISD::FADD, MVT::f128, Legal);
877 setOperationAction(ISD::FSUB, MVT::f128, Legal);
878 setOperationAction(ISD::FDIV, MVT::f128, Legal);
879 setOperationAction(ISD::FMUL, MVT::f128, Legal);
880 setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
881 // No extending loads to f128 on PPC.
882 for (MVT FPT : MVT::fp_valuetypes())
883 setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
884 setOperationAction(ISD::FMA, MVT::f128, Legal);
885 setCondCodeAction(ISD::SETULT, MVT::f128, Expand);
886 setCondCodeAction(ISD::SETUGT, MVT::f128, Expand);
887 setCondCodeAction(ISD::SETUEQ, MVT::f128, Expand);
888 setCondCodeAction(ISD::SETOGE, MVT::f128, Expand);
889 setCondCodeAction(ISD::SETOLE, MVT::f128, Expand);
890 setCondCodeAction(ISD::SETONE, MVT::f128, Expand);
891
892 setOperationAction(ISD::FTRUNC, MVT::f128, Legal);
893 setOperationAction(ISD::FRINT, MVT::f128, Legal);
894 setOperationAction(ISD::FFLOOR, MVT::f128, Legal);
895 setOperationAction(ISD::FCEIL, MVT::f128, Legal);
896 setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal);
897 setOperationAction(ISD::FROUND, MVT::f128, Legal);
898
899 setOperationAction(ISD::SELECT, MVT::f128, Expand);
900 setOperationAction(ISD::FP_ROUND, MVT::f64, Legal);
901 setOperationAction(ISD::FP_ROUND, MVT::f32, Legal);
902 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
903 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
904 setOperationAction(ISD::BITCAST, MVT::i128, Custom);
905 // No implementation for these ops for PowerPC.
906 setOperationAction(ISD::FSIN , MVT::f128, Expand);
907 setOperationAction(ISD::FCOS , MVT::f128, Expand);
908 setOperationAction(ISD::FPOW, MVT::f128, Expand);
909 setOperationAction(ISD::FPOWI, MVT::f128, Expand);
910 setOperationAction(ISD::FREM, MVT::f128, Expand);
911 }
912 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
913
914 }
915
916 if (Subtarget.hasP9Altivec()) {
917 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
918 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
919 }
920 }
921
922 if (Subtarget.hasQPX()) {
923 setOperationAction(ISD::FADD, MVT::v4f64, Legal);
924 setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
925 setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
926 setOperationAction(ISD::FREM, MVT::v4f64, Expand);
927
928 setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal);
929 setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand);
930
931 setOperationAction(ISD::LOAD , MVT::v4f64, Custom);
932 setOperationAction(ISD::STORE , MVT::v4f64, Custom);
933
934 setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom);
935 setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom);
936
937 if (!Subtarget.useCRBits())
938 setOperationAction(ISD::SELECT, MVT::v4f64, Expand);
939 setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);
940
941 setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f64, Legal);
942 setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f64, Expand);
943 setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f64, Expand);
944 setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f64, Expand);
945 setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f64, Custom);
946 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal);
947 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom);
948
949 setOperationAction(ISD::FP_TO_SINT , MVT::v4f64, Legal);
950 setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand);
951
952 setOperationAction(ISD::FP_ROUND , MVT::v4f32, Legal);
953 setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal);
954
955 setOperationAction(ISD::FNEG , MVT::v4f64, Legal);
956 setOperationAction(ISD::FABS , MVT::v4f64, Legal);
957 setOperationAction(ISD::FSIN , MVT::v4f64, Expand);
958 setOperationAction(ISD::FCOS , MVT::v4f64, Expand);
959 setOperationAction(ISD::FPOW , MVT::v4f64, Expand);
960 setOperationAction(ISD::FLOG , MVT::v4f64, Expand);
961 setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand);
962 setOperationAction(ISD::FLOG10 , MVT::v4f64, Expand);
963 setOperationAction(ISD::FEXP , MVT::v4f64, Expand);
964 setOperationAction(ISD::FEXP2 , MVT::v4f64, Expand);
965
966 setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal);
967 setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal);
968
969 setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal);
970 setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal);
971
972 addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass);
973
974 setOperationAction(ISD::FADD, MVT::v4f32, Legal);
975 setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
976 setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
977 setOperationAction(ISD::FREM, MVT::v4f32, Expand);
978
979 setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
980 setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand);
981
982 setOperationAction(ISD::LOAD , MVT::v4f32, Custom);
983 setOperationAction(ISD::STORE , MVT::v4f32, Custom);
984
985 if (!Subtarget.useCRBits())
986 setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
987 setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
988
989 setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f32, Legal);
990 setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f32, Expand);
991 setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f32, Expand);
992 setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f32, Expand);
993 setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f32, Custom);
994 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
995 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
996
997 setOperationAction(ISD::FP_TO_SINT , MVT::v4f32, Legal);
998 setOperationAction(ISD::FP_TO_UINT , MVT::v4f32, Expand);
999
1000 setOperationAction(ISD::FNEG , MVT::v4f32, Legal);
1001 setOperationAction(ISD::FABS , MVT::v4f32, Legal);
1002 setOperationAction(ISD::FSIN , MVT::v4f32, Expand);
1003 setOperationAction(ISD::FCOS , MVT::v4f32, Expand);
1004 setOperationAction(ISD::FPOW , MVT::v4f32, Expand);
1005 setOperationAction(ISD::FLOG , MVT::v4f32, Expand);
1006 setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand);
1007 setOperationAction(ISD::FLOG10 , MVT::v4f32, Expand);
1008 setOperationAction(ISD::FEXP , MVT::v4f32, Expand);
1009 setOperationAction(ISD::FEXP2 , MVT::v4f32, Expand);
1010
1011 setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
1012 setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
1013
1014 setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal);
1015 setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal);
1016
1017 addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass);
1018
1019 setOperationAction(ISD::AND , MVT::v4i1, Legal);
1020 setOperationAction(ISD::OR , MVT::v4i1, Legal);
1021 setOperationAction(ISD::XOR , MVT::v4i1, Legal);
1022
1023 if (!Subtarget.useCRBits())
1024 setOperationAction(ISD::SELECT, MVT::v4i1, Expand);
1025 setOperationAction(ISD::VSELECT, MVT::v4i1, Legal);
1026
1027 setOperationAction(ISD::LOAD , MVT::v4i1, Custom);
1028 setOperationAction(ISD::STORE , MVT::v4i1, Custom);
1029
1030 setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4i1, Custom);
1031 setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4i1, Expand);
1032 setOperationAction(ISD::CONCAT_VECTORS , MVT::v4i1, Expand);
1033 setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4i1, Expand);
1034 setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4i1, Custom);
1035 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand);
1036 setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);
1037
1038 setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
1039 setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);
1040
1041 addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass);
1042
1043 setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
1044 setOperationAction(ISD::FCEIL, MVT::v4f64, Legal);
1045 setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
1046 setOperationAction(ISD::FROUND, MVT::v4f64, Legal);
1047
1048 setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
1049 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
1050 setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
1051 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
1052
1053 setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand);
1054 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
1055
1056 // These need to set FE_INEXACT, and so cannot be vectorized here.
1057 setOperationAction(ISD::FRINT, MVT::v4f64, Expand);
1058 setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
1059
1060 if (TM.Options.UnsafeFPMath) {
1061 setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
1062 setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
1063
1064 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
1065 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
1066 } else {
1067 setOperationAction(ISD::FDIV, MVT::v4f64, Expand);
1068 setOperationAction(ISD::FSQRT, MVT::v4f64, Expand);
1069
1070 setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
1071 setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
1072 }
1073 }
1074
1075 if (Subtarget.has64BitSupport())
1076 setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
1077
1078 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1079
1080 if (!isPPC64) {
1081 setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
1082 setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
1083 }
1084
1085 setBooleanContents(ZeroOrOneBooleanContent);
1086
1087 if (Subtarget.hasAltivec()) {
1088 // Altivec instructions set fields to all zeros or all ones.
1089 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
1090 }
1091
1092 if (!isPPC64) {
1093 // These libcalls are not available in 32-bit.
1094 setLibcallName(RTLIB::SHL_I128, nullptr);
1095 setLibcallName(RTLIB::SRL_I128, nullptr);
1096 setLibcallName(RTLIB::SRA_I128, nullptr);
1097 }
1098
1099 setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1100
1101 // We have target-specific dag combine patterns for the following nodes:
1102 setTargetDAGCombine(ISD::ADD);
1103 setTargetDAGCombine(ISD::SHL);
1104 setTargetDAGCombine(ISD::SRA);
1105 setTargetDAGCombine(ISD::SRL);
1106 setTargetDAGCombine(ISD::MUL);
1107 setTargetDAGCombine(ISD::SINT_TO_FP);
1108 setTargetDAGCombine(ISD::BUILD_VECTOR);
1109 if (Subtarget.hasFPCVT())
1110 setTargetDAGCombine(ISD::UINT_TO_FP);
1111 setTargetDAGCombine(ISD::LOAD);
1112 setTargetDAGCombine(ISD::STORE);
1113 setTargetDAGCombine(ISD::BR_CC);
1114 if (Subtarget.useCRBits())
1115 setTargetDAGCombine(ISD::BRCOND);
1116 setTargetDAGCombine(ISD::BSWAP);
1117 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
1118 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
1119 setTargetDAGCombine(ISD::INTRINSIC_VOID);
1120
1121 setTargetDAGCombine(ISD::SIGN_EXTEND);
1122 setTargetDAGCombine(ISD::ZERO_EXTEND);
1123 setTargetDAGCombine(ISD::ANY_EXTEND);
1124
1125 setTargetDAGCombine(ISD::TRUNCATE);
1126 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
1127
1128
1129 if (Subtarget.useCRBits()) {
1130 setTargetDAGCombine(ISD::TRUNCATE);
1131 setTargetDAGCombine(ISD::SETCC);
1132 setTargetDAGCombine(ISD::SELECT_CC);
1133 }
1134
1135 // Use reciprocal estimates.
1136 if (TM.Options.UnsafeFPMath) {
1137 setTargetDAGCombine(ISD::FDIV);
1138 setTargetDAGCombine(ISD::FSQRT);
1139 }
1140
1141 if (Subtarget.hasP9Altivec()) {
1142 setTargetDAGCombine(ISD::ABS);
1143 setTargetDAGCombine(ISD::VSELECT);
1144 }
1145
1146 // Darwin long double math library functions have $LDBL128 appended.
1147 if (Subtarget.isDarwin()) {
1148 setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
1149 setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
1150 setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
1151 setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
1152 setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
1153 setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
1154 setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
1155 setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
1156 setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
1157 setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
1158 }
1159
1160 if (EnableQuadPrecision) {
1161 setLibcallName(RTLIB::LOG_F128, "logf128");
1162 setLibcallName(RTLIB::LOG2_F128, "log2f128");
1163 setLibcallName(RTLIB::LOG10_F128, "log10f128");
1164 setLibcallName(RTLIB::EXP_F128, "expf128");
1165 setLibcallName(RTLIB::EXP2_F128, "exp2f128");
1166 setLibcallName(RTLIB::SIN_F128, "sinf128");
1167 setLibcallName(RTLIB::COS_F128, "cosf128");
1168 setLibcallName(RTLIB::POW_F128, "powf128");
1169 setLibcallName(RTLIB::FMIN_F128, "fminf128");
1170 setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
1171 setLibcallName(RTLIB::POWI_F128, "__powikf2");
1172 setLibcallName(RTLIB::REM_F128, "fmodf128");
1173 }
1174
1175 // With 32 condition bits, we don't need to sink (and duplicate) compares
1176 // aggressively in CodeGenPrep.
1177 if (Subtarget.useCRBits()) {
1178 setHasMultipleConditionRegisters();
1179 setJumpIsExpensive();
1180 }
1181
1182 setMinFunctionAlignment(llvm::Align(4));
1183 if (Subtarget.isDarwin())
1184 setPrefFunctionAlignment(llvm::Align(16));
1185
1186 switch (Subtarget.getDarwinDirective()) {
1187 default: break;
1188 case PPC::DIR_970:
1189 case PPC::DIR_A2:
1190 case PPC::DIR_E500:
1191 case PPC::DIR_E500mc:
1192 case PPC::DIR_E5500:
1193 case PPC::DIR_PWR4:
1194 case PPC::DIR_PWR5:
1195 case PPC::DIR_PWR5X:
1196 case PPC::DIR_PWR6:
1197 case PPC::DIR_PWR6X:
1198 case PPC::DIR_PWR7:
1199 case PPC::DIR_PWR8:
1200 case PPC::DIR_PWR9:
1201 setPrefLoopAlignment(llvm::Align(16));
1202 setPrefFunctionAlignment(llvm::Align(16));
1203 break;
1204 }
1205
1206 if (Subtarget.enableMachineScheduler())
1207 setSchedulingPreference(Sched::Source);
1208 else
1209 setSchedulingPreference(Sched::Hybrid);
1210
1211 computeRegisterProperties(STI.getRegisterInfo());
1212
1213 // The Freescale cores do better with aggressive inlining of memcpy and
1214 // friends. GCC uses same threshold of 128 bytes (= 32 word stores).
1215 if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
1216 Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
1217 MaxStoresPerMemset = 32;
1218 MaxStoresPerMemsetOptSize = 16;
1219 MaxStoresPerMemcpy = 32;
1220 MaxStoresPerMemcpyOptSize = 8;
1221 MaxStoresPerMemmove = 32;
1222 MaxStoresPerMemmoveOptSize = 8;
1223 } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) {
1224 // The A2 also benefits from (very) aggressive inlining of memcpy and
 1225 // friends. The overhead of the function call, even when warm, can be
1226 // over one hundred cycles.
1227 MaxStoresPerMemset = 128;
1228 MaxStoresPerMemcpy = 128;
1229 MaxStoresPerMemmove = 128;
1230 MaxLoadsPerMemcmp = 128;
1231 } else {
1232 MaxLoadsPerMemcmp = 8;
1233 MaxLoadsPerMemcmpOptSize = 4;
1234 }
1235}
1236
1237/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1238/// the desired ByVal argument alignment.
1239static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
1240 unsigned MaxMaxAlign) {
1241 if (MaxAlign == MaxMaxAlign)
1242 return;
1243 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1244 if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
1245 MaxAlign = 32;
1246 else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
1247 MaxAlign = 16;
1248 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1249 unsigned EltAlign = 0;
1250 getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1251 if (EltAlign > MaxAlign)
1252 MaxAlign = EltAlign;
1253 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1254 for (auto *EltTy : STy->elements()) {
1255 unsigned EltAlign = 0;
1256 getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1257 if (EltAlign > MaxAlign)
1258 MaxAlign = EltAlign;
1259 if (MaxAlign == MaxMaxAlign)
1260 break;
1261 }
1262 }
1263}
1264
1265/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1266/// function arguments in the caller parameter area.
1267unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
1268 const DataLayout &DL) const {
1269 // Darwin passes everything on 4 byte boundary.
1270 if (Subtarget.isDarwin())
1271 return 4;
1272
1273 // 16-byte and wider vectors are passed on a 16-byte boundary.
1274 // Everything else is passed on an 8-byte boundary on PPC64 and a 4-byte boundary on PPC32.
1275 unsigned Align = Subtarget.isPPC64() ? 8 : 4;
1276 if (Subtarget.hasAltivec() || Subtarget.hasQPX())
1277 getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16);
1278 return Align;
1279}
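As an illustration of the two routines above, here is a small standalone sketch (not part of PPCISelLowering.cpp; the toy type model and helper names are made up and stand in for llvm::Type): a 128-bit vector member of a by-value struct raises the required alignment to 16 under an Altivec-style cap of 16, while the cap itself (32 with QPX) bounds the result.

#include <algorithm>
#include <cstdio>
#include <vector>

struct ToyType {
  enum Kind { Scalar, Vector, Aggregate } K = Scalar;
  unsigned VectorBits = 0;            // used when K == Vector
  std::vector<ToyType> Elements;      // used when K == Aggregate
};

static void toyMaxByValAlign(const ToyType &Ty, unsigned &MaxAlign,
                             unsigned MaxMaxAlign) {
  if (MaxAlign == MaxMaxAlign)
    return;
  if (Ty.K == ToyType::Vector) {
    if (MaxMaxAlign >= 32 && Ty.VectorBits >= 256)
      MaxAlign = 32;
    else if (Ty.VectorBits >= 128 && MaxAlign < 16)
      MaxAlign = 16;
  } else if (Ty.K == ToyType::Aggregate) {
    for (const ToyType &Elt : Ty.Elements) {
      unsigned EltAlign = 0;
      toyMaxByValAlign(Elt, EltAlign, MaxMaxAlign);
      MaxAlign = std::max(MaxAlign, EltAlign);
      if (MaxAlign == MaxMaxAlign)
        break;
    }
  }
}

int main() {
  ToyType Vec;                 // a 128-bit vector member, e.g. <4 x i32>
  Vec.K = ToyType::Vector;
  Vec.VectorBits = 128;
  ToyType Int;                 // a plain scalar member, e.g. i32
  ToyType S;                   // struct { <4 x i32>; i32 }
  S.K = ToyType::Aggregate;
  S.Elements = {Vec, Int};
  unsigned Align = 4;          // the PPC32 default used above
  toyMaxByValAlign(S, Align, /*MaxMaxAlign=*/16);
  std::printf("ByVal alignment: %u\n", Align);   // prints 16
}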
1280
1281bool PPCTargetLowering::useSoftFloat() const {
1282 return Subtarget.useSoftFloat();
1283}
1284
1285bool PPCTargetLowering::hasSPE() const {
1286 return Subtarget.hasSPE();
1287}
1288
1289bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
1290 return VT.isScalarInteger();
1291}
1292
1293const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1294 switch ((PPCISD::NodeType)Opcode) {
1295 case PPCISD::FIRST_NUMBER: break;
1296 case PPCISD::FSEL: return "PPCISD::FSEL";
1297 case PPCISD::FCFID: return "PPCISD::FCFID";
1298 case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
1299 case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
1300 case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
1301 case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
1302 case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
1303 case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
1304 case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
1305 case PPCISD::FP_TO_UINT_IN_VSR:
1306 return "PPCISD::FP_TO_UINT_IN_VSR";
1307 case PPCISD::FP_TO_SINT_IN_VSR:
1308 return "PPCISD::FP_TO_SINT_IN_VSR";
1309 case PPCISD::FRE: return "PPCISD::FRE";
1310 case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
1311 case PPCISD::STFIWX: return "PPCISD::STFIWX";
1312 case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
1313 case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
1314 case PPCISD::VPERM: return "PPCISD::VPERM";
1315 case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
1316 case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
1317 case PPCISD::XXREVERSE: return "PPCISD::XXREVERSE";
1318 case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
1319 case PPCISD::VECSHL: return "PPCISD::VECSHL";
1320 case PPCISD::CMPB: return "PPCISD::CMPB";
1321 case PPCISD::Hi: return "PPCISD::Hi";
1322 case PPCISD::Lo: return "PPCISD::Lo";
1323 case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
1324 case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1325 case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
1326 case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
1327 case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
1328 case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
1329 case PPCISD::SRL: return "PPCISD::SRL";
1330 case PPCISD::SRA: return "PPCISD::SRA";
1331 case PPCISD::SHL: return "PPCISD::SHL";
1332 case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
1333 case PPCISD::CALL: return "PPCISD::CALL";
1334 case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
1335 case PPCISD::MTCTR: return "PPCISD::MTCTR";
1336 case PPCISD::BCTRL: return "PPCISD::BCTRL";
1337 case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
1338 case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
1339 case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
1340 case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
1341 case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1342 case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
1343 case PPCISD::MFVSR: return "PPCISD::MFVSR";
1344 case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
1345 case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
1346 case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
1347 case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
1348 case PPCISD::ANDIo_1_EQ_BIT: return "PPCISD::ANDIo_1_EQ_BIT";
1349 case PPCISD::ANDIo_1_GT_BIT: return "PPCISD::ANDIo_1_GT_BIT";
1350 case PPCISD::VCMP: return "PPCISD::VCMP";
1351 case PPCISD::VCMPo: return "PPCISD::VCMPo";
1352 case PPCISD::LBRX: return "PPCISD::LBRX";
1353 case PPCISD::STBRX: return "PPCISD::STBRX";
1354 case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
1355 case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
1356 case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
1357 case PPCISD::STXSIX: return "PPCISD::STXSIX";
1358 case PPCISD::VEXTS: return "PPCISD::VEXTS";
1359 case PPCISD::SExtVElems: return "PPCISD::SExtVElems";
1360 case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
1361 case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
1362 case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE";
1363 case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE";
1364 case PPCISD::ST_VSR_SCAL_INT:
1365 return "PPCISD::ST_VSR_SCAL_INT";
1366 case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
1367 case PPCISD::BDNZ: return "PPCISD::BDNZ";
1368 case PPCISD::BDZ: return "PPCISD::BDZ";
1369 case PPCISD::MFFS: return "PPCISD::MFFS";
1370 case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
1371 case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
1372 case PPCISD::CR6SET: return "PPCISD::CR6SET";
1373 case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
1374 case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
1375 case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
1376 case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1377 case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
1378 case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
1379 case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
1380 case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
1381 case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
1382 case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1383 case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
1384 case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
1385 case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
1386 case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1387 case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1388 case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
1389 case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
1390 case PPCISD::SC: return "PPCISD::SC";
1391 case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";
1392 case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";
1393 case PPCISD::RFEBB: return "PPCISD::RFEBB";
1394 case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
1395 case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
1396 case PPCISD::VABSD: return "PPCISD::VABSD";
1397 case PPCISD::QVFPERM: return "PPCISD::QVFPERM";
1398 case PPCISD::QVGPCI: return "PPCISD::QVGPCI";
1399 case PPCISD::QVALIGNI: return "PPCISD::QVALIGNI";
1400 case PPCISD::QVESPLATI: return "PPCISD::QVESPLATI";
1401 case PPCISD::QBFLT: return "PPCISD::QBFLT";
1402 case PPCISD::QVLFSb: return "PPCISD::QVLFSb";
1403 case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
1404 case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
1405 case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
1406 case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
1407 case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
1408 case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
1409 }
1410 return nullptr;
1411}
1412
1413EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
1414 EVT VT) const {
1415 if (!VT.isVector())
1416 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1417
1418 if (Subtarget.hasQPX())
1419 return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements());
1420
1421 return VT.changeVectorElementTypeToInteger();
1422}
1423
1424bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
1425 assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1426 return true;
1427}
1428
1429//===----------------------------------------------------------------------===//
1430// Node matching predicates, for use by the tblgen matching code.
1431//===----------------------------------------------------------------------===//
1432
1433/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1434static bool isFloatingPointZero(SDValue Op) {
1435 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1436 return CFP->getValueAPF().isZero();
1437 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1438 // Maybe this has already been legalized into the constant pool?
1439 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1440 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1441 return CFP->getValueAPF().isZero();
1442 }
1443 return false;
1444}
1445
1446/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
1447/// true if Op is undef or if it matches the specified value.
1448static bool isConstantOrUndef(int Op, int Val) {
1449 return Op < 0 || Op == Val;
1450}
1451
1452/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1453/// VPKUHUM instruction.
1454/// The ShuffleKind distinguishes between big-endian operations with
1455/// two different inputs (0), either-endian operations with two identical
1456/// inputs (1), and little-endian operations with two different inputs (2).
1457/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1458bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1459 SelectionDAG &DAG) {
1460 bool IsLE = DAG.getDataLayout().isLittleEndian();
1461 if (ShuffleKind == 0) {
1462 if (IsLE)
1463 return false;
1464 for (unsigned i = 0; i != 16; ++i)
1465 if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1466 return false;
1467 } else if (ShuffleKind == 2) {
1468 if (!IsLE)
1469 return false;
1470 for (unsigned i = 0; i != 16; ++i)
1471 if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1472 return false;
1473 } else if (ShuffleKind == 1) {
1474 unsigned j = IsLE ? 0 : 1;
1475 for (unsigned i = 0; i != 8; ++i)
1476 if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
1477 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
1478 return false;
1479 }
1480 return true;
1481}
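A quick standalone check of the property the big-endian, two-input case tests above: a VPKUHUM result takes byte i from byte i*2+1 of the 32-byte concatenation of the inputs. This is an illustrative sketch, not code from this file; the helper name is made up.

#include <cstdio>

// Same predicate as isConstantOrUndef above: a negative mask entry is undef
// and matches anything.
static bool constantOrUndef(int Op, int Val) { return Op < 0 || Op == Val; }

int main() {
  // Big-endian, two-input VPKUHUM: the low (rightmost) byte of every halfword.
  int Mask[16] = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31};
  bool Match = true;
  for (int i = 0; i != 16; ++i)
    if (!constantOrUndef(Mask[i], i * 2 + 1))
      Match = false;
  std::printf("ShuffleKind 0 (BE) VPKUHUM mask: %s\n", Match ? "yes" : "no");
}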
1482
1483/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1484/// VPKUWUM instruction.
1485/// The ShuffleKind distinguishes between big-endian operations with
1486/// two different inputs (0), either-endian operations with two identical
1487/// inputs (1), and little-endian operations with two different inputs (2).
1488/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1489bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1490 SelectionDAG &DAG) {
1491 bool IsLE = DAG.getDataLayout().isLittleEndian();
1492 if (ShuffleKind == 0) {
1493 if (IsLE)
1494 return false;
1495 for (unsigned i = 0; i != 16; i += 2)
1496 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
1497 !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
1498 return false;
1499 } else if (ShuffleKind == 2) {
1500 if (!IsLE)
1501 return false;
1502 for (unsigned i = 0; i != 16; i += 2)
1503 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1504 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
1505 return false;
1506 } else if (ShuffleKind == 1) {
1507 unsigned j = IsLE ? 0 : 2;
1508 for (unsigned i = 0; i != 8; i += 2)
1509 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1510 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1511 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1512 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
1513 return false;
1514 }
1515 return true;
1516}
1517
1518/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1519/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1520/// current subtarget.
1521///
1522/// The ShuffleKind distinguishes between big-endian operations with
1523/// two different inputs (0), either-endian operations with two identical
1524/// inputs (1), and little-endian operations with two different inputs (2).
1525/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1526bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1527 SelectionDAG &DAG) {
1528 const PPCSubtarget& Subtarget =
1529 static_cast<const PPCSubtarget&>(DAG.getSubtarget());
1530 if (!Subtarget.hasP8Vector())
1531 return false;
1532
1533 bool IsLE = DAG.getDataLayout().isLittleEndian();
1534 if (ShuffleKind == 0) {
1535 if (IsLE)
1536 return false;
1537 for (unsigned i = 0; i != 16; i += 4)
1538 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||
1539 !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
1540 !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
1541 !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
1542 return false;
1543 } else if (ShuffleKind == 2) {
1544 if (!IsLE)
1545 return false;
1546 for (unsigned i = 0; i != 16; i += 4)
1547 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1548 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
1549 !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
1550 !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
1551 return false;
1552 } else if (ShuffleKind == 1) {
1553 unsigned j = IsLE ? 0 : 4;
1554 for (unsigned i = 0; i != 8; i += 4)
1555 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1556 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1557 !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
1558 !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
1559 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1560 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
1561 !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1562 !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1563 return false;
1564 }
1565 return true;
1566}
1567
1568/// isVMerge - Common function, used to match vmrg* shuffles.
1569///
1570static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
1571 unsigned LHSStart, unsigned RHSStart) {
1572 if (N->getValueType(0) != MVT::v16i8)
1573 return false;
1574 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
1575        "Unsupported merge size!");
1576
1577 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
1578 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
1579 if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
1580 LHSStart+j+i*UnitSize) ||
1581 !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
1582 RHSStart+j+i*UnitSize))
1583 return false;
1584 }
1585 return true;
1586}
1587
1588/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
1589/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
1590/// The ShuffleKind distinguishes between big-endian merges with two
1591/// different inputs (0), either-endian merges with two identical inputs (1),
1592/// and little-endian merges with two different inputs (2). For the latter,
1593/// the input operands are swapped (see PPCInstrAltivec.td).
1594bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1595 unsigned ShuffleKind, SelectionDAG &DAG) {
1596 if (DAG.getDataLayout().isLittleEndian()) {
1597 if (ShuffleKind == 1) // unary
1598 return isVMerge(N, UnitSize, 0, 0);
1599 else if (ShuffleKind == 2) // swapped
1600 return isVMerge(N, UnitSize, 0, 16);
1601 else
1602 return false;
1603 } else {
1604 if (ShuffleKind == 1) // unary
1605 return isVMerge(N, UnitSize, 8, 8);
1606 else if (ShuffleKind == 0) // normal
1607 return isVMerge(N, UnitSize, 8, 24);
1608 else
1609 return false;
1610 }
1611}
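To make the start offsets concrete, here is a standalone sketch (not code from this file; names are made up) of the little-endian unary case, isVMerge(N, 1, 0, 0): a vmrglb of a vector with itself duplicates bytes 0..7.

#include <cstdio>

static bool constantOrUndef(int Op, int Val) { return Op < 0 || Op == Val; }

// Mirrors the byte-merge check above for UnitSize == 1 with both start
// offsets equal to 0 (the little-endian unary case).
static bool isUnaryVMRGLByteMask(const int Mask[16]) {
  for (int i = 0; i != 8; ++i)
    if (!constantOrUndef(Mask[2 * i], i) ||
        !constantOrUndef(Mask[2 * i + 1], i))
      return false;
  return true;
}

int main() {
  int Mask[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};
  std::printf("unary vmrglb mask: %s\n",
              isUnaryVMRGLByteMask(Mask) ? "yes" : "no");   // yes
}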
1612
1613/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
1614/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
1615/// The ShuffleKind distinguishes between big-endian merges with two
1616/// different inputs (0), either-endian merges with two identical inputs (1),
1617/// and little-endian merges with two different inputs (2). For the latter,
1618/// the input operands are swapped (see PPCInstrAltivec.td).
1619bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1620 unsigned ShuffleKind, SelectionDAG &DAG) {
1621 if (DAG.getDataLayout().isLittleEndian()) {
1622 if (ShuffleKind == 1) // unary
1623 return isVMerge(N, UnitSize, 8, 8);
1624 else if (ShuffleKind == 2) // swapped
1625 return isVMerge(N, UnitSize, 8, 24);
1626 else
1627 return false;
1628 } else {
1629 if (ShuffleKind == 1) // unary
1630 return isVMerge(N, UnitSize, 0, 0);
1631 else if (ShuffleKind == 0) // normal
1632 return isVMerge(N, UnitSize, 0, 16);
1633 else
1634 return false;
1635 }
1636}
1637
1638/**
1639 * Common function used to match vmrgew and vmrgow shuffles
1640 *
1641 * The indexOffset determines whether to look for even or odd words in
1642 * the shuffle mask. This is based on the endianness of the target
1643 * machine.
1644 * - Little Endian:
1645 * - Use offset of 0 to check for odd elements
1646 * - Use offset of 4 to check for even elements
1647 * - Big Endian:
1648 * - Use offset of 0 to check for even elements
1649 * - Use offset of 4 to check for odd elements
1650 * A detailed description of the vector element ordering for little endian and
1651 * big endian can be found at
1652 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
1653 * Targeting your applications - what little endian and big endian IBM XL C/C++
1654 * compiler differences mean to you
1655 *
1656 * The mask to the shuffle vector instruction specifies the indices of the
1657 * elements from the two input vectors to place in the result. The elements are
1658 * numbered in array-access order, starting with the first vector. These vectors
1659 * are always of type v16i8, thus each vector contains 16 byte-sized elements.
1660 * More info on the shuffle vector can be found in the
1661 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
1662 * Language Reference.
1663 *
1664 * The RHSStartValue indicates whether the same input vectors are used (unary)
1665 * or two different input vectors are used, based on the following:
1666 * - If the instruction uses the same vector for both inputs, the range of the
1667 * indices will be 0 to 15. In this case, the RHSStart value passed should
1668 * be 0.
1669 * - If the instruction has two different vectors then the range of the
1670 * indices will be 0 to 31. In this case, the RHSStart value passed should
1671 * be 16 (indices 0-15 specify elements in the first vector while indices 16
1672 * to 31 specify elements in the second vector).
1673 *
1674 * \param[in] N The shuffle vector SD Node to analyze
1675 * \param[in] IndexOffset Specifies whether to look for even or odd elements
1676 * \param[in] RHSStartValue Specifies the starting index for the righthand input
1677 * vector to the shuffle_vector instruction
1678 * \return true iff this shuffle vector represents an even or odd word merge
1679 */
1680static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
1681 unsigned RHSStartValue) {
1682 if (N->getValueType(0) != MVT::v16i8)
1683 return false;
1684
1685 for (unsigned i = 0; i < 2; ++i)
1686 for (unsigned j = 0; j < 4; ++j)
1687 if (!isConstantOrUndef(N->getMaskElt(i*4+j),
1688 i*RHSStartValue+j+IndexOffset) ||
1689 !isConstantOrUndef(N->getMaskElt(i*4+j+8),
1690 i*RHSStartValue+j+IndexOffset+8))
1691 return false;
1692 return true;
1693}
1694
1695/**
1696 * Determine if the specified shuffle mask is suitable for the vmrgew or
1697 * vmrgow instructions.
1698 *
1699 * \param[in] N The shuffle vector SD Node to analyze
1700 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
1701 * \param[in] ShuffleKind Identify the type of merge:
1702 * - 0 = big-endian merge with two different inputs;
1703 * - 1 = either-endian merge with two identical inputs;
1704 * - 2 = little-endian merge with two different inputs (inputs are swapped for
1705 * little-endian merges).
1706 * \param[in] DAG The current SelectionDAG
1707 * \return true iff this shuffle mask represents the requested even or odd word merge
1708 */
1709bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
1710 unsigned ShuffleKind, SelectionDAG &DAG) {
1711 if (DAG.getDataLayout().isLittleEndian()) {
1712 unsigned indexOffset = CheckEven ? 4 : 0;
1713 if (ShuffleKind == 1) // Unary
1714 return isVMerge(N, indexOffset, 0);
1715 else if (ShuffleKind == 2) // swapped
1716 return isVMerge(N, indexOffset, 16);
1717 else
1718 return false;
1719 }
1720 else {
1721 unsigned indexOffset = CheckEven ? 0 : 4;
1722 if (ShuffleKind == 1) // Unary
1723 return isVMerge(N, indexOffset, 0);
1724 else if (ShuffleKind == 0) // Normal
1725 return isVMerge(N, indexOffset, 16);
1726 else
1727 return false;
1728 }
1729 return false;
1730}
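A standalone sketch (not code from this file; names are made up) of the word-merge predicate for the little-endian unary even case, where CheckEven = true gives IndexOffset = 4 and RHSStartValue = 0:

#include <cstdio>

static bool constantOrUndef(int Op, int Val) { return Op < 0 || Op == Val; }

// Mirrors the word-merge check above: IndexOffset selects even/odd words and
// RHSStartValue is 0 for the unary form, 16 when two distinct inputs are used.
static bool isWordMergeMask(const int Mask[16], unsigned IndexOffset,
                            unsigned RHSStartValue) {
  for (unsigned i = 0; i < 2; ++i)
    for (unsigned j = 0; j < 4; ++j)
      if (!constantOrUndef(Mask[i * 4 + j],
                           i * RHSStartValue + j + IndexOffset) ||
          !constantOrUndef(Mask[i * 4 + j + 8],
                           i * RHSStartValue + j + IndexOffset + 8))
        return false;
  return true;
}

int main() {
  // Little-endian unary "even" merge (vmrgew with the same input twice):
  int Mask[16] = {4, 5, 6, 7, 4, 5, 6, 7, 12, 13, 14, 15, 12, 13, 14, 15};
  std::printf("LE unary vmrgew mask: %s\n",
              isWordMergeMask(Mask, /*IndexOffset=*/4, /*RHSStartValue=*/0)
                  ? "yes" : "no");   // yes
}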
1731
1732/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
1733/// amount, otherwise return -1.
1734/// The ShuffleKind distinguishes between big-endian operations with two
1735/// different inputs (0), either-endian operations with two identical inputs
1736/// (1), and little-endian operations with two different inputs (2). For the
1737/// latter, the input operands are swapped (see PPCInstrAltivec.td).
1738int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
1739 SelectionDAG &DAG) {
1740 if (N->getValueType(0) != MVT::v16i8)
1741 return -1;
1742
1743 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
1744
1745 // Find the first non-undef value in the shuffle mask.
1746 unsigned i;
1747 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
1748 /*search*/;
1749
1750 if (i == 16) return -1; // all undef.
1751
1752 // Otherwise, check to see if the rest of the elements are consecutively
1753 // numbered from this value.
1754 unsigned ShiftAmt = SVOp->getMaskElt(i);
1755 if (ShiftAmt < i) return -1;
1756
1757 ShiftAmt -= i;
1758 bool isLE = DAG.getDataLayout().isLittleEndian();
1759
1760 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
1761 // Check the rest of the elements to see if they are consecutive.
1762 for (++i; i != 16; ++i)
1763 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
1764 return -1;
1765 } else if (ShuffleKind == 1) {
1766 // Check the rest of the elements to see if they are consecutive.
1767 for (++i; i != 16; ++i)
1768 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
1769 return -1;
1770 } else
1771 return -1;
1772
1773 if (isLE)
1774 ShiftAmt = 16 - ShiftAmt;
1775
1776 return ShiftAmt;
1777}
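A standalone sketch (not code from this file; names are made up) of the two-input case for a fully defined mask: the entries must be ShiftAmt, ShiftAmt+1, ..., ShiftAmt+15, and little-endian targets report 16 - ShiftAmt.

#include <cstdio>

static bool constantOrUndef(int Op, int Val) { return Op < 0 || Op == Val; }

// Mirrors the consecutive-elements check above, without the leading-undef
// search, and applies the little-endian adjustment at the end.
static int vsldoiShift(const int Mask[16], bool IsLE) {
  int ShiftAmt = Mask[0];
  if (ShiftAmt < 0)
    return -1;                     // keep the sketch simple: no leading undefs
  for (int i = 1; i != 16; ++i)
    if (!constantOrUndef(Mask[i], ShiftAmt + i))
      return -1;
  return IsLE ? 16 - ShiftAmt : ShiftAmt;
}

int main() {
  int Mask[16] = {3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
  std::printf("BE shift: %d  LE shift: %d\n",
              vsldoiShift(Mask, /*IsLE=*/false),   // 3
              vsldoiShift(Mask, /*IsLE=*/true));   // 13
}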
1778
1779/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
1780/// specifies a splat of a single element that is suitable for input to
1781/// VSPLTB/VSPLTH/VSPLTW.
1782bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
1783 assert(N->getValueType(0) == MVT::v16i8 &&
1784        (EltSize == 1 || EltSize == 2 || EltSize == 4));
1785
1786 // The consecutive indices need to specify an element, not part of two
1787 // different elements. So abandon ship early if this isn't the case.
1788 if (N->getMaskElt(0) % EltSize != 0)
1789 return false;
1790
1791 // This is a splat operation if each element of the permute is the same, and
1792 // if the value doesn't reference the second vector.
1793 unsigned ElementBase = N->getMaskElt(0);
1794
1795 // FIXME: Handle UNDEF elements too!
1796 if (ElementBase >= 16)
1797 return false;
1798
1799 // Check that the indices are consecutive, in the case of a multi-byte element
1800 // splatted with a v16i8 mask.
1801 for (unsigned i = 1; i != EltSize; ++i)
1802 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
1803 return false;
1804
1805 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
1806 if (N->getMaskElt(i) < 0) continue;
1807 for (unsigned j = 0; j != EltSize; ++j)
1808 if (N->getMaskElt(i+j) != N->getMaskElt(j))
1809 return false;
1810 }
1811 return true;
1812}
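A standalone sketch (not code from this file; names are made up) of the same splat test for a fully defined mask; the example mask splats word element 2 of the first input, which is what a vspltw would consume.

#include <cstdio>

// Mirrors the splat check above for a fully defined v16i8 mask: every
// EltSize-byte group must repeat the same consecutive byte indices, and the
// first index must land on an element boundary within the first input.
static bool isSplatMask(const int Mask[16], unsigned EltSize) {
  if (Mask[0] % (int)EltSize != 0 || Mask[0] >= 16)
    return false;
  for (unsigned i = 1; i != EltSize; ++i)
    if (Mask[i] != Mask[0] + (int)i)
      return false;
  for (unsigned i = EltSize; i != 16; i += EltSize)
    for (unsigned j = 0; j != EltSize; ++j)
      if (Mask[i + j] != Mask[j])
        return false;
  return true;
}

int main() {
  // Splat of word element 2 (bytes 8..11) of the first input:
  int Mask[16] = {8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11};
  std::printf("VSPLTW-style splat: %s\n", isSplatMask(Mask, 4) ? "yes" : "no");
}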
1813
1814/// Check that the mask is shuffling N byte elements. Within each N byte
1815/// element of the mask, the indices could be either in increasing or
1816/// decreasing order as long as they are consecutive.
1817/// \param[in] N the shuffle vector SD Node to analyze
1818/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
1819/// Word/DoubleWord/QuadWord).
1820/// \param[in] StepLen the index delta between adjacent mask entries within
1821/// each N byte element: 1 for increasing order, -1 for decreasing order.
1822/// \return true iff the mask is shuffling N byte elements.
1823static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
1824 int StepLen) {
1825 assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
1826        "Unexpected element width.");
1827 assert((StepLen == 1 || StepLen == -1) && "Unexpected element width.");
1828
1829 unsigned NumOfElem = 16 / Width;
1830 unsigned MaskVal[16]; // Width is never greater than 16
1831 for (unsigned i = 0; i < NumOfElem; ++i) {
1832 MaskVal[0] = N->getMaskElt(i * Width);
1833 if ((StepLen == 1) && (MaskVal[0] % Width)) {
1834 return false;
1835 } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
1836 return false;
1837 }
1838
1839 for (unsigned int j = 1; j < Width; ++j) {
1840 MaskVal[j] = N->getMaskElt(i * Width + j);
1841 if (MaskVal[j] != MaskVal[j-1] + StepLen) {
1842 return false;
1843 }
1844 }
1845 }
1846
1847 return true;
1848}
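A standalone sketch (not code from this file; names are made up) showing how StepLen distinguishes increasing from decreasing byte order within each element; the example mask reverses the bytes of every word, so it passes with StepLen = -1 and fails with StepLen = +1.

#include <cstdio>

// Mirrors the check above for a fully defined mask: within every Width-byte
// element the indices must step by StepLen, and the first index of each
// element must land on an element boundary (adjusted for decreasing order).
static bool isNByteElemMask(const int Mask[16], unsigned Width, int StepLen) {
  for (unsigned i = 0; i < 16 / Width; ++i) {
    int First = Mask[i * Width];
    if ((StepLen == 1 && First % (int)Width != 0) ||
        (StepLen == -1 && (First + 1) % (int)Width != 0))
      return false;
    for (unsigned j = 1; j < Width; ++j)
      if (Mask[i * Width + j] != Mask[i * Width + j - 1] + StepLen)
        return false;
  }
  return true;
}

int main() {
  // Bytes reversed within each word -- the same shape isXXBRWShuffleMask
  // looks for below:
  int Rev[16] = {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12};
  std::printf("Width 4, StepLen -1: %s\n",
              isNByteElemMask(Rev, 4, -1) ? "yes" : "no");   // yes
  std::printf("Width 4, StepLen +1: %s\n",
              isNByteElemMask(Rev, 4, +1) ? "yes" : "no");   // no
}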
1849
1850bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
1851 unsigned &InsertAtByte, bool &Swap, bool IsLE) {
1852 if (!isNByteElemShuffleMask(N, 4, 1))
1853 return false;
1854
1855 // Now we look at mask elements 0,4,8,12
1856 unsigned M0 = N->getMaskElt(0) / 4;
1857 unsigned M1 = N->getMaskElt(4) / 4;
1858 unsigned M2 = N->getMaskElt(8) / 4;
1859 unsigned M3 = N->getMaskElt(12) / 4;
1860 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
1861 unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
1862
1863 // Below, let H and L be arbitrary elements of the shuffle mask
1864 // where H is in the range [4,7] and L is in the range [0,3].
1865 // H, 1, 2, 3 or L, 5, 6, 7
1866 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
1867 (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
1868 ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
1869 InsertAtByte = IsLE ? 12 : 0;
1870 Swap = M0 < 4;
1871 return true;
1872 }
1873 // 0, H, 2, 3 or 4, L, 6, 7
1874 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
1875 (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
1876 ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
1877 InsertAtByte = IsLE ? 8 : 4;
1878 Swap = M1 < 4;
1879 return true;
1880 }
1881 // 0, 1, H, 3 or 4, 5, L, 7
1882 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
1883 (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
1884 ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
1885 InsertAtByte = IsLE ? 4 : 8;
1886 Swap = M2 < 4;
1887 return true;
1888 }
1889 // 0, 1, 2, H or 4, 5, 6, L
1890 if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
1891 (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
1892 ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
1893 InsertAtByte = IsLE ? 0 : 12;
1894 Swap = M3 < 4;
1895 return true;
1896 }
1897
1898 // If both vector operands for the shuffle are the same vector, the mask will
1899 // contain only elements from the first one and the second one will be undef.
1900 if (N->getOperand(1).isUndef()) {
1901 ShiftElts = 0;
1902 Swap = true;
1903 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
1904 if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
1905 InsertAtByte = IsLE ? 12 : 0;
1906 return true;
1907 }
1908 if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
1909 InsertAtByte = IsLE ? 8 : 4;
1910 return true;
1911 }
1912 if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
1913 InsertAtByte = IsLE ? 4 : 8;
1914 return true;
1915 }
1916 if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
1917 InsertAtByte = IsLE ? 0 : 12;
1918 return true;
1919 }
1920 }
1921
1922 return false;
1923}
1924
1925bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
1926 bool &Swap, bool IsLE) {
1927 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
7. '?' condition is true
1928 // Ensure each byte index of the word is consecutive.
1929 if (!isNByteElemShuffleMask(N, 4, 1))
8. Assuming the condition is false
9. Taking false branch
1930 return false;
1931
1932 // Now we look at mask elements 0,4,8,12, which are the beginning of words.
1933 unsigned M0 = N->getMaskElt(0) / 4;
1934 unsigned M1 = N->getMaskElt(4) / 4;
1935 unsigned M2 = N->getMaskElt(8) / 4;
1936 unsigned M3 = N->getMaskElt(12) / 4;
1937
1938 // If both vector operands for the shuffle are the same vector, the mask will
1939 // contain only elements from the first one and the second one will be undef.
1940 if (N->getOperand(1).isUndef()) {
10. Calling 'SDValue::isUndef'
16. Returning from 'SDValue::isUndef'
17. Taking false branch
1941 assert(M0 < 4 && "Indexing into an undef vector?");
1942 if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
1943 return false;
1944
1945 ShiftElts = IsLE ? (4 - M0) % 4 : M0;
1946 Swap = false;
1947 return true;
1948 }
1949
1950 // Ensure each word index of the ShuffleVector Mask is consecutive.
1951 if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
18. Assuming the condition is false
19. Assuming the condition is false
20. Assuming the condition is false
21. Taking false branch
1952 return false;
1953
1954 if (IsLE) {
22. Assuming 'IsLE' is false
23. Taking false branch
1955 if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
1956 // Input vectors don't need to be swapped if the leading element
1957 // of the result is one of the 3 left elements of the second vector
1958 // (or if there is no shift to be done at all).
1959 Swap = false;
1960 ShiftElts = (8 - M0) % 8;
1961 } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
1962 // Input vectors need to be swapped if the leading element
1963 // of the result is one of the 3 left elements of the first vector
1964 // (or if we're shifting by 4 - thereby simply swapping the vectors).
1965 Swap = true;
1966 ShiftElts = (4 - M0) % 4;
1967 }
1968
1969 return true;
1970 } else { // BE
1971 if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
24. Assuming 'M0' is not equal to 0
25. Assuming 'M0' is not equal to 1
26. Assuming 'M0' is not equal to 2
27. Assuming 'M0' is not equal to 3
28. Taking false branch
1972 // Input vectors don't need to be swapped if the leading element
1973 // of the result is one of the 4 elements of the first vector.
1974 Swap = false;
1975 ShiftElts = M0;
1976 } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
29. Assuming 'M0' is not equal to 4
30. Assuming 'M0' is not equal to 5
31. Assuming 'M0' is not equal to 6
32. Assuming 'M0' is not equal to 7
33. Taking false branch
1977 // Input vectors need to be swapped if the leading element
1978 // of the result is one of the 4 elements of the right vector.
1979 Swap = true;
1980 ShiftElts = M0 - 4;
1981 }
1982
1983 return true;
34. Returning without writing to 'ShiftElts'
35. Returning the value 1, which participates in a condition later
1984 }
1985}
1986
1987bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
1988 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
1989
1990 if (!isNByteElemShuffleMask(N, Width, -1))
1991 return false;
1992
1993 for (int i = 0; i < 16; i += Width)
1994 if (N->getMaskElt(i) != i + Width - 1)
1995 return false;
1996
1997 return true;
1998}
1999
2000bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
2001 return isXXBRShuffleMaskHelper(N, 2);
2002}
2003
2004bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
2005 return isXXBRShuffleMaskHelper(N, 4);
2006}
2007
2008bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
2009 return isXXBRShuffleMaskHelper(N, 8);
2010}
2011
2012bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
2013 return isXXBRShuffleMaskHelper(N, 16);
2014}
2015
2016/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2017/// if the inputs to the instruction should be swapped and set \p DM to the
2018/// value for the immediate.
2019/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2020/// AND element 0 of the result comes from the first input (LE) or second input
2021/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2022/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2023/// mask.
2024bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
2025 bool &Swap, bool IsLE) {
2026 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2027
2028 // Ensure each byte index of the double word is consecutive.
2029 if (!isNByteElemShuffleMask(N, 8, 1))
2030 return false;
2031
2032 unsigned M0 = N->getMaskElt(0) / 8;
2033 unsigned M1 = N->getMaskElt(8) / 8;
2034 assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2035
2036 // If both vector operands for the shuffle are the same vector, the mask will
2037 // contain only elements from the first one and the second one will be undef.
2038 if (N->getOperand(1).isUndef()) {
2039 if ((M0 | M1) < 2) {
2040 DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2041 Swap = false;
2042 return true;
2043 } else
2044 return false;
2045 }
2046
2047 if (IsLE) {
2048 if (M0 > 1 && M1 < 2) {
2049 Swap = false;
2050 } else if (M0 < 2 && M1 > 1) {
2051 M0 = (M0 + 2) % 4;
2052 M1 = (M1 + 2) % 4;
2053 Swap = true;
2054 } else
2055 return false;
2056
2057 // Note: if control flow comes here that means Swap is already set above
2058 DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2059 return true;
2060 } else { // BE
2061 if (M0 < 2 && M1 > 1) {
2062 Swap = false;
2063 } else if (M0 > 1 && M1 < 2) {
2064 M0 = (M0 + 2) % 4;
2065 M1 = (M1 + 2) % 4;
2066 Swap = true;
2067 } else
2068 return false;
2069
2070 // Note: if control flow comes here that means Swap is already set above
2071 DM = (M0 << 1) + (M1 & 1);
2072 return true;
2073 }
2074}
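A standalone sketch (not code from this file; the helper name is made up) of the unary immediate computation above, for a mask that takes both result doublewords from doubleword 1 of the single input (M0 = M1 = 1):

#include <cstdio>

// Mirrors the single-input (undef second operand) DM computation above:
// M0 and M1 are the doubleword indices selected for result elements 0 and 1.
static unsigned xxpermdiImmUnary(unsigned M0, unsigned M1, bool IsLE) {
  return IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
}

int main() {
  // A v16i8 mask of 8..15, 8..15 gives M0 = M1 = 1:
  std::printf("DM (LE) = %u, DM (BE) = %u\n",
              xxpermdiImmUnary(1, 1, /*IsLE=*/true),    // 0
              xxpermdiImmUnary(1, 1, /*IsLE=*/false));  // 3
}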
2075
2076
2077/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
2078/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
2079unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
2080 SelectionDAG &DAG) {
2081 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2082 assert(isSplatShuffleMask(SVOp, EltSize));
2083 if (DAG.getDataLayout().isLittleEndian())
2084 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2085 else
2086 return SVOp->getMaskElt(0) / EltSize;
2087}
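A standalone sketch (not code from this file; the helper name is made up) of the immediate computation: a splat mask starting at byte 8 with EltSize = 4 selects word element 2 in big-endian numbering and element 1 in little-endian numbering.

#include <cstdio>

// Mirrors the immediate computation above for a splat mask whose first byte
// index is FirstMaskByte (e.g. 8 for a splat of word element 2).
static unsigned vspltImmediate(unsigned FirstMaskByte, unsigned EltSize,
                               bool IsLE) {
  return IsLE ? (16 / EltSize) - 1 - (FirstMaskByte / EltSize)
              : FirstMaskByte / EltSize;
}

int main() {
  std::printf("VSPLTW imm: BE %u, LE %u\n",
              vspltImmediate(8, 4, /*IsLE=*/false),   // 2
              vspltImmediate(8, 4, /*IsLE=*/true));   // 1
}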
2088
2089/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2090/// by using a vspltis[bhw] instruction of the specified element size, return
2091/// the constant being splatted. The ByteSize field indicates the number of
2092/// bytes of each element [124] -> [bhw].
2093SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2094 SDValue OpVal(nullptr, 0);
2095
2096 // If ByteSize of the splat is bigger than the element size of the
2097 // build_vector, then we have a case where we are checking for a splat where
2098 // multiple elements of the buildvector are folded together into a single
2099 // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
2100 unsigned EltSize = 16/N->getNumOperands();
2101 if (EltSize < ByteSize) {
2102 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2103 SDValue UniquedVals[4];
2104 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2105
2106 // See if all of the elements in the buildvector agree across.
2107 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2108 if (N->getOperand(i).isUndef()) continue;
2109 // If the element isn't a constant, bail fully out.
2110 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2111
2112 if (!UniquedVals[i&(Multiple-1)].getNode())
2113 UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2114 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2115 return SDValue(); // no match.
2116 }
2117
2118 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2119 // either constant or undef values that are identical for each chunk. See
2120 // if these chunks can form into a larger vspltis*.
2121
2122 // Check to see if all of the leading entries are either 0 or -1. If
2123 // neither, then this won't fit into the immediate field.
2124 bool LeadingZero = true;
2125 bool LeadingOnes = true;
2126 for (unsigned i = 0; i != Multiple-1; ++i) {
2127 if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2128
2129 LeadingZero &= isNullConstant(UniquedVals[i]);
2130 LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2131 }
2132 // Finally, check the least significant entry.
2133 if (LeadingZero) {
2134 if (!UniquedVals[Multiple-1].getNode())
2135 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
2136 int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
2137 if (Val < 16) // 0,0,0,4 -> vspltisw(4)
2138 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2139 }
2140 if (LeadingOnes) {
2141 if (!UniquedVals[Multiple-1].getNode())
2142 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2143 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2144 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2145 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2146 }
2147
2148 return SDValue();
2149 }
2150
2151 // Check to see if this buildvec has a single non-undef value in its elements.
2152 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2153 if (N->getOperand(i).isUndef()) continue;
2154 if (!OpVal.getNode())
2155 OpVal = N->getOperand(i);
2156 else if (OpVal != N->getOperand(i))
2157 return SDValue();
2158 }
2159
2160 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
2161
2162 unsigned ValSizeInBytes = EltSize;
2163 uint64_t Value = 0;
2164 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2165 Value = CN->getZExtValue();
2166 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2167 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2168 Value = FloatToBits(CN->getValueAPF().convertToFloat());
2169 }
2170
2171 // If the splat value is larger than the element value, then we can never do
2172 // this splat. The only case that we could fit the replicated bits into our
2173 // immediate field for would be zero, and we prefer to use vxor for it.
2174 if (ValSizeInBytes < ByteSize) return SDValue();
2175
2176 // If the element value is larger than the splat value, check if it consists
2177 // of a repeated bit pattern of size ByteSize.
2178 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2179 return SDValue();
2180
2181 // Properly sign extend the value.
2182 int MaskVal = SignExtend32(Value, ByteSize * 8);
2183
2184 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2185 if (MaskVal == 0) return SDValue();
2186
2187 // Finally, if this value fits in a 5 bit sext field, return it
2188 if (SignExtend32<5>(MaskVal) == MaskVal)
2189 return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2190 return SDValue();
2191}
2192
2193/// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift
2194/// amount, otherwise return -1.
2195int PPC::isQVALIGNIShuffleMask(SDNode *N) {
2196 EVT VT = N->getValueType(0);
2197 if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1)
2198 return -1;
2199
2200 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2201
2202 // Find the first non-undef value in the shuffle mask.
2203 unsigned i;
2204 for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i)
2205 /*search*/;
2206
2207 if (i == 4) return -1; // all undef.
2208
2209 // Otherwise, check to see if the rest of the elements are consecutively
2210 // numbered from this value.
2211 unsigned ShiftAmt = SVOp->getMaskElt(i);
2212 if (ShiftAmt < i) return -1;
2213 ShiftAmt -= i;
2214
2215 // Check the rest of the elements to see if they are consecutive.
2216 for (++i; i != 4; ++i)
2217 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2218 return -1;
2219
2220 return ShiftAmt;
2221}
2222
2223//===----------------------------------------------------------------------===//
2224// Addressing Mode Selection
2225//===----------------------------------------------------------------------===//
2226
2227/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2228/// or 64-bit immediate, and if the value can be accurately represented as a
2229/// sign extension from a 16-bit value. If so, this returns true and the
2230/// immediate.
2231bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2232 if (!isa<ConstantSDNode>(N))
2233 return false;
2234
2235 Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue();
2236 if (N->getValueType(0) == MVT::i32)
2237 return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
2238 else
2239 return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2240}
2241bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2242 return isIntS16Immediate(Op.getNode(), Imm);
2243}
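The property these helpers test can be stated on plain integers. A standalone sketch (not code from this file; the name is made up): a value fits a signed 16-bit immediate iff sign-extending its low 16 bits reproduces it.

#include <cstdint>
#include <cstdio>

// Truncate to 16 bits, sign-extend back, and compare with the original value.
static bool fitsSignedImm16(int64_t V) {
  return (int64_t)(int16_t)V == V;
}

int main() {
  std::printf("%d %d %d\n",
              fitsSignedImm16(-32768),   // 1
              fitsSignedImm16(32767),    // 1
              fitsSignedImm16(40000));   // 0
}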
2244
2245
2246/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2247/// be represented as an indexed [r+r] operation.
2248bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2249 SDValue &Index,
2250 SelectionDAG &DAG) const {
2251 for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
2252 UI != E; ++UI) {
2253 if (MemSDNode *Memop = dyn_cast<MemSDNode>(*UI)) {
2254 if (Memop->getMemoryVT() == MVT::f64) {
2255 Base = N.getOperand(0);
2256 Index = N.getOperand(1);
2257 return true;
2258 }
2259 }
2260 }
2261 return false;
2262}
2263
2264/// SelectAddressRegReg - Given the specified address, check to see if it
2265/// can be represented as an indexed [r+r] operation. Returns false if it
2266/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2267/// non-zero and N can be represented by a base register plus a signed 16-bit
2268/// displacement, make a more precise judgement by checking (displacement % \p
2269/// EncodingAlignment).
2270bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
2271 SDValue &Index, SelectionDAG &DAG,
2272 unsigned EncodingAlignment) const {
2273 int16_t imm = 0;
2274 if (N.getOpcode() == ISD::ADD) {
2275 // Is there an SPE load/store (f64) that can't handle a 16-bit offset?
2276 // SPE load/store can only handle 8-bit offsets.
2277 if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2278 return true;
2279 if (isIntS16Immediate(N.getOperand(1), imm) &&
2280 (!EncodingAlignment || !(imm % EncodingAlignment)))
2281 return false; // r+i
2282 if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2283 return false; // r+i
2284
2285 Base = N.getOperand(0);
2286 Index = N.getOperand(1);
2287 return true;
2288 } else if (N.getOpcode() == ISD::OR) {
2289 if (isIntS16Immediate(N.getOperand(1), imm) &&
2290 (!EncodingAlignment || !(imm % EncodingAlignment)))
2291 return false; // r+i can fold it if we can.
2292
2293 // If this is an or of disjoint bitfields, we can codegen this as an add
2294 // (for better address arithmetic) if the LHS and RHS of the OR are provably
2295 // disjoint.
2296 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2297
2298 if (LHSKnown.Zero.getBoolValue()) {
2299 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2300 // If all of the bits are known zero on the LHS or RHS, the add won't
2301 // carry.
2302 if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2303 Base = N.getOperand(0);
2304 Index = N.getOperand(1);
2305 return true;
2306 }
2307 }
2308 }
2309
2310 return false;
2311}
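The "or of disjoint bitfields" reasoning above can be checked in isolation; a standalone sketch (not code from this file): when no bit position is set in both operands, the OR cannot generate a carry, so it computes the same address as an ADD.

#include <cstdint>
#include <cstdio>

int main() {
  // The low bits of the base are known to be zero and the offset only uses
  // those low bits, so OR and ADD are interchangeable for this address.
  uint64_t Base = 0x10000;   // e.g. an aligned object address
  uint64_t Off  = 0x0018;
  std::printf("or  = %#llx\nadd = %#llx\n",
              (unsigned long long)(Base | Off),
              (unsigned long long)(Base + Off));   // both print 0x10018
}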
2312
2313// If we happen to be doing an i64 load or store into a stack slot that has
2314// less than a 4-byte alignment, then the frame-index elimination may need to
2315// use an indexed load or store instruction (because the offset may not be a
2316// multiple of 4). The extra register needed to hold the offset comes from the
2317// register scavenger, and it is possible that the scavenger will need to use
2318// an emergency spill slot. As a result, we need to make sure that a spill slot
2319// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2320// stack slot.
2321static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2322 // FIXME: This does not handle the LWA case.
2323 if (VT != MVT::i64)
2324 return;
2325
2326 // NOTE: We'll exclude negative FIs here, which come from argument
2327 // lowering, because there are no known test cases triggering this problem
2328 // using packed structures (or similar). We can remove this exclusion if
2329 // we find such a test case. The reason why this is so test-case driven is
2330 // because this entire 'fixup' is only to prevent crashes (from the
2331 // register scavenger) on not-really-valid inputs. For example, if we have:
2332 // %a = alloca i1
2333 // %b = bitcast i1* %a to i64*
2334 // store i64* a, i64 b
2335 // then the store should really be marked as 'align 1', but is not. If it
2336 // were marked as 'align 1' then the indexed form would have been
2337 // instruction-selected initially, and the problem this 'fixup' is preventing
2338 // won't happen regardless.
2339 if (FrameIdx < 0)
2340 return;
2341
2342 MachineFunction &MF = DAG.getMachineFunction();
2343 MachineFrameInfo &MFI = MF.getFrameInfo();
2344
2345 unsigned Align = MFI.getObjectAlignment(FrameIdx);
2346 if (Align >= 4)
2347 return;
2348
2349 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2350 FuncInfo->setHasNonRISpills();
2351}
2352
2353/// Returns true if the address N can be represented by a base register plus
2354/// a signed 16-bit displacement [r+imm], and if it is not better
2355/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2356/// displacements that are multiples of that value.
2357bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
2358 SDValue &Base,
2359 SelectionDAG &DAG,
2360 unsigned EncodingAlignment) const {
2361 // FIXME dl should come from parent load or store, not from address
2362 SDLoc dl(N);
2363 // If this can be more profitably realized as r+r, fail.
2364 if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2365 return false;
2366
2367 if (N.getOpcode() == ISD::ADD) {
2368 int16_t imm = 0;
2369 if (isIntS16Immediate(N.getOperand(1), imm) &&
2370 (!EncodingAlignment || (imm % EncodingAlignment) == 0)) {
2371 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2372 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2373 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2374 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2375 } else {
2376 Base = N.getOperand(0);
2377 }
2378 return true; // [r+i]
2379 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2380 // Match LOAD (ADD (X, Lo(G))).
2381 assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
2382        && "Cannot handle constant offsets yet!");
2383 Disp = N.getOperand(1).getOperand(0); // The global address.
2384 assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2385        Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2386        Disp.getOpcode() == ISD::TargetConstantPool ||
2387        Disp.getOpcode() == ISD::TargetJumpTable);
2388 Base = N.getOperand(0);
2389 return true; // [&g+r]
2390 }
2391 } else if (N.getOpcode() == ISD::OR) {
2392 int16_t imm = 0;
2393 if (isIntS16Immediate(N.getOperand(1), imm) &&
2394 (!EncodingAlignment || (imm % EncodingAlignment) == 0)) {
2395 // If this is an or of disjoint bitfields, we can codegen this as an add
2396 // (for better address arithmetic) if the LHS and RHS of the OR are
2397 // provably disjoint.
2398 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2399
2400 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2401 // If all of the bits are known zero on the LHS or RHS, the add won't
2402 // carry.
2403 if (FrameIndexSDNode *FI =
2404 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2405 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2406 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2407 } else {
2408 Base = N.getOperand(0);
2409 }
2410 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2411 return true;
2412 }
2413 }
2414 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2415 // Loading from a constant address.
2416
2417 // If this address fits entirely in a 16-bit sext immediate field, codegen
2418 // this as "d, 0"
2419 int16_t Imm;
2420 if (isIntS16Immediate(CN, Imm) &&
2421 (!EncodingAlignment || (Imm % EncodingAlignment) == 0)) {
2422 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2423 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2424 CN->getValueType(0));
2425 return true;
2426 }
2427
2428 // Handle 32-bit sext immediates with LIS + addr mode.
2429 if ((CN->getValueType(0) == MVT::i32 ||
2430 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2431 (!EncodingAlignment || (CN->getZExtValue() % EncodingAlignment) == 0)) {
2432 int Addr = (int)CN->getZExtValue();
2433
2434 // Otherwise, break this down into an LIS + disp.
2435 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2436
2437 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2438 MVT::i32);
2439 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2440 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2441 return true;
2442 }
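  // Rough worked example (illustrative, not part of the original source):
  // for Addr = 0x12348000, (short)Addr is -0x8000, so Disp becomes -32768
  // and Base becomes (0x12348000 + 0x8000) >> 16 = 0x1235. LIS then
  // materializes 0x12350000, and adding the -32768 displacement yields the
  // original address 0x12348000.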
2443 }
2444
2445 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2446 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2447 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2448 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2449 } else
2450 Base = N;
2451 return true; // [r+0]
2452}
2453
2454/// SelectAddressRegRegOnly - Given the specified address, force it to be
2455/// represented as an indexed [r+r] operation.
2456bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2457 SDValue &Index,
2458 SelectionDAG &DAG) const {
2459 // Check to see if we can easily represent this as an [r+r] address. This
2460 // will fail if it thinks that the address is more profitably represented as
2461 // reg+imm, e.g. where imm = 0.
2462 if (SelectAddressRegReg(N, Base, Index, DAG))
2463 return true;
2464
2465 // If the address is the result of an add, we will utilize the fact that the
2466 // address calculation includes an implicit add. However, we can reduce
2467 // register pressure if we do not materialize a constant just for use as the
2468 // index register.  We only get rid of the add if it is not an add of a
2469 // value and a 16-bit signed constant where both operands have a single use.
2470 int16_t imm = 0;
2471 if (N.getOpcode() == ISD::ADD &&
2472 (!isIntS16Immediate(N.getOperand(1), imm) ||
2473 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2474 Base = N.getOperand(0);
2475 Index = N.getOperand(1);
2476 return true;
2477 }
2478
2479 // Otherwise, do it the hard way, using R0 as the base register.
2480 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2481 N.getValueType());
2482 Index = N;
2483 return true;
2484}
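// Illustrative note (not from the source): the fallback above uses ZERO/ZERO8
// as the base because, in PPC indexed addressing, an RA field of 0 is read as
// the literal value 0 rather than the contents of r0, so "0 + Index" still
// computes the original address.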
2485
2486/// Returns true if we should use a direct load into vector instruction
2487/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
2488static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
2489
2490 // If there are any other uses other than scalar to vector, then we should
2491 // keep it as a scalar load -> direct move pattern to prevent multiple
2492 // loads.
2493 LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
2494 if (!LD)
2495 return false;
2496
2497 EVT MemVT = LD->getMemoryVT();
2498 if (!MemVT.isSimple())
2499 return false;
2500 switch(MemVT.getSimpleVT().SimpleTy) {
2501 case MVT::i64:
2502 break;
2503 case MVT::i32:
2504 if (!ST.hasP8Vector())
2505 return false;
2506 break;
2507 case MVT::i16:
2508 case MVT::i8:
2509 if (!ST.hasP9Vector())
2510 return false;
2511 break;
2512 default:
2513 return false;
2514 }
2515
2516 SDValue LoadedVal(N, 0);
2517 if (!LoadedVal.hasOneUse())
2518 return false;
2519
2520 for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
2521 UI != UE; ++UI)
2522 if (UI.getUse().get().getResNo() == 0 &&
2523 UI->getOpcode() != ISD::SCALAR_TO_VECTOR)
2524 return false;
2525
2526 return true;
2527}
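// Illustrative IR shape (a sketch, not taken from the source) that this
// predicate is meant to accept:
//   %v   = load i64, i64* %p
//   %vec = insertelement <2 x i64> undef, i64 %v, i32 0   ; only use of %v
// Here the loaded value can go straight into a VSX register (e.g. with lxsd)
// instead of being loaded into a GPR and then moved with a direct move.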
2528
2529/// getPreIndexedAddressParts - returns true by value, base pointer and
2530/// offset pointer and addressing mode by reference if the node's address
2531/// can be legally represented as pre-indexed load / store address.
2532bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
2533 SDValue &Offset,
2534 ISD::MemIndexedMode &AM,
2535 SelectionDAG &DAG) const {
2536 if (DisablePPCPreinc) return false;
2537
2538 bool isLoad = true;
2539 SDValue Ptr;
2540 EVT VT;
2541 unsigned Alignment;
2542 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2543 Ptr = LD->getBasePtr();
2544 VT = LD->getMemoryVT();
2545 Alignment = LD->getAlignment();
2546 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
2547 Ptr = ST->getBasePtr();
2548 VT = ST->getMemoryVT();
2549 Alignment = ST->getAlignment();
2550 isLoad = false;
2551 } else
2552 return false;
2553
2554 // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
2555 // instructions because we can fold these into a more efficient instruction
2556 // instead (such as LXSD).
2557 if (isLoad && usePartialVectorLoads(N, Subtarget)) {
2558 return false;
2559 }
2560
2561 // PowerPC doesn't have preinc load/store instructions for vectors (except
2562 // for QPX, which does have preinc r+r forms).
2563 if (VT.isVector()) {
2564 if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) {
2565 return false;
2566 } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) {
2567 AM = ISD::PRE_INC;
2568 return true;
2569 }
2570 }
2571
2572 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
2573 // Common code will reject creating a pre-inc form if the base pointer
2574 // is a frame index, or if N is a store and the base pointer is either
2575 // the same as or a predecessor of the value being stored. Check for
2576 // those situations here, and try with swapped Base/Offset instead.
2577 bool Swap = false;
2578
2579 if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
2580 Swap = true;
2581 else if (!isLoad) {
2582 SDValue Val = cast<StoreSDNode>(N)->getValue();
2583 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
2584 Swap = true;
2585 }
2586
2587 if (Swap)
2588 std::swap(Base, Offset);
2589
2590 AM = ISD::PRE_INC;
2591 return true;
2592 }
2593
2594 // LDU/STU can only handle immediates that are a multiple of 4.
2595 if (VT != MVT::i64) {
2596 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 0))
2597 return false;
2598 } else {
2599 // LDU/STU need an address with at least 4-byte alignment.
2600 if (Alignment < 4)
2601 return false;
2602
2603 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 4))
2604 return false;
2605 }
2606
2607 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2608 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
2609 // sext i32 to i64 when addr mode is r+i.
2610 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
2611 LD->getExtensionType() == ISD::SEXTLOAD &&
2612 isa<ConstantSDNode>(Offset))
2613 return false;
2614 }
2615
2616 AM = ISD::PRE_INC;
2617 return true;
2618}
2619
2620//===----------------------------------------------------------------------===//
2621// LowerOperation implementation
2622//===----------------------------------------------------------------------===//
2623
2624/// Return true if we should reference labels using a PICBase, set the HiOpFlags
2625/// and LoOpFlags to the target MO flags.
2626static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
2627 unsigned &HiOpFlags, unsigned &LoOpFlags,
2628 const GlobalValue *GV = nullptr) {
2629 HiOpFlags = PPCII::MO_HA;
2630 LoOpFlags = PPCII::MO_LO;
2631
2632 // Don't use the pic base if not in PIC relocation model.
2633 if (IsPIC) {
2634 HiOpFlags |= PPCII::MO_PIC_FLAG;
2635 LoOpFlags |= PPCII::MO_PIC_FLAG;
2636 }
2637
2638 // If this is a reference to a global value that requires a non-lazy-ptr, make
2639 // sure that instruction lowering adds it.
2640 if (GV && Subtarget.hasLazyResolverStub(GV)) {
2641 HiOpFlags |= PPCII::MO_NLP_FLAG;
2642 LoOpFlags |= PPCII::MO_NLP_FLAG;
2643
2644 if (GV->hasHiddenVisibility()) {
2645 HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
2646 LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
2647 }
2648 }
2649}
2650
2651static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
2652 SelectionDAG &DAG) {
2653 SDLoc DL(HiPart);
2654 EVT PtrVT = HiPart.getValueType();
2655 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
2656
2657 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
2658 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
2659
2660 // With PIC, the first instruction is actually "GR+hi(&G)".
2661 if (isPIC)
2662 Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
2663 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
2664
2665 // Generate non-pic code that has direct accesses to the constant pool.
2666 // The address of the global is just (hi(&g)+lo(&g)).
2667 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
2668}
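// Illustrative lowering (a sketch): without PIC, the Hi/Lo pair built above is
// typically matched into something like
//   lis  rT, sym@ha
//   addi rD, rT, sym@l
// and with PIC the high part is first added to the GlobalBaseReg value.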
2669
2670static void setUsesTOCBasePtr(MachineFunction &MF) {
2671 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2672 FuncInfo->setUsesTOCBasePtr();
2673}
2674
2675static void setUsesTOCBasePtr(SelectionDAG &DAG) {
2676 setUsesTOCBasePtr(DAG.getMachineFunction());
2677}
2678
2679SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
2680 SDValue GA) const {
2681 const bool Is64Bit = Subtarget.isPPC64();
2682 EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
2683 SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT)
2684 : Subtarget.isAIXABI()
2685 ? DAG.getRegister(PPC::R2, VT)
2686 : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
2687 SDValue Ops[] = { GA, Reg };
2688 return DAG.getMemIntrinsicNode(
2689 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
2690 MachinePointerInfo::getGOT(DAG.getMachineFunction()), 0,
2691 MachineMemOperand::MOLoad);
2692}
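// Illustrative note (a sketch): on 64-bit ELF the TOC_ENTRY node produced here
// is generally lowered to a TOC-relative load such as
//   ld rD, sym@toc(r2)
// or an addis/ld pair using @toc@ha and @toc@l for larger code models.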
2693
2694SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
2695 SelectionDAG &DAG) const {
2696 EVT PtrVT = Op.getValueType();
2697 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2698 const Constant *C = CP->getConstVal();
2699
2700 // 64-bit SVR4 ABI code is always position-independent.
2701 // The actual address of the GlobalValue is stored in the TOC.
2702 if (Subtarget.is64BitELFABI()) {
2703 setUsesTOCBasePtr(DAG);
2704 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
2705 return getTOCEntry(DAG, SDLoc(CP), GA);
2706 }
2707
2708 unsigned MOHiFlag, MOLoFlag;
2709 bool IsPIC = isPositionIndependent();
2710 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2711
2712 if (IsPIC && Subtarget.isSVR4ABI()) {
2713 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
2714 PPCII::MO_PIC_FLAG);
2715 return getTOCEntry(DAG, SDLoc(CP), GA);
2716 }
2717
2718 SDValue CPIHi =
2719 DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
2720 SDValue CPILo =
2721 DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
2722 return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
2723}
2724
2725// For 64-bit PowerPC, prefer the more compact relative encodings.
2726// This trades 32 bits per jump table entry for one or two instructions
2727// on the jump site.
2728unsigned PPCTargetLowering::getJumpTableEncoding() const {
2729 if (isJumpTableRelative())
2730 return MachineJumpTableInfo::EK_LabelDifference32;
2731
2732 return TargetLowering::getJumpTableEncoding();
2733}
2734
2735bool PPCTargetLowering::isJumpTableRelative() const {
2736 if (Subtarget.isPPC64())
2737 return true;
2738 return TargetLowering::isJumpTableRelative();
2739}
2740
2741SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
2742 SelectionDAG &DAG) const {
2743 if (!Subtarget.isPPC64())
2744 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2745
2746 switch (getTargetMachine().getCodeModel()) {
2747 case CodeModel::Small:
2748 case CodeModel::Medium:
2749 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2750 default:
2751 return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
2752 getPointerTy(DAG.getDataLayout()));
2753 }
2754}
2755
2756const MCExpr *
2757PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
2758 unsigned JTI,
2759 MCContext &Ctx) const {
2760 if (!Subtarget.isPPC64())
2761 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2762
2763 switch (getTargetMachine().getCodeModel()) {
2764 case CodeModel::Small:
2765 case CodeModel::Medium:
2766 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2767 default:
2768 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2769 }
2770}
2771
2772SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
2773 EVT PtrVT = Op.getValueType();
2774 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2775
2776 // 64-bit SVR4 ABI code is always position-independent.
2777 // The actual address of the GlobalValue is stored in the TOC.
2778 if (Subtarget.is64BitELFABI()) {
2779 setUsesTOCBasePtr(DAG);
2780 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
2781 return getTOCEntry(DAG, SDLoc(JT), GA);
2782 }
2783
2784 unsigned MOHiFlag, MOLoFlag;
2785 bool IsPIC = isPositionIndependent();
2786 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2787
2788 if (IsPIC && Subtarget.isSVR4ABI()) {
2789 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
2790 PPCII::MO_PIC_FLAG);
2791 return getTOCEntry(DAG, SDLoc(GA), GA);
2792 }
2793
2794 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
2795 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
2796 return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
2797}
2798
2799SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
2800 SelectionDAG &DAG) const {
2801 EVT PtrVT = Op.getValueType();
2802 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
2803 const BlockAddress *BA = BASDN->getBlockAddress();
2804
2805 // 64-bit SVR4 ABI code is always position-independent.
2806 // The actual BlockAddress is stored in the TOC.
2807 if (Subtarget.is64BitELFABI()) {
2808 setUsesTOCBasePtr(DAG);
2809 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
2810 return getTOCEntry(DAG, SDLoc(BASDN), GA);
2811 }
2812
2813 // 32-bit position-independent ELF stores the BlockAddress in the .got.
2814 if (Subtarget.is32BitELFABI() && isPositionIndependent())
2815 return getTOCEntry(
2816 DAG, SDLoc(BASDN),
2817 DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
2818
2819 unsigned MOHiFlag, MOLoFlag;
2820 bool IsPIC = isPositionIndependent();
2821 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2822 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
2823 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
2824 return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
2825}
2826
2827SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
2828 SelectionDAG &DAG) const {
2829 // FIXME: TLS addresses currently use medium model code sequences,
2830 // which is the most useful form. Eventually support for small and
2831 // large models could be added if users need it, at the cost of
2832 // additional complexity.
2833 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
2834 if (DAG.getTarget().useEmulatedTLS())
2835 return LowerToTLSEmulatedModel(GA, DAG);
2836
2837 SDLoc dl(GA);
2838 const GlobalValue *GV = GA->getGlobal();
2839 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2840 bool is64bit = Subtarget.isPPC64();
2841 const Module *M = DAG.getMachineFunction().getFunction().getParent();
2842 PICLevel::Level picLevel = M->getPICLevel();
2843
2844 const TargetMachine &TM = getTargetMachine();
2845 TLSModel::Model Model = TM.getTLSModel(GV);
2846
2847 if (Model == TLSModel::LocalExec) {
2848 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2849 PPCII::MO_TPREL_HA);
2850 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2851 PPCII::MO_TPREL_LO);
2852 SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
2853 : DAG.getRegister(PPC::R2, MVT::i32);
2854
2855 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
2856 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
2857 }
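  // Illustrative local-exec sequence (a sketch of what the nodes above
  // typically become on 64-bit targets):
  //   addis rD, r13, sym@tprel@ha
  //   addi  rD, rD,  sym@tprel@l
  // where r13 holds the thread pointer.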
2858
2859 if (Model == TLSModel::InitialExec) {
2860 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2861 SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2862 PPCII::MO_TLS);
2863 SDValue GOTPtr;
2864 if (is64bit) {
2865 setUsesTOCBasePtr(DAG);
2866 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2867 GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
2868 PtrVT, GOTReg, TGA);
2869 } else {
2870 if (!TM.isPositionIndependent())
2871 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
2872 else if (picLevel == PICLevel::SmallPIC)
2873 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
2874 else
2875 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
2876 }
2877 SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
2878 PtrVT, TGA, GOTPtr);
2879 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
2880 }
2881
2882 if (Model == TLSModel::GeneralDynamic) {
2883 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2884 SDValue GOTPtr;
2885 if (is64bit) {
2886 setUsesTOCBasePtr(DAG);
2887 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2888 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
2889 GOTReg, TGA);
2890 } else {
2891 if (picLevel == PICLevel::SmallPIC)
2892 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
2893 else
2894 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
2895 }
2896 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
2897 GOTPtr, TGA, TGA);
2898 }
2899
2900 if (Model == TLSModel::LocalDynamic) {
2901 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2902 SDValue GOTPtr;
2903 if (is64bit) {
2904 setUsesTOCBasePtr(DAG);
2905 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2906 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
2907 GOTReg, TGA);
2908 } else {
2909 if (picLevel == PICLevel::SmallPIC)
2910 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
2911 else
2912 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
2913 }
2914 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
2915 PtrVT, GOTPtr, TGA, TGA);
2916 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
2917 PtrVT, TLSAddr, TGA);
2918 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
2919 }
2920
2921   llvm_unreachable("Unknown TLS model!");
2922}
2923
2924SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
2925 SelectionDAG &DAG) const {
2926 EVT PtrVT = Op.getValueType();
2927 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
2928 SDLoc DL(GSDN);
2929 const GlobalValue *GV = GSDN->getGlobal();
2930
2931 // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
2932 // The actual address of the GlobalValue is stored in the TOC.
2933 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
2934 setUsesTOCBasePtr(DAG);
2935 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
2936 return getTOCEntry(DAG, DL, GA);
2937 }
2938
2939 unsigned MOHiFlag, MOLoFlag;
2940 bool IsPIC = isPositionIndependent();
2941 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
2942
2943 if (IsPIC && Subtarget.isSVR4ABI()) {
2944 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
2945 GSDN->getOffset(),
2946 PPCII::MO_PIC_FLAG);
2947 return getTOCEntry(DAG, DL, GA);
2948 }
2949
2950 SDValue GAHi =
2951 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
2952 SDValue GALo =
2953 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
2954
2955 SDValue Ptr = LowerLabelRef(GAHi, GALo, IsPIC, DAG);
2956
2957 // If the global reference is actually to a non-lazy-pointer, we have to do an
2958 // extra load to get the address of the global.
2959 if (MOHiFlag & PPCII::MO_NLP_FLAG)
2960 Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
2961 return Ptr;
2962}
2963
2964SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
2965 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
2966 SDLoc dl(Op);
2967
2968 if (Op.getValueType() == MVT::v2i64) {
2969 // When the operands themselves are v2i64 values, we need to do something
2970 // special because VSX has no underlying comparison operations for these.
2971 if (Op.getOperand(0).getValueType() == MVT::v2i64) {
2972 // Equality can be handled by casting to the legal type for Altivec
2973 // comparisons, everything else needs to be expanded.
2974 if (CC == ISD::SETEQ || CC == ISD::SETNE) {
2975 return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
2976 DAG.getSetCC(dl, MVT::v4i32,
2977 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
2978 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
2979 CC));
2980 }
2981
2982 return SDValue();
2983 }
2984
2985 // We handle most of these in the usual way.
2986 return Op;
2987 }
2988
2989 // If we're comparing for equality to zero, expose the fact that this is
2990 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
2991 // fold the new nodes.
2992 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
2993 return V;
2994
2995 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
2996 // Leave comparisons against 0 and -1 alone for now, since they're usually
2997 // optimized. FIXME: revisit this when we can custom lower all setcc
2998 // optimizations.
2999 if (C->isAllOnesValue() || C->isNullValue())
3000 return SDValue();
3001 }
3002
3003 // If we have an integer seteq/setne, turn it into a compare against zero
3004 // by xor'ing the rhs with the lhs, which is faster than setting a
3005 // condition register, reading it back out, and masking the correct bit. The
3006 // normal approach here uses sub to do this instead of xor. Using xor exposes
3007 // the result to other bit-twiddling opportunities.
3008 EVT LHSVT = Op.getOperand(0).getValueType();
3009 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3010 EVT VT = Op.getValueType();
3011 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
3012 Op.getOperand(1));
3013 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3014 }
3015 return SDValue();
3016}
3017
3018SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3019 SDNode *Node = Op.getNode();
3020 EVT VT = Node->getValueType(0);
3021 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3022 SDValue InChain = Node->getOperand(0);
3023 SDValue VAListPtr = Node->getOperand(1);
3024 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3025 SDLoc dl(Node);
3026
3027   assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3028
3029 // gpr_index
3030 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3031 VAListPtr, MachinePointerInfo(SV), MVT::i8);
3032 InChain = GprIndex.getValue(1);
3033
3034 if (VT == MVT::i64) {
3035 // Check if GprIndex is even
3036 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3037 DAG.getConstant(1, dl, MVT::i32));
3038 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3039 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3040 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3041 DAG.getConstant(1, dl, MVT::i32));
3042 // Align GprIndex to be even if it isn't
3043 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3044 GprIndex);
3045 }
3046
3047 // fpr index is 1 byte after gpr
3048 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3049 DAG.getConstant(1, dl, MVT::i32));
3050
3051 // fpr
3052 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3053 FprPtr, MachinePointerInfo(SV), MVT::i8);
3054 InChain = FprIndex.getValue(1);
3055
3056 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3057 DAG.getConstant(8, dl, MVT::i32));
3058
3059 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3060 DAG.getConstant(4, dl, MVT::i32));
3061
3062 // areas
3063 SDValue OverflowArea =
3064 DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3065 InChain = OverflowArea.getValue(1);
3066
3067 SDValue RegSaveArea =
3068 DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3069 InChain = RegSaveArea.getValue(1);
3070
3071   // select overflow_area if index >= 8
3072 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3073 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3074
3075 // adjustment constant gpr_index * 4/8
3076 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3077 VT.isInteger() ? GprIndex : FprIndex,
3078 DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3079 MVT::i32));
3080
3081 // OurReg = RegSaveArea + RegConstant
3082 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3083 RegConstant);
3084
3085 // Floating types are 32 bytes into RegSaveArea
3086 if (VT.isFloatingPoint())
3087 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3088 DAG.getConstant(32, dl, MVT::i32));
3089
3090 // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3091 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3092 VT.isInteger() ? GprIndex : FprIndex,
3093 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3094 MVT::i32));
3095
3096 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3097 VT.isInteger() ? VAListPtr : FprPtr,
3098 MachinePointerInfo(SV), MVT::i8);
3099
3100 // determine if we should load from reg_save_area or overflow_area
3101 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3102
3103   // increase overflow_area by 4/8 if gpr/fpr >= 8
3104 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3105 DAG.getConstant(VT.isInteger() ? 4 : 8,
3106 dl, MVT::i32));
3107
3108 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3109 OverflowAreaPlusN);
3110
3111 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3112 MachinePointerInfo(), MVT::i32);
3113
3114 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3115}
3116
3117SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3118   assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3119
3120 // We have to copy the entire va_list struct:
3121 // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
3122 return DAG.getMemcpy(Op.getOperand(0), Op,
3123 Op.getOperand(1), Op.getOperand(2),
3124 DAG.getConstant(12, SDLoc(Op), MVT::i32), 8, false, true,
3125 false, MachinePointerInfo(), MachinePointerInfo());
3126}
3127
3128SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3129 SelectionDAG &DAG) const {
3130 return Op.getOperand(0);
3131}
3132
3133SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3134 SelectionDAG &DAG) const {
3135 SDValue Chain = Op.getOperand(0);
3136 SDValue Trmp = Op.getOperand(1); // trampoline
3137 SDValue FPtr = Op.getOperand(2); // nested function
3138 SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3139 SDLoc dl(Op);
3140
3141 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3142 bool isPPC64 = (PtrVT == MVT::i64);
3143 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3144
3145 TargetLowering::ArgListTy Args;
3146 TargetLowering::ArgListEntry Entry;
3147
3148 Entry.Ty = IntPtrTy;
3149 Entry.Node = Trmp; Args.push_back(Entry);
3150
3151 // TrampSize == (isPPC64 ? 48 : 40);
3152 Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
3153 isPPC64 ? MVT::i64 : MVT::i32);
3154 Args.push_back(Entry);
3155
3156 Entry.Node = FPtr; Args.push_back(Entry);
3157 Entry.Node = Nest; Args.push_back(Entry);
3158
3159 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
3160 TargetLowering::CallLoweringInfo CLI(DAG);
3161 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3162 CallingConv::C, Type::getVoidTy(*DAG.getContext()),
3163 DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
3164
3165 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3166 return CallResult.second;
3167}
3168
3169SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3170 MachineFunction &MF = DAG.getMachineFunction();
3171 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3172 EVT PtrVT = getPointerTy(MF.getDataLayout());
3173
3174 SDLoc dl(Op);
3175
3176 if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
3177 // vastart just stores the address of the VarArgsFrameIndex slot into the
3178 // memory location argument.
3179 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3180 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3181 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3182 MachinePointerInfo(SV));
3183 }
3184
3185 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
3186 // We suppose the given va_list is already allocated.
3187 //
3188 // typedef struct {
3189 // char gpr; /* index into the array of 8 GPRs
3190 // * stored in the register save area
3191 // * gpr=0 corresponds to r3,
3192 // * gpr=1 to r4, etc.
3193 // */
3194 // char fpr; /* index into the array of 8 FPRs
3195 // * stored in the register save area
3196 // * fpr=0 corresponds to f1,
3197 // * fpr=1 to f2, etc.
3198 // */
3199 // char *overflow_arg_area;
3200 // /* location on stack that holds
3201 // * the next overflow argument
3202 // */
3203 // char *reg_save_area;
3204 // /* where r3:r10 and f1:f8 (if saved)
3205 // * are stored
3206 // */
3207 // } va_list[1];
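  // Byte offsets implied by the layout above (derived from the stores below
  // and the corresponding loads in LowerVAARG, with 4-byte pointers):
  // gpr at 0, fpr at 1, overflow_arg_area at 4, reg_save_area at 8.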
3208
3209 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
3210 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
3211 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
3212 PtrVT);
3213 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3214 PtrVT);
3215
3216 uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
3217 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
3218
3219 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
3220 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
3221
3222 uint64_t FPROffset = 1;
3223 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
3224
3225 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3226
3227 // Store first byte : number of int regs
3228 SDValue firstStore =
3229 DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
3230 MachinePointerInfo(SV), MVT::i8);
3231 uint64_t nextOffset = FPROffset;
3232 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
3233 ConstFPROffset);
3234
3235 // Store second byte : number of float regs
3236 SDValue secondStore =
3237 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
3238 MachinePointerInfo(SV, nextOffset), MVT::i8);
3239 nextOffset += StackOffset;
3240 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
3241
3242 // Store second word : arguments given on stack
3243 SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
3244 MachinePointerInfo(SV, nextOffset));
3245 nextOffset += FrameOffset;
3246 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
3247
3248 // Store third word : arguments given in registers
3249 return DAG.getStore(thirdStore, dl, FR, nextPtr,
3250 MachinePointerInfo(SV, nextOffset));
3251}
3252
3253/// FPR - The set of FP registers that should be allocated for arguments
3254/// on Darwin and AIX.
3255static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
3256 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
3257 PPC::F11, PPC::F12, PPC::F13};
3258
3259/// QFPR - The set of QPX registers that should be allocated for arguments.
3260static const MCPhysReg QFPR[] = {
3261 PPC::QF1, PPC::QF2, PPC::QF3, PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7,
3262 PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13};
3263
3264/// CalculateStackSlotSize - Calculates the size reserved for this argument on
3265/// the stack.
3266static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
3267 unsigned PtrByteSize) {
3268 unsigned ArgSize = ArgVT.getStoreSize();
3269 if (Flags.isByVal())
3270 ArgSize = Flags.getByValSize();
3271
3272 // Round up to multiples of the pointer size, except for array members,
3273 // which are always packed.
3274 if (!Flags.isInConsecutiveRegs())
3275 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3276
3277 return ArgSize;
3278}
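// Quick example (illustrative): with an 8-byte pointer size, a 13-byte byval
// argument reserves ((13 + 7) / 8) * 8 = 16 bytes, unless it is an array
// member marked InConsecutiveRegs, in which case it stays packed at 13.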
3279
3280/// CalculateStackSlotAlignment - Calculates the alignment of this argument
3281/// on the stack.
3282static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
3283 ISD::ArgFlagsTy Flags,
3284 unsigned PtrByteSize) {
3285 unsigned Align = PtrByteSize;
3286
3287 // Altivec parameters are padded to a 16 byte boundary.
3288 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3289 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3290 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3291 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3292 Align = 16;
3293 // QPX vector types stored in double-precision are padded to a 32 byte
3294 // boundary.
3295 else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1)
3296 Align = 32;
3297
3298 // ByVal parameters are aligned as requested.
3299 if (Flags.isByVal()) {
3300 unsigned BVAlign = Flags.getByValAlign();
3301 if (BVAlign > PtrByteSize) {
3302 if (BVAlign % PtrByteSize != 0)
3303         llvm_unreachable(
3304             "ByVal alignment is not a multiple of the pointer size");
3305
3306 Align = BVAlign;
3307 }
3308 }
3309
3310 // Array members are always packed to their original alignment.
3311 if (Flags.isInConsecutiveRegs()) {
3312 // If the array member was split into multiple registers, the first
3313 // needs to be aligned to the size of the full type. (Except for
3314 // ppcf128, which is only aligned as its f64 components.)
3315 if (Flags.isSplit() && OrigVT != MVT::ppcf128)
3316 Align = OrigVT.getStoreSize();
3317 else
3318 Align = ArgVT.getStoreSize();
3319 }
3320
3321 return Align;
3322}
3323
3324/// CalculateStackSlotUsed - Return whether this argument will use its
3325/// stack slot (instead of being passed in registers). ArgOffset,
3326/// AvailableFPRs, and AvailableVRs must hold the current argument
3327/// position, and will be updated to account for this argument.
3328static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
3329 ISD::ArgFlagsTy Flags,
3330 unsigned PtrByteSize,
3331 unsigned LinkageSize,
3332 unsigned ParamAreaSize,
3333 unsigned &ArgOffset,
3334 unsigned &AvailableFPRs,
3335 unsigned &AvailableVRs, bool HasQPX) {
3336 bool UseMemory = false;
3337
3338 // Respect alignment of argument on the stack.
3339 unsigned Align =
3340 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
3341 ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
3342 // If there's no space left in the argument save area, we must
3343 // use memory (this check also catches zero-sized arguments).
3344 if (ArgOffset >= LinkageSize + ParamAreaSize)
3345 UseMemory = true;
3346
3347 // Allocate argument on the stack.
3348 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
3349 if (Flags.isInConsecutiveRegsLast())
3350 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3351 // If we overran the argument save area, we must use memory
3352 // (this check catches arguments passed partially in memory)
3353 if (ArgOffset > LinkageSize + ParamAreaSize)
3354 UseMemory = true;
3355
3356 // However, if the argument is actually passed in an FPR or a VR,
3357 // we don't use memory after all.
3358 if (!Flags.isByVal()) {
3359 if (ArgVT == MVT::f32 || ArgVT == MVT::f64 ||
3360 // QPX registers overlap with the scalar FP registers.
3361 (HasQPX && (ArgVT == MVT::v4f32 ||
3362 ArgVT == MVT::v4f64 ||
3363 ArgVT == MVT::v4i1)))
3364 if (AvailableFPRs > 0) {
3365 --AvailableFPRs;
3366 return false;
3367 }
3368 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3369 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3370 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3371 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3372 if (AvailableVRs > 0) {
3373 --AvailableVRs;
3374 return false;
3375 }
3376 }
3377
3378 return UseMemory;
3379}
3380
3381/// EnsureStackAlignment - Round stack frame size up from NumBytes to
3382/// ensure minimum alignment required for target.
3383static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
3384 unsigned NumBytes) {
3385 unsigned TargetAlign = Lowering->getStackAlignment();
3386 unsigned AlignMask = TargetAlign - 1;
3387 NumBytes = (NumBytes + AlignMask) & ~AlignMask;
3388 return NumBytes;
3389}
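// Quick example (illustrative): with a 16-byte target stack alignment,
// NumBytes = 100 becomes (100 + 15) & ~15 = 112.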
3390
3391SDValue PPCTargetLowering::LowerFormalArguments(
3392 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3393 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3394 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3395 if (Subtarget.is64BitELFABI())
3396 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
3397 InVals);
3398 else if (Subtarget.is32BitELFABI())
3399 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
3400 InVals);
3401
3402 // FIXME: We are using this for both AIX and Darwin. We should add appropriate
3403 // AIX testing, and rename it appropriately.
3404 return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, dl, DAG,
3405 InVals);
3406}
3407
3408SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
3409 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3410 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3411 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3412
3413 // 32-bit SVR4 ABI Stack Frame Layout:
3414 // +-----------------------------------+
3415 // +--> | Back chain |
3416 // | +-----------------------------------+
3417 // | | Floating-point register save area |
3418 // | +-----------------------------------+
3419 // | | General register save area |
3420 // | +-----------------------------------+
3421 // | | CR save word |
3422 // | +-----------------------------------+
3423 // | | VRSAVE save word |
3424 // | +-----------------------------------+
3425 // | | Alignment padding |
3426 // | +-----------------------------------+
3427 // | | Vector register save area |
3428 // | +-----------------------------------+
3429 // | | Local variable space |
3430 // | +-----------------------------------+
3431 // | | Parameter list area |
3432 // | +-----------------------------------+
3433 // | | LR save word |
3434 // | +-----------------------------------+
3435 // SP--> +--- | Back chain |
3436 // +-----------------------------------+
3437 //
3438 // Specifications:
3439 // System V Application Binary Interface PowerPC Processor Supplement
3440 // AltiVec Technology Programming Interface Manual
3441
3442 MachineFunction &MF = DAG.getMachineFunction();
3443 MachineFrameInfo &MFI = MF.getFrameInfo();
3444 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3445
3446 EVT PtrVT = getPointerTy(MF.getDataLayout());
3447 // Potential tail calls could cause overwriting of argument stack slots.
3448 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3449 (CallConv == CallingConv::Fast));
3450 unsigned PtrByteSize = 4;
3451
3452 // Assign locations to all of the incoming arguments.
3453 SmallVector<CCValAssign, 16> ArgLocs;
3454 PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3455 *DAG.getContext());
3456
3457 // Reserve space for the linkage area on the stack.
3458 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3459 CCInfo.AllocateStack(LinkageSize, PtrByteSize);
3460 if (useSoftFloat())
3461 CCInfo.PreAnalyzeFormalArguments(Ins);
3462
3463 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
3464 CCInfo.clearWasPPCF128();
3465
3466 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3467 CCValAssign &VA = ArgLocs[i];
3468
3469 // Arguments stored in registers.
3470 if (VA.isRegLoc()) {
3471 const TargetRegisterClass *RC;
3472 EVT ValVT = VA.getValVT();
3473
3474 switch (ValVT.getSimpleVT().SimpleTy) {
3475 default:
3476       llvm_unreachable("ValVT not supported by formal arguments Lowering");
3477 case MVT::i1:
3478 case MVT::i32:
3479 RC = &PPC::GPRCRegClass;
3480 break;
3481 case MVT::f32:
3482 if (Subtarget.hasP8Vector())
3483 RC = &PPC::VSSRCRegClass;
3484 else if (Subtarget.hasSPE())
3485 RC = &PPC::GPRCRegClass;
3486 else
3487 RC = &PPC::F4RCRegClass;
3488 break;
3489 case MVT::f64:
3490 if (Subtarget.hasVSX())
3491 RC = &PPC::VSFRCRegClass;
3492 else if (Subtarget.hasSPE())
3493 // SPE passes doubles in GPR pairs.
3494 RC = &PPC::GPRCRegClass;
3495 else
3496 RC = &PPC::F8RCRegClass;
3497 break;
3498 case MVT::v16i8:
3499 case MVT::v8i16:
3500 case MVT::v4i32:
3501 RC = &PPC::VRRCRegClass;
3502 break;
3503 case MVT::v4f32:
3504 RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass;
3505 break;
3506 case MVT::v2f64:
3507 case MVT::v2i64:
3508 RC = &PPC::VRRCRegClass;
3509 break;
3510 case MVT::v4f64:
3511 RC = &PPC::QFRCRegClass;
3512 break;
3513 case MVT::v4i1:
3514 RC = &PPC::QBRCRegClass;
3515 break;
3516 }
3517
3518 SDValue ArgValue;
3519 // Transform the arguments stored in physical registers into
3520 // virtual ones.
3521 if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
3522         assert(i + 1 < e && "No second half of double precision argument");
3523 unsigned RegLo = MF.addLiveIn(VA.getLocReg(), RC);
3524 unsigned RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
3525 SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
3526 SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
3527 if (!Subtarget.isLittleEndian())
3528 std::swap (ArgValueLo, ArgValueHi);
3529 ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
3530 ArgValueHi);
3531 } else {
3532 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3533 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
3534 ValVT == MVT::i1 ? MVT::i32 : ValVT);
3535 if (ValVT == MVT::i1)
3536 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
3537 }
3538
3539 InVals.push_back(ArgValue);
3540 } else {
3541 // Argument stored in memory.
3542       assert(VA.isMemLoc());
3543
3544 // Get the extended size of the argument type in stack
3545 unsigned ArgSize = VA.getLocVT().getStoreSize();
3546 // Get the actual size of the argument type
3547 unsigned ObjSize = VA.getValVT().getStoreSize();
3548 unsigned ArgOffset = VA.getLocMemOffset();
3549 // Stack objects in PPC32 are right justified.
3550 ArgOffset += ArgSize - ObjSize;
3551 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
3552
3553 // Create load nodes to retrieve arguments from the stack.
3554 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3555 InVals.push_back(
3556 DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
3557 }
3558 }
3559
3560 // Assign locations to all of the incoming aggregate by value arguments.
3561 // Aggregates passed by value are stored in the local variable space of the
3562 // caller's stack frame, right above the parameter list area.
3563 SmallVector<CCValAssign, 16> ByValArgLocs;
3564 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
3565 ByValArgLocs, *DAG.getContext());
3566
3567 // Reserve stack space for the allocations in CCInfo.
3568 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
3569
3570 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
3571
3572 // Area that is at least reserved in the caller of this function.
3573 unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
3574 MinReservedArea = std::max(MinReservedArea, LinkageSize);
3575
3576 // Set the size that is at least reserved in caller of this function. Tail
3577 // call optimized function's reserved stack space needs to be aligned so that
3578 // taking the difference between two stack areas will result in an aligned
3579 // stack.
3580 MinReservedArea =
3581 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
3582 FuncInfo->setMinReservedArea(MinReservedArea);
3583
3584 SmallVector<SDValue, 8> MemOps;
3585
3586 // If the function takes variable number of arguments, make a frame index for
3587 // the start of the first vararg value... for expansion of llvm.va_start.
3588 if (isVarArg) {
3589 static const MCPhysReg GPArgRegs[] = {
3590 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
3591 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
3592 };
3593 const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
3594
3595 static const MCPhysReg FPArgRegs[] = {
3596 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
3597 PPC::F8
3598 };
3599 unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
3600
3601 if (useSoftFloat() || hasSPE())
3602 NumFPArgRegs = 0;
3603
3604 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
3605 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
3606
3607 // Make room for NumGPArgRegs and NumFPArgRegs.
3608 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
3609 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
3610
3611 FuncInfo->setVarArgsStackOffset(
3612 MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
3613 CCInfo.getNextStackOffset(), true));
3614
3615 FuncInfo->setVarArgsFrameIndex(MFI.CreateStackObject(Depth, 8, false));
3616 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3617
3618 // The fixed integer arguments of a variadic function are stored to the
3619 // VarArgsFrameIndex on the stack so that they may be loaded by
3620 // dereferencing the result of va_next.
3621 for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
3622 // Get an existing live-in vreg, or add a new one.
3623 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
3624 if (!VReg)
3625 VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
3626
3627 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3628 SDValue Store =
3629 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3630 MemOps.push_back(Store);
3631 // Increment the address by four for the next argument to store
3632 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
3633 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3634 }
3635
3636 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
3637 // is set.
3638 // The double arguments are stored to the VarArgsFrameIndex
3639 // on the stack.
3640 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
3641 // Get an existing live-in vreg, or add a new one.
3642 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
3643 if (!VReg)
3644 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
3645
3646 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
3647 SDValue Store =
3648 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3649 MemOps.push_back(Store);
3650 // Increment the address by eight for the next argument to store
3651 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
3652 PtrVT);
3653 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3654 }
3655 }
3656
3657 if (!MemOps.empty())
3658 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3659
3660 return Chain;
3661}
3662
3663// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3664// value to MVT::i64 and then truncate to the correct register size.
3665SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
3666 EVT ObjectVT, SelectionDAG &DAG,
3667 SDValue ArgVal,
3668 const SDLoc &dl) const {
3669 if (Flags.isSExt())
3670 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
3671 DAG.getValueType(ObjectVT));
3672 else if (Flags.isZExt())
3673 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
3674 DAG.getValueType(ObjectVT));
3675
3676 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
3677}
3678
3679SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
3680 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3681 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3682 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3683 // TODO: add description of PPC stack frame format, or at least some docs.
3684 //
3685 bool isELFv2ABI = Subtarget.isELFv2ABI();
3686 bool isLittleEndian = Subtarget.isLittleEndian();
3687 MachineFunction &MF = DAG.getMachineFunction();
3688 MachineFrameInfo &MFI = MF.getFrameInfo();
3689 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3690
3691   assert(!(CallConv == CallingConv::Fast && isVarArg) &&
3692          "fastcc not supported on varargs functions");
3694 EVT PtrVT = getPointerTy(MF.getDataLayout());
3695 // Potential tail calls could cause overwriting of argument stack slots.
3696 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3697 (CallConv == CallingConv::Fast));
3698 unsigned PtrByteSize = 8;
3699 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3700
3701 static const MCPhysReg GPR[] = {
3702 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
3703 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
3704 };
3705 static const MCPhysReg VR[] = {
3706 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
3707 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
3708 };
3709
3710 const unsigned Num_GPR_Regs = array_lengthof(GPR);
3711 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
3712 const unsigned Num_VR_Regs = array_lengthof(VR);
3713 const unsigned Num_QFPR_Regs = Num_FPR_Regs;
3714
3715 // Do a first pass over the arguments to determine whether the ABI
3716 // guarantees that our caller has allocated the parameter save area
3717 // on its stack frame. In the ELFv1 ABI, this is always the case;
3718 // in the ELFv2 ABI, it is true if this is a vararg function or if
3719 // any parameter is located in a stack slot.
3720
3721 bool HasParameterArea = !isELFv2ABI || isVarArg;
3722 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
3723 unsigned NumBytes = LinkageSize;
3724 unsigned AvailableFPRs = Num_FPR_Regs;
3725 unsigned AvailableVRs = Num_VR_Regs;
3726 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
3727 if (Ins[i].Flags.isNest())
3728 continue;
3729
3730 if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
3731 PtrByteSize, LinkageSize, ParamAreaSize,
3732 NumBytes, AvailableFPRs, AvailableVRs,
3733 Subtarget.hasQPX()))
3734 HasParameterArea = true;
3735 }
3736
3737 // Add DAG nodes to load the arguments or copy them out of registers. On
3738 // entry to a function on PPC, the arguments start after the linkage area,
3739 // although the first ones are often in registers.
3740
3741 unsigned ArgOffset = LinkageSize;
3742 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
3743 unsigned &QFPR_idx = FPR_idx;
3744 SmallVector<SDValue, 8> MemOps;
3745 Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
3746 unsigned CurArgIdx = 0;
3747 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
3748 SDValue ArgVal;
3749 bool needsLoad = false;
3750 EVT ObjectVT = Ins[ArgNo].VT;
3751 EVT OrigVT = Ins[ArgNo].ArgVT;
3752 unsigned ObjSize = ObjectVT.getStoreSize();
3753 unsigned ArgSize = ObjSize;
3754 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
3755 if (Ins[ArgNo].isOrigArg()) {
3756 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
3757 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
3758 }
3759 // We re-align the argument offset for each argument, except when using the
3760 // fast calling convention, when we need to make sure we do that only when
3761 // we'll actually use a stack slot.
3762 unsigned CurArgOffset, Align;
3763 auto ComputeArgOffset = [&]() {
3764 /* Respect alignment of argument on the stack. */
3765 Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
3766 ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
3767 CurArgOffset = ArgOffset;
3768 };
3769
3770 if (CallConv != CallingConv::Fast) {
3771 ComputeArgOffset();
3772
3773 /* Compute GPR index associated with argument offset. */
3774 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
3775 GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
3776 }
3777
3778 // FIXME the codegen can be much improved in some cases.
3779 // We do not have to keep everything in memory.
3780 if (Flags.isByVal()) {
3781       assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
3782
3783 if (CallConv == CallingConv::Fast)
3784 ComputeArgOffset();
3785
3786       // ObjSize is the true size; ArgSize is rounded up to a multiple of the pointer size.
3787 ObjSize = Flags.getByValSize();
3788 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3789 // Empty aggregate parameters do not take up registers. Examples:
3790 // struct { } a;
3791 // union { } b;
3792 // int c[0];
3793 // etc. However, we have to provide a place-holder in InVals, so
3794 // pretend we have an 8-byte item at the current address for that
3795 // purpose.
3796 if (!ObjSize) {
3797 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
3798 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3799 InVals.push_back(FIN);
3800 continue;
3801 }
3802
3803 // Create a stack object covering all stack doublewords occupied
3804 // by the argument. If the argument is (fully or partially) on
3805 // the stack, or if the argument is fully in registers but the
3806 // caller has allocated the parameter save anyway, we can refer
3807 // directly to the caller's stack frame. Otherwise, create a
3808 // local copy in our own frame.
3809 int FI;
3810 if (HasParameterArea ||
3811 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
3812 FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
3813 else
3814 FI = MFI.CreateStackObject(ArgSize, Align, false);
3815 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3816
3817 // Handle aggregates smaller than 8 bytes.
3818 if (ObjSize < PtrByteSize) {
3819 // The value of the object is its address, which differs from the
3820 // address of the enclosing doubleword on big-endian systems.
3821 SDValue Arg = FIN;
3822 if (!isLittleEndian) {
3823 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
3824 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
3825 }
3826 InVals.push_back(Arg);
3827
3828 if (GPR_idx != Num_GPR_Regs) {
3829 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3830 FuncInfo->addLiveInAttr(VReg, Flags);
3831 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3832 SDValue Store;
3833
3834 if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
3835 EVT ObjType = (ObjSize == 1 ? MVT::i8 :
3836 (ObjSize == 2 ? MVT::i16 : MVT::i32));
3837 Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
3838 MachinePointerInfo(&*FuncArg), ObjType);
3839 } else {
3840 // For sizes that don't fit a truncating store (3, 5, 6, 7),
3841 // store the whole register as-is to the parameter save area
3842 // slot.
3843 Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3844 MachinePointerInfo(&*FuncArg));
3845 }
3846
3847 MemOps.push_back(Store);
3848 }
3849 // Whether we copied from a register or not, advance the offset
3850 // into the parameter save area by a full doubleword.
3851 ArgOffset += PtrByteSize;
3852 continue;
3853 }
3854
3855 // The value of the object is its address, which is the address of
3856 // its first stack doubleword.
3857 InVals.push_back(FIN);
3858
3859 // Store whatever pieces of the object are in registers to memory.
3860 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
3861 if (GPR_idx == Num_GPR_Regs)
3862 break;
3863
3864 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3865 FuncInfo->addLiveInAttr(VReg, Flags);
3866 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3867 SDValue Addr = FIN;
3868 if (j) {
3869 SDValue Off = DAG.getConstant(j, dl, PtrVT);
3870 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
3871 }
3872 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
3873 MachinePointerInfo(&*FuncArg, j));
3874 MemOps.push_back(Store);
3875 ++GPR_idx;
3876 }
3877 ArgOffset += ArgSize;
3878 continue;
3879 }
3880
3881 switch (ObjectVT.getSimpleVT().SimpleTy) {
3882    default: llvm_unreachable("Unhandled argument type!");
3883 case MVT::i1:
3884 case MVT::i32:
3885 case MVT::i64:
3886 if (Flags.isNest()) {
3887 // The 'nest' parameter, if any, is passed in R11.
3888 unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
3889 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3890
3891 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
3892 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
3893
3894 break;
3895 }
3896
3897 // These can be scalar arguments or elements of an integer array type
3898 // passed directly. Clang may use those instead of "byval" aggregate
3899 // types to avoid forcing arguments to memory unnecessarily.
3900 if (GPR_idx != Num_GPR_Regs) {
3901 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3902 FuncInfo->addLiveInAttr(VReg, Flags);
3903 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3904
3905 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
3906 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3907 // value to MVT::i64 and then truncate to the correct register size.
3908 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
3909 } else {
3910 if (CallConv == CallingConv::Fast)
3911 ComputeArgOffset();
3912
3913 needsLoad = true;
3914 ArgSize = PtrByteSize;
3915 }
3916 if (CallConv != CallingConv::Fast || needsLoad)
3917 ArgOffset += 8;
3918 break;
3919
3920 case MVT::f32:
3921 case MVT::f64:
3922 // These can be scalar arguments or elements of a float array type
3923      // passed directly. The latter are used to implement ELFv2 homogeneous
3924 // float aggregates.
3925 if (FPR_idx != Num_FPR_Regs) {
3926 unsigned VReg;
3927
3928 if (ObjectVT == MVT::f32)
3929 VReg = MF.addLiveIn(FPR[FPR_idx],
3930 Subtarget.hasP8Vector()
3931 ? &PPC::VSSRCRegClass
3932 : &PPC::F4RCRegClass);
3933 else
3934 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
3935 ? &PPC::VSFRCRegClass
3936 : &PPC::F8RCRegClass);
3937
3938 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3939 ++FPR_idx;
3940 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
3941 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
3942 // once we support fp <-> gpr moves.
3943
3944 // This can only ever happen in the presence of f32 array types,
3945 // since otherwise we never run out of FPRs before running out
3946 // of GPRs.
3947 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3948 FuncInfo->addLiveInAttr(VReg, Flags);
3949 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3950
3951 if (ObjectVT == MVT::f32) {
3952 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
3953 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
3954 DAG.getConstant(32, dl, MVT::i32));
3955 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
3956 }
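            // Illustrative note: the shift above handles the f32 element that
            // lives in the high-order 32 bits of the 64-bit GPR (offset a
            // multiple of 8 on big-endian, offset % 8 == 4 on little-endian);
            // the element in the low half needs only the truncate.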
3957
3958 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
3959 } else {
3960 if (CallConv == CallingConv::Fast)
3961 ComputeArgOffset();
3962
3963 needsLoad = true;
3964 }
3965
3966 // When passing an array of floats, the array occupies consecutive
3967 // space in the argument area; only round up to the next doubleword
3968 // at the end of the array. Otherwise, each float takes 8 bytes.
3969 if (CallConv != CallingConv::Fast || needsLoad) {
3970 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
3971 ArgOffset += ArgSize;
3972 if (Flags.isInConsecutiveRegsLast())
3973 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3974 }
3975 break;
3976 case MVT::v4f32:
3977 case MVT::v4i32:
3978 case MVT::v8i16:
3979 case MVT::v16i8:
3980 case MVT::v2f64:
3981 case MVT::v2i64:
3982 case MVT::v1i128:
3983 case MVT::f128:
3984 if (!Subtarget.hasQPX()) {
3985 // These can be scalar arguments or elements of a vector array type
3986        // passed directly. The latter are used to implement ELFv2 homogeneous
3987 // vector aggregates.
3988 if (VR_idx != Num_VR_Regs) {
3989 unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
3990 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3991 ++VR_idx;
3992 } else {
3993 if (CallConv == CallingConv::Fast)
3994 ComputeArgOffset();
3995 needsLoad = true;
3996 }
3997 if (CallConv != CallingConv::Fast || needsLoad)
3998 ArgOffset += 16;
3999 break;
4000 } // not QPX
4001
4002    assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 &&
4003           "Invalid QPX parameter type");
4004    LLVM_FALLTHROUGH;
4005
4006 case MVT::v4f64:
4007 case MVT::v4i1:
4008 // QPX vectors are treated like their scalar floating-point subregisters
4009 // (except that they're larger).
4010 unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 16 : 32;
4011 if (QFPR_idx != Num_QFPR_Regs) {
4012 const TargetRegisterClass *RC;
4013 switch (ObjectVT.getSimpleVT().SimpleTy) {
4014 case MVT::v4f64: RC = &PPC::QFRCRegClass; break;
4015 case MVT::v4f32: RC = &PPC::QSRCRegClass; break;
4016 default: RC = &PPC::QBRCRegClass; break;
4017 }
4018
4019 unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC);
4020 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4021 ++QFPR_idx;
4022 } else {
4023 if (CallConv == CallingConv::Fast)
4024 ComputeArgOffset();
4025 needsLoad = true;
4026 }
4027 if (CallConv != CallingConv::Fast || needsLoad)
4028 ArgOffset += Sz;
4029 break;
4030 }
4031
4032 // We need to load the argument to a virtual register if we determined
4033 // above that we ran out of physical registers of the appropriate type.
4034 if (needsLoad) {
4035 if (ObjSize < ArgSize && !isLittleEndian)
4036 CurArgOffset += ArgSize - ObjSize;
4037 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4038 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4039 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4040 }
4041
4042 InVals.push_back(ArgVal);
4043 }
4044
4045 // Area that is at least reserved in the caller of this function.
4046 unsigned MinReservedArea;
4047 if (HasParameterArea)
4048 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4049 else
4050 MinReservedArea = LinkageSize;
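  // Illustrative note: with 8-byte pointers the parameter save area floor is
  // 8 * 8 = 64 bytes, so when a parameter area exists the caller reserves at
  // least LinkageSize + 64 bytes (the exact LinkageSize depends on the ABI).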
4051
4052 // Set the size that is at least reserved in caller of this function. Tail
4053 // call optimized functions' reserved stack space needs to be aligned so that
4054 // taking the difference between two stack areas will result in an aligned
4055 // stack.
4056 MinReservedArea =
4057 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4058 FuncInfo->setMinReservedArea(MinReservedArea);
4059
4060 // If the function takes variable number of arguments, make a frame index for
4061 // the start of the first vararg value... for expansion of llvm.va_start.
4062 if (isVarArg) {
4063 int Depth = ArgOffset;
4064
4065 FuncInfo->setVarArgsFrameIndex(
4066 MFI.CreateFixedObject(PtrByteSize, Depth, true));
4067 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4068
4069 // If this function is vararg, store any remaining integer argument regs
4070 // to their spots on the stack so that they may be loaded by dereferencing
4071 // the result of va_next.
4072 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4073 GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4074 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4075 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4076 SDValue Store =
4077 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4078 MemOps.push_back(Store);
4079      // Increment the address by the pointer size for the next argument to store
4080 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4081 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4082 }
4083 }
4084
4085 if (!MemOps.empty())
4086 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4087
4088 return Chain;
4089}
4090
4091SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
4092 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4093 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4094 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4095 // TODO: add description of PPC stack frame format, or at least some docs.
4096 //
4097 MachineFunction &MF = DAG.getMachineFunction();
4098 MachineFrameInfo &MFI = MF.getFrameInfo();
4099 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4100
4101 EVT PtrVT = getPointerTy(MF.getDataLayout());
4102 bool isPPC64 = PtrVT == MVT::i64;
4103 // Potential tail calls could cause overwriting of argument stack slots.
4104 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4105 (CallConv == CallingConv::Fast));
4106 unsigned PtrByteSize = isPPC64 ? 8 : 4;
4107 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4108 unsigned ArgOffset = LinkageSize;
4109 // Area that is at least reserved in caller of this function.
4110 unsigned MinReservedArea = ArgOffset;
4111
4112 static const MCPhysReg GPR_32[] = { // 32-bit registers.
4113 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4114 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4115 };
4116 static const MCPhysReg GPR_64[] = { // 64-bit registers.
4117 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4118 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4119 };
4120 static const MCPhysReg VR[] = {
4121 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4122 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4123 };
4124
4125 const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
4126 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4127 const unsigned Num_VR_Regs = array_lengthof( VR);
4128
4129 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4130
4131 const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
4132
4133 // In 32-bit non-varargs functions, the stack space for vectors is after the
4134 // stack space for non-vectors. We do not use this space unless we have
4135 // too many vectors to fit in registers, something that only occurs in
4136 // constructed examples:), but we have to walk the arglist to figure
4137 // that out...for the pathological case, compute VecArgOffset as the
4138 // start of the vector parameter area. Computing VecArgOffset is the
4139 // entire point of the following loop.
4140 unsigned VecArgOffset = ArgOffset;
4141 if (!isVarArg && !isPPC64) {
4142 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
4143 ++ArgNo) {
4144 EVT ObjectVT = Ins[ArgNo].VT;
4145 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4146
4147 if (Flags.isByVal()) {
4148 // ObjSize is the true size, ArgSize rounded up to multiple of regs.
4149 unsigned ObjSize = Flags.getByValSize();
4150 unsigned ArgSize =
4151 ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4152 VecArgOffset += ArgSize;
4153 continue;
4154 }
4155
4156 switch(ObjectVT.getSimpleVT().SimpleTy) {
4157      default: llvm_unreachable("Unhandled argument type!");
4158 case MVT::i1:
4159 case MVT::i32:
4160 case MVT::f32:
4161 VecArgOffset += 4;
4162 break;
4163 case MVT::i64: // PPC64
4164 case MVT::f64:
4165 // FIXME: We are guaranteed to be !isPPC64 at this point.
4166 // Does MVT::i64 apply?
4167 VecArgOffset += 8;
4168 break;
4169 case MVT::v4f32:
4170 case MVT::v4i32:
4171 case MVT::v8i16:
4172 case MVT::v16i8:
4173 // Nothing to do, we're only looking at Nonvector args here.
4174 break;
4175 }
4176 }
4177 }
4178 // We've found where the vector parameter area in memory is. Skip the
4179 // first 12 parameters; these don't use that memory.
4180 VecArgOffset = ((VecArgOffset+15)/16)*16;
4181 VecArgOffset += 12*16;
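// Worked example (illustrative): if the non-vector arguments end at offset 40,
// rounding up to 16 gives 48, and skipping the 12 register-passed vectors adds
// 12 * 16 = 192, so VecArgOffset becomes 240.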
4182
4183 // Add DAG nodes to load the arguments or copy them out of registers. On
4184 // entry to a function on PPC, the arguments start after the linkage area,
4185 // although the first ones are often in registers.
4186
4187 SmallVector<SDValue, 8> MemOps;
4188 unsigned nAltivecParamsAtEnd = 0;
4189 Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4190 unsigned CurArgIdx = 0;
4191 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4192 SDValue ArgVal;
4193 bool needsLoad = false;
4194 EVT ObjectVT = Ins[ArgNo].VT;
4195 unsigned ObjSize = ObjectVT.getSizeInBits()/8;
4196 unsigned ArgSize = ObjSize;
4197 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4198 if (Ins[ArgNo].isOrigArg()) {
4199 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4200 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4201 }
4202 unsigned CurArgOffset = ArgOffset;
4203
4204 // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
4205 if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
4206 ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
4207 if (isVarArg || isPPC64) {
4208 MinReservedArea = ((MinReservedArea+15)/16)*16;
4209 MinReservedArea += CalculateStackSlotSize(ObjectVT,
4210 Flags,
4211 PtrByteSize);
4212 } else nAltivecParamsAtEnd++;
4213 } else
4214 // Calculate min reserved area.
4215 MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
4216 Flags,
4217 PtrByteSize);
4218
4219 // FIXME the codegen can be much improved in some cases.
4220 // We do not have to keep everything in memory.
4221 if (Flags.isByVal()) {
4222      assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4223
4224 // ObjSize is the true size, ArgSize rounded up to multiple of registers.
4225 ObjSize = Flags.getByValSize();
4226 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4227 // Objects of size 1 and 2 are right justified, everything else is
4228 // left justified. This means the memory address is adjusted forwards.
4229 if (ObjSize==1 || ObjSize==2) {
4230 CurArgOffset = CurArgOffset + (4 - ObjSize);
4231 }
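      // Worked example (illustrative): a 1-byte struct whose 4-byte slot
      // starts at offset 24 is right justified, so its address becomes
      // 24 + (4 - 1) = 27.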
4232 // The value of the object is its address.
4233 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, false, true);
4234 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4235 InVals.push_back(FIN);
4236 if (ObjSize==1 || ObjSize==2) {
4237 if (GPR_idx != Num_GPR_Regs) {
4238 unsigned VReg;
4239 if (isPPC64)
4240 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4241 else
4242 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4243 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4244 EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
4245 SDValue Store =
4246 DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
4247 MachinePointerInfo(&*FuncArg), ObjType);
4248 MemOps.push_back(Store);
4249 ++GPR_idx;
4250 }
4251
4252 ArgOffset += PtrByteSize;
4253
4254 continue;
4255 }
4256 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4257 // Store whatever pieces of the object are in registers
4258 // to memory. ArgOffset will be the address of the beginning
4259 // of the object.
4260 if (GPR_idx != Num_GPR_Regs) {
4261 unsigned VReg;
4262 if (isPPC64)
4263 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4264 else
4265 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4266 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4267 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4268 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4269 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4270 MachinePointerInfo(&*FuncArg, j));
4271 MemOps.push_back(Store);
4272 ++GPR_idx;
4273 ArgOffset += PtrByteSize;
4274 } else {
4275 ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
4276 break;
4277 }
4278 }
4279 continue;
4280 }
4281
4282 switch (ObjectVT.getSimpleVT().SimpleTy) {
4283    default: llvm_unreachable("Unhandled argument type!");
4284 case MVT::i1:
4285 case MVT::i32:
4286 if (!isPPC64) {
4287 if (GPR_idx != Num_GPR_Regs) {
4288 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4289 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
4290
4291 if (ObjectVT == MVT::i1)
4292 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);
4293
4294 ++GPR_idx;
4295 } else {
4296 needsLoad = true;
4297 ArgSize = PtrByteSize;
4298 }
4299 // All int arguments reserve stack space in the Darwin ABI.
4300 ArgOffset += PtrByteSize;
4301 break;
4302 }
4303      LLVM_FALLTHROUGH;
4304 case MVT::i64: // PPC64
4305 if (GPR_idx != Num_GPR_Regs) {
4306 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4307 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4308
4309 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4310 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4311 // value to MVT::i64 and then truncate to the correct register size.
4312 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4313
4314 ++GPR_idx;
4315 } else {
4316 needsLoad = true;
4317 ArgSize = PtrByteSize;
4318 }
4319 // All int arguments reserve stack space in the Darwin ABI.
4320 ArgOffset += 8;
4321 break;
4322
4323 case MVT::f32:
4324 case MVT::f64:
4325 // Every 4 bytes of argument space consumes one of the GPRs available for
4326 // argument passing.
4327 if (GPR_idx != Num_GPR_Regs) {
4328 ++GPR_idx;
4329 if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
4330 ++GPR_idx;
4331 }
4332 if (FPR_idx != Num_FPR_Regs) {
4333 unsigned VReg;
4334
4335 if (ObjectVT == MVT::f32)
4336 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
4337 else
4338 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
4339
4340 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4341 ++FPR_idx;
4342 } else {
4343 needsLoad = true;
4344 }
4345
4346 // All FP arguments reserve stack space in the Darwin ABI.
4347 ArgOffset += isPPC64 ? 8 : ObjSize;
4348 break;
4349 case MVT::v4f32:
4350 case MVT::v4i32:
4351 case MVT::v8i16:
4352 case MVT::v16i8:
4353 // Note that vector arguments in registers don't reserve stack space,
4354 // except in varargs functions.
4355 if (VR_idx != Num_VR_Regs) {
4356 unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4357 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4358 if (isVarArg) {
4359 while ((ArgOffset % 16) != 0) {
4360 ArgOffset += PtrByteSize;
4361 if (GPR_idx != Num_GPR_Regs)
4362 GPR_idx++;
4363 }
4364 ArgOffset += 16;
4365 GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
4366 }
4367 ++VR_idx;
4368 } else {
4369 if (!isVarArg && !isPPC64) {
4370 // Vectors go after all the nonvectors.
4371 CurArgOffset = VecArgOffset;
4372 VecArgOffset += 16;
4373 } else {
4374 // Vectors are aligned.
4375 ArgOffset = ((ArgOffset+15)/16)*16;
4376 CurArgOffset = ArgOffset;
4377 ArgOffset += 16;
4378 }
4379 needsLoad = true;
4380 }
4381 break;
4382 }
4383
4384 // We need to load the argument to a virtual register if we determined above
4385 // that we ran out of physical registers of the appropriate type.
4386 if (needsLoad) {
4387 int FI = MFI.CreateFixedObject(ObjSize,
4388 CurArgOffset + (ArgSize - ObjSize),
4389 isImmutable);
4390 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4391 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4392 }
4393
4394 InVals.push_back(ArgVal);
4395 }
4396
4397 // Allow for Altivec parameters at the end, if needed.
4398 if (nAltivecParamsAtEnd) {
4399 MinReservedArea = ((MinReservedArea+15)/16)*16;
4400 MinReservedArea += 16*nAltivecParamsAtEnd;
4401 }
4402
4403 // Area that is at least reserved in the caller of this function.
4404 MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);
4405
4406 // Set the size that is at least reserved in caller of this function. Tail
4407 // call optimized functions' reserved stack space needs to be aligned so that
4408 // taking the difference between two stack areas will result in an aligned
4409 // stack.
4410 MinReservedArea =
4411 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4412 FuncInfo->setMinReservedArea(MinReservedArea);
4413
4414 // If the function takes variable number of arguments, make a frame index for
4415 // the start of the first vararg value... for expansion of llvm.va_start.
4416 if (isVarArg) {
4417 int Depth = ArgOffset;
4418
4419 FuncInfo->setVarArgsFrameIndex(
4420 MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
4421 Depth, true));
4422 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4423
4424 // If this function is vararg, store any remaining integer argument regs
4425 // to their spots on the stack so that they may be loaded by dereferencing
4426 // the result of va_next.
4427 for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
4428 unsigned VReg;
4429
4430 if (isPPC64)
4431 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4432 else
4433 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4434
4435 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4436 SDValue Store =
4437 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4438 MemOps.push_back(Store);
4439      // Increment the address by the pointer size for the next argument to store
4440 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4441 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4442 }
4443 }
4444
4445 if (!MemOps.empty())
4446 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4447
4448 return Chain;
4449}
4450
4451/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4452/// adjusted to accommodate the arguments for the tailcall.
4453static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4454 unsigned ParamSize) {
4455
4456 if (!isTailCall) return 0;
4457
4458 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4459 unsigned CallerMinReservedArea = FI->getMinReservedArea();
4460 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4461 // Remember only if the new adjustment is bigger.
4462 if (SPDiff < FI->getTailCallSPDelta())
4463 FI->setTailCallSPDelta(SPDiff);
4464
4465 return SPDiff;
4466}
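// Worked example (illustrative, assuming the recorded delta starts at zero):
// if the caller reserved 112 bytes but the tail-callee's parameters need 128,
// SPDiff is 112 - 128 = -16; being smaller than the current delta, it is
// remembered so the extra stack can be accounted for.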
4467
4468static bool isFunctionGlobalAddress(SDValue Callee);
4469
4470static bool
4471callsShareTOCBase(const Function *Caller, SDValue Callee,
4472 const TargetMachine &TM) {
4473 // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4474 // don't have enough information to determine if the caller and callee share
4475 // the same TOC base, so we have to pessimistically assume they don't for
4476 // correctness.
4477 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4478 if (!G)
4479 return false;
4480
4481 const GlobalValue *GV = G->getGlobal();
4482 // The medium and large code models are expected to provide a sufficiently
4483 // large TOC to satisfy all data addressing needs of a module with a
4484 // single TOC. Since each module will be addressed with a single TOC, we
4485 // only need to check that caller and callee don't cross dso boundaries.
4486 if (CodeModel::Medium == TM.getCodeModel() ||
4487 CodeModel::Large == TM.getCodeModel())
4488 return TM.shouldAssumeDSOLocal(*Caller->getParent(), GV);
4489
4490 // Otherwise we need to ensure callee and caller are in the same section,
4491 // since the linker may allocate multiple TOCs, and we don't know which
4492 // sections will belong to the same TOC base.
4493
4494 if (!GV->isStrongDefinitionForLinker())
4495 return false;
4496
4497 // Any explicitly-specified sections and section prefixes must also match.
4498 // Also, if we're using -ffunction-sections, then each function is always in
4499 // a different section (the same is true for COMDAT functions).
4500 if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
4501 GV->getSection() != Caller->getSection())
4502 return false;
4503 if (const auto *F = dyn_cast<Function>(GV)) {
4504 if (F->getSectionPrefix() != Caller->getSectionPrefix())
4505 return false;
4506 }
4507
4508 // If the callee might be interposed, then we can't assume the ultimate call
4509 // target will be in the same section. Even in cases where we can assume that
4510 // interposition won't happen, in any case where the linker might insert a
4511 // stub to allow for interposition, we must generate code as though
4512 // interposition might occur. To understand why this matters, consider a
4513 // situation where: a -> b -> c where the arrows indicate calls. b and c are
4514 // in the same section, but a is in a different module (i.e. has a different
4515 // TOC base pointer). If the linker allows for interposition between b and c,
4516 // then it will generate a stub for the call edge between b and c which will
4517 // save the TOC pointer into the designated stack slot allocated by b. If we
4518 // return true here, and therefore allow a tail call between b and c, that
4519 // stack slot won't exist and the b -> c stub will end up saving b's TOC base
4520 // pointer into the stack slot allocated by a (where the a -> b stub saved
4521 // a's TOC base pointer). If we're not considering a tail call but rather
4522 // whether a nop is needed after the call instruction in b (because the linker
4523 // will insert a stub), the linker might complain about a missing nop if we
4524 // omit it (although many don't complain in this case).
4525 if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
4526 return false;
4527
4528 return true;
4529}
4530
4531static bool
4532needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4533 const SmallVectorImpl<ISD::OutputArg> &Outs) {
4534  assert(Subtarget.is64BitELFABI());
4535
4536 const unsigned PtrByteSize = 8;
4537 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4538
4539 static const MCPhysReg GPR[] = {
4540 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4541 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4542 };
4543 static const MCPhysReg VR[] = {
4544 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4545 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4546 };
4547
4548 const unsigned NumGPRs = array_lengthof(GPR);
4549 const unsigned NumFPRs = 13;
4550 const unsigned NumVRs = array_lengthof(VR);
4551 const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4552
4553 unsigned NumBytes = LinkageSize;
4554 unsigned AvailableFPRs = NumFPRs;
4555 unsigned AvailableVRs = NumVRs;
4556
4557 for (const ISD::OutputArg& Param : Outs) {
4558 if (Param.Flags.isNest()) continue;
4559
4560 if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags,
4561 PtrByteSize, LinkageSize, ParamAreaSize,
4562 NumBytes, AvailableFPRs, AvailableVRs,
4563 Subtarget.hasQPX()))
4564 return true;
4565 }
4566 return false;
4567}
4568
4569static bool
4570hasSameArgumentList(const Function *CallerFn, ImmutableCallSite CS) {
4571 if (CS.arg_size() != CallerFn->arg_size())
4572 return false;
4573
4574 ImmutableCallSite::arg_iterator CalleeArgIter = CS.arg_begin();
4575 ImmutableCallSite::arg_iterator CalleeArgEnd = CS.arg_end();
4576 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4577
4578 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4579 const Value* CalleeArg = *CalleeArgIter;
4580 const Value* CallerArg = &(*CallerArgIter);
4581 if (CalleeArg == CallerArg)
4582 continue;
4583
4584 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4585 // tail call @callee([4 x i64] undef, [4 x i64] %b)
4586 // }
4587 // 1st argument of callee is undef and has the same type as caller.
4588 if (CalleeArg->getType() == CallerArg->getType() &&
4589 isa<UndefValue>(CalleeArg))
4590 continue;
4591
4592 return false;
4593 }
4594
4595 return true;
4596}
4597
4598 // Returns true if TCO is possible between the caller's and callee's
4599// calling conventions.
4600static bool
4601areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
4602 CallingConv::ID CalleeCC) {
4603 // Tail calls are possible with fastcc and ccc.
4604 auto isTailCallableCC = [] (CallingConv::ID CC){
4605 return CC == CallingConv::C || CC == CallingConv::Fast;
4606 };
4607 if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
4608 return false;
4609
4610 // We can safely tail call both fastcc and ccc callees from a c calling
4611 // convention caller. If the caller is fastcc, we may have less stack space
4612 // than a non-fastcc caller with the same signature so disable tail-calls in
4613 // that case.
4614 return CallerCC == CallingConv::C || CallerCC == CalleeCC;
4615}
4616
4617bool
4618PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
4619 SDValue Callee,
4620 CallingConv::ID CalleeCC,
4621 ImmutableCallSite CS,
4622 bool isVarArg,
4623 const SmallVectorImpl<ISD::OutputArg> &Outs,
4624 const SmallVectorImpl<ISD::InputArg> &Ins,
4625 SelectionDAG& DAG) const {
4626 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
4627
4628 if (DisableSCO && !TailCallOpt) return false;
4629
4630 // Variadic argument functions are not supported.
4631 if (isVarArg) return false;
4632
4633 auto &Caller = DAG.getMachineFunction().getFunction();
4634 // Check that the calling conventions are compatible for tco.
4635 if (!areCallingConvEligibleForTCO_64SVR4(Caller.getCallingConv(), CalleeCC))
4636 return false;
4637
4638   // A caller containing any byval parameter is not supported.
4639 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
4640 return false;
4641
4642 // Callee contains any byval parameter is not supported, too.
4643 // Note: This is a quick work around, because in some cases, e.g.
4644 // caller's stack size > callee's stack size, we are still able to apply
4645 // sibling call optimization. For example, gcc is able to do SCO for caller1
4646 // in the following example, but not for caller2.
4647 // struct test {
4648 // long int a;
4649 // char ary[56];
4650 // } gTest;
4651 // __attribute__((noinline)) int callee(struct test v, struct test *b) {
4652 // b->a = v.a;
4653 // return 0;
4654 // }
4655 // void caller1(struct test a, struct test c, struct test *b) {
4656 // callee(gTest, b); }
4657 // void caller2(struct test *b) { callee(gTest, b); }
4658 if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
4659 return false;
4660
4661 // If callee and caller use different calling conventions, we cannot pass
4662 // parameters on stack since offsets for the parameter area may be different.
4663 if (Caller.getCallingConv() != CalleeCC &&
4664 needStackSlotPassParameters(Subtarget, Outs))
4665 return false;
4666
4667   // No TCO/SCO on indirect calls because the caller has to restore its TOC
4668 if (!isFunctionGlobalAddress(Callee) &&
4669 !isa<ExternalSymbolSDNode>(Callee))
4670 return false;
4671
4672 // If the caller and callee potentially have different TOC bases then we
4673 // cannot tail call since we need to restore the TOC pointer after the call.
4674 // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
4675 if (!callsShareTOCBase(&Caller, Callee, getTargetMachine()))
4676 return false;
4677
4678 // TCO allows altering callee ABI, so we don't have to check further.
4679 if (CalleeCC == CallingConv::Fast && TailCallOpt)
4680 return true;
4681
4682 if (DisableSCO) return false;
4683
4684   // If the callee uses the same argument list as the caller, then we can
4685   // apply SCO in this case. If not, we need to check whether the callee
4686   // needs stack space for passing arguments.
4687 if (!hasSameArgumentList(&Caller, CS) &&
4688 needStackSlotPassParameters(Subtarget, Outs)) {
4689 return false;
4690 }
4691
4692 return true;
4693}
4694
4695/// IsEligibleForTailCallOptimization - Check whether the call is eligible
4696/// for tail call optimization. Targets which want to do tail call
4697/// optimization should implement this function.
4698bool
4699PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
4700 CallingConv::ID CalleeCC,
4701 bool isVarArg,
4702 const SmallVectorImpl<ISD::InputArg> &Ins,
4703 SelectionDAG& DAG) const {
4704 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
4705 return false;
4706
4707 // Variable argument functions are not supported.
4708 if (isVarArg)
4709 return false;
4710
4711 MachineFunction &MF = DAG.getMachineFunction();
4712 CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
4713 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
4714     // Functions containing byval parameters are not supported.
4715 for (unsigned i = 0; i != Ins.size(); i++) {
4716 ISD::ArgFlagsTy Flags = Ins[i].Flags;
4717 if (Flags.isByVal()) return false;
4718 }
4719
4720 // Non-PIC/GOT tail calls are supported.
4721 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
4722 return true;
4723
4724 // At the moment we can only do local tail calls (in same module, hidden
4725 // or protected) if we are generating PIC.
4726 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
4727 return G->getGlobal()->hasHiddenVisibility()
4728 || G->getGlobal()->hasProtectedVisibility();
4729 }
4730
4731 return false;
4732}
4733
4734/// isBLACompatibleAddress - Return the immediate to use if the specified
4735/// 32-bit value is representable in the immediate field of a BxA instruction.
4736static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
4737 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
4738 if (!C) return nullptr;
4739
4740 int Addr = C->getZExtValue();
4741 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
4742 SignExtend32<26>(Addr) != Addr)
4743 return nullptr; // Top 6 bits have to be sext of immediate.
4744
4745 return DAG
4746 .getConstant(
4747 (int)C->getZExtValue() >> 2, SDLoc(Op),
4748 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
4749 .getNode();
4750}
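// Illustrative example: for Op = 4096 (0x1000) the low two bits are zero and
// SignExtend32<26>(4096) == 4096, so the node returned holds 4096 >> 2 = 1024;
// an unaligned value such as 4097 yields nullptr.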
4751
4752namespace {
4753
4754struct TailCallArgumentInfo {
4755 SDValue Arg;
4756 SDValue FrameIdxOp;
4757 int FrameIdx = 0;
4758
4759 TailCallArgumentInfo() = default;
4760};
4761
4762} // end anonymous namespace
4763
4764/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
4765static void StoreTailCallArgumentsToStackSlot(
4766 SelectionDAG &DAG, SDValue Chain,
4767 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
4768 SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
4769 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
4770 SDValue Arg = TailCallArgs[i].Arg;
4771 SDValue FIN = TailCallArgs[i].FrameIdxOp;
4772 int FI = TailCallArgs[i].FrameIdx;
4773 // Store relative to framepointer.
4774 MemOpChains.push_back(DAG.getStore(
4775 Chain, dl, Arg, FIN,
4776 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
4777 }
4778}
4779
4780/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
4781/// the appropriate stack slot for the tail call optimized function call.
4782static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
4783 SDValue OldRetAddr, SDValue OldFP,
4784 int SPDiff, const SDLoc &dl) {
4785 if (SPDiff) {
4786 // Calculate the new stack slot for the return address.
4787 MachineFunction &MF = DAG.getMachineFunction();
4788 const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
4789 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
4790 bool isPPC64 = Subtarget.isPPC64();
4791 int SlotSize = isPPC64 ? 8 : 4;
4792 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
4793 int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
4794 NewRetAddrLoc, true);
4795 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4796 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
4797 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
4798 MachinePointerInfo::getFixedStack(MF, NewRetAddr));
4799
4800 // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
4801 // slot as the FP is never overwritten.
4802 if (Subtarget.isDarwinABI()) {
4803 int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset();
4804 int NewFPIdx = MF.getFrameInfo().CreateFixedObject(SlotSize, NewFPLoc,
4805 true);
4806 SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
4807 Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
4808 MachinePointerInfo::getFixedStack(
4809 DAG.getMachineFunction(), NewFPIdx));
4810 }
4811 }
4812 return Chain;
4813}
4814
4815/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
4816/// the position of the argument.
4817static void
4818CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
4819 SDValue Arg, int SPDiff, unsigned ArgOffset,
4820 SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
4821 int Offset = ArgOffset + SPDiff;
4822 uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
4823 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
4824 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4825 SDValue FIN = DAG.getFrameIndex(FI, VT);
4826 TailCallArgumentInfo Info;
4827 Info.Arg = Arg;
4828 Info.FrameIdxOp = FIN;
4829 Info.FrameIdx = FI;
4830 TailCallArguments.push_back(Info);
4831}
4832
4833/// EmitTailCallLoadFPAndRetAddr - Emit loads from the frame pointer and return
4834/// address stack slots. Returns the chain as result and the loaded values in
4835/// LROpOut/FPOpOut. Used when tail calling.
4836SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
4837 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
4838 SDValue &FPOpOut, const SDLoc &dl) const {
4839 if (SPDiff) {
4840 // Load the LR and FP stack slot for later adjusting.
4841 EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
4842 LROpOut = getReturnAddrFrameIndex(DAG);
4843 LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
4844 Chain = SDValue(LROpOut.getNode(), 1);
4845
4846 // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
4847 // slot as the FP is never overwritten.
4848 if (Subtarget.isDarwinABI()) {
4849 FPOpOut = getFramePointerFrameIndex(DAG);
4850 FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo());
4851 Chain = SDValue(FPOpOut.getNode(), 1);
4852 }
4853 }
4854 return Chain;
4855}
4856
4857/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
4858/// by "Src" to address "Dst" of size "Size". Alignment information is
4859/// specified by the specific parameter attribute. The copy will be passed as
4860/// a byval function parameter.
4861/// Sometimes what we are copying is the end of a larger object, the part that
4862/// does not fit in registers.
4863static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
4864 SDValue Chain, ISD::ArgFlagsTy Flags,
4865 SelectionDAG &DAG, const SDLoc &dl) {
4866 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
4867 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
4868 false, false, false, MachinePointerInfo(),
4869 MachinePointerInfo());
4870}
4871
4872/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
4873/// tail calls.
4874static void LowerMemOpCallTo(
4875 SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
4876 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
4877 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
4878 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
4879 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4880 if (!isTailCall) {
4881 if (isVector) {
4882 SDValue StackPtr;
4883 if (isPPC64)
4884 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
4885 else
4886 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
4887 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
4888 DAG.getConstant(ArgOffset, dl, PtrVT));
4889 }
4890 MemOpChains.push_back(
4891 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
4892 // Calculate and remember argument location.
4893 } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
4894 TailCallArguments);
4895}
4896
4897static void
4898PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
4899 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
4900 SDValue FPOp,
4901 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
4902 // Emit a sequence of copyto/copyfrom virtual registers for arguments that
4903 // might overwrite each other in case of tail call optimization.
4904 SmallVector<SDValue, 8> MemOpChains2;
4905 // Do not flag preceding copytoreg stuff together with the following stuff.
4906 InFlag = SDValue();
4907 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
4908 MemOpChains2, dl);
4909 if (!MemOpChains2.empty())
4910 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
4911
4912 // Store the return address to the appropriate stack slot.
4913 Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
4914
4915 // Emit callseq_end just before tailcall node.
4916 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
4917 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
4918 InFlag = Chain.getValue(1);
4919}
4920
4921// Is this global address that of a function that can be called by name? (as
4922// opposed to something that must hold a descriptor for an indirect call).
4923static bool isFunctionGlobalAddress(SDValue Callee) {
4924 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
4925 if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
4926 Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
4927 return false;
4928
4929 return G->getGlobal()->getValueType()->isFunctionTy();
4930 }
4931
4932 return false;
4933}
4934
4935static unsigned
4936PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
4937 SDValue CallSeqStart, const SDLoc &dl, int SPDiff, bool isTailCall,
4938 bool isPatchPoint, bool hasNest,
4939 SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass,
4940 SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
4941 ImmutableCallSite CS, const PPCSubtarget &Subtarget) {
4942 bool isPPC64 = Subtarget.isPPC64();
4943 bool isSVR4ABI = Subtarget.isSVR4ABI();
4944 bool is64BitELFv1ABI = isPPC64 && isSVR4ABI && !Subtarget.isELFv2ABI();
4945 bool isAIXABI = Subtarget.isAIXABI();
4946
4947 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4948 NodeTys.push_back(MVT::Other); // Returns a chain
4949 NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use.
4950
4951 unsigned CallOpc = PPCISD::CALL;
4952
4953 bool needIndirectCall = true;
4954 if (!isSVR4ABI || !isPPC64)
4955 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
4956 // If this is an absolute destination address, use the munged value.
4957 Callee = SDValue(Dest, 0);
4958 needIndirectCall = false;
4959 }
4960
4961 // PC-relative references to external symbols should go through $stub, unless
4962 // we're building with the leopard linker or later, which automatically
4963 // synthesizes these stubs.
4964 const TargetMachine &TM = DAG.getTarget();
4965 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
4966 const GlobalValue *GV = nullptr;
4967 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee))
4968 GV = G->getGlobal();
4969 bool Local = TM.shouldAssumeDSOLocal(*Mod, GV);
4970 bool UsePlt = !Local && Subtarget.isTargetELF() && !isPPC64;
4971
4972 // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
4973 // every direct call is) turn it into a TargetGlobalAddress /
4974 // TargetExternalSymbol node so that legalize doesn't hack it.
4975 if (isFunctionGlobalAddress(Callee)) {
4976 GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
4977
4978 // A call to a TLS address is actually an indirect call to a
4979 // thread-specific pointer.
4980 unsigned OpFlags = 0;
4981 if (UsePlt)
4982 OpFlags = PPCII::MO_PLT;
4983
4984 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
4985 Callee.getValueType(), 0, OpFlags);
4986 needIndirectCall = false;
4987 }
4988
4989 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
4990 unsigned char OpFlags = 0;
4991
4992 if (UsePlt)
4993 OpFlags = PPCII::MO_PLT;
4994
4995 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
4996 OpFlags);
4997 needIndirectCall = false;
4998 }
4999
5000 if (isPatchPoint) {
5001 // We'll form an invalid direct call when lowering a patchpoint; the full
5002 // sequence for an indirect call is complicated, and many of the
5003 // instructions introduced might have side effects (and, thus, can't be
5004 // removed later). The call itself will be removed as soon as the
5005 // argument/return lowering is complete, so the fact that it has the wrong
5006 // kind of operands should not really matter.
5007 needIndirectCall = false;
5008 }
5009
5010 if (needIndirectCall) {
5011 // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair
5012 // to do the call, we can't use PPCISD::CALL.
5013 SDValue MTCTROps[] = {Chain, Callee, InFlag};
5014
5015 if (is64BitELFv1ABI) {
5016 // Function pointers in the 64-bit SVR4 ABI do not point to the function
5017 // entry point, but to the function descriptor (the function entry point
5018 // address is part of the function descriptor though).
5019 // The function descriptor is a three doubleword structure with the
5020 // following fields: function entry point, TOC base address and
5021 // environment pointer.
5022 // Thus for a call through a function pointer, the following actions need
5023 // to be performed:
5024 // 1. Save the TOC of the caller in the TOC save area of its stack
5025 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
5026 // 2. Load the address of the function entry point from the function
5027 // descriptor.
5028 // 3. Load the TOC of the callee from the function descriptor into r2.
5029 // 4. Load the environment pointer from the function descriptor into
5030 // r11.
5031 // 5. Branch to the function entry point address.
5032 // 6. On return of the callee, the TOC of the caller needs to be
5033 // restored (this is done in FinishCall()).
5034 //
5035 // The loads are scheduled at the beginning of the call sequence, and the
5036 // register copies are flagged together to ensure that no other
5037 // operations can be scheduled in between. E.g. without flagging the
5038 // copies together, a TOC access in the caller could be scheduled between
5039 // the assignment of the callee TOC and the branch to the callee, which
5040 // results in the TOC access going through the TOC of the callee instead
5041 // of going through the TOC of the caller, which leads to incorrect code.
5042
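      // Sketch of the descriptor layout assumed by the three loads below
      // (offsets in bytes): +0 function entry point, +8 TOC base address,
      // +16 environment pointer.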
5043 // Load the address of the function entry point from the function
5044 // descriptor.
5045 SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1);
5046 if (LDChain.getValueType() == MVT::Glue)
5047 LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2);
5048
5049 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5050 ? (MachineMemOperand::MODereferenceable |
5051 MachineMemOperand::MOInvariant)
5052 : MachineMemOperand::MONone;
5053
5054 MachinePointerInfo MPI(CS ? CS.getCalledValue() : nullptr);
5055 SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI,
5056 /* Alignment = */ 8, MMOFlags);
5057
5058 // Load environment pointer into r11.
5059 SDValue PtrOff = DAG.getIntPtrConstant(16, dl);
5060 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
5061 SDValue LoadEnvPtr =
5062 DAG.getLoad(MVT::i64, dl, LDChain, AddPtr, MPI.getWithOffset(16),
5063 /* Alignment = */ 8, MMOFlags);
5064
5065 SDValue TOCOff = DAG.getIntPtrConstant(8, dl);
5066 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
5067 SDValue TOCPtr =
5068 DAG.getLoad(MVT::i64, dl, LDChain, AddTOC, MPI.getWithOffset(8),
5069 /* Alignment = */ 8, MMOFlags);
5070
5071 setUsesTOCBasePtr(DAG);
5072 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr,
5073 InFlag);
5074 Chain = TOCVal.getValue(0);
5075 InFlag = TOCVal.getValue(1);
5076
5077 // If the function call has an explicit 'nest' parameter, it takes the
5078 // place of the environment pointer.
5079 if (!hasNest) {
5080 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
5081 InFlag);
5082
5083 Chain = EnvVal.getValue(0);
5084 InFlag = EnvVal.getValue(1);
5085 }
5086
5087 MTCTROps[0] = Chain;
5088 MTCTROps[1] = LoadFuncPtr;
5089 MTCTROps[2] = InFlag;
5090 }
5091
5092 Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys,
5093 makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
5094 InFlag = Chain.getValue(1);
5095
5096 NodeTys.clear();
5097 NodeTys.push_back(MVT::Other);
5098 NodeTys.push_back(MVT::Glue);
5099 Ops.push_back(Chain);
5100 CallOpc = PPCISD::BCTRL;
5101 Callee.setNode(nullptr);
5102 // Add use of X11 (holding environment pointer)
5103 if (is64BitELFv1ABI && !hasNest)
5104 Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
5105 // Add CTR register as callee so a bctr can be emitted later.
5106 if (isTailCall)
5107 Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
5108 }
5109
5110 // If this is a direct call, pass the chain and the callee.
5111 if (Callee.getNode()) {
5112 Ops.push_back(Chain);
5113 Ops.push_back(Callee);
5114 }
5115 // If this is a tail call add stack pointer delta.
5116 if (isTailCall)
5117 Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5118
5119 // Add argument registers to the end of the list so that they are known live
5120 // into the call.
5121 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
5122 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
5123 RegsToPass[i].second.getValueType()));
5124
5125 // All calls, in the AIX ABI and 64-bit ELF ABIs, need the TOC register
5126 // live into the call.
5127 // We do need to reserve R2/X2 to appease the verifier for the PATCHPOINT.
5128 if ((isSVR4ABI && isPPC64) || isAIXABI) {
5129 setUsesTOCBasePtr(DAG);
5130
5131 // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5132 // no way to mark dependencies as implicit here.
5133 // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5134 if (!isPatchPoint)
5135 Ops.push_back(DAG.getRegister(isPPC64 ? PPC::X2
5136 : PPC::R2, PtrVT));
5137 }
5138
5139 return CallOpc;
5140}
5141
5142SDValue PPCTargetLowering::LowerCallResult(
5143 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
5144 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5145 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5146 SmallVector<CCValAssign, 16> RVLocs;
5147 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5148 *DAG.getContext());
5149
5150 CCRetInfo.AnalyzeCallResult(
5151 Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5152 ? RetCC_PPC_Cold
5153 : RetCC_PPC);
5154
5155 // Copy all of the result registers out of their specified physreg.
5156 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5157 CCValAssign &VA = RVLocs[i];
5158    assert(VA.isRegLoc() && "Can only return in registers!");
5159
5160 SDValue Val;
5161
5162 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5163 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5164 InFlag);
5165 Chain = Lo.getValue(1);
5166 InFlag = Lo.getValue(2);
5167 VA = RVLocs[++i]; // skip ahead to next loc
5168 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5169 InFlag);
5170 Chain = Hi.getValue(1);
5171 InFlag = Hi.getValue(2);
5172 if (!Subtarget.isLittleEndian())
5173 std::swap (Lo, Hi);
5174 Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
5175 } else {
5176 Val = DAG.getCopyFromReg(Chain, dl,
5177 VA.getLocReg(), VA.getLocVT(), InFlag);
5178 Chain = Val.getValue(1);
5179 InFlag = Val.getValue(2);
5180 }
5181
5182 switch (VA.getLocInfo()) {
5183    default: llvm_unreachable("Unknown loc info!");
5184 case CCValAssign::Full: break;
5185 case CCValAssign::AExt:
5186 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5187 break;
5188 case CCValAssign::ZExt:
5189 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5190 DAG.getValueType(VA.getValVT()));
5191 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5192 break;
5193 case CCValAssign::SExt:
5194 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5195 DAG.getValueType(VA.getValVT()));
5196 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5197 break;
5198 }
5199
5200 InVals.push_back(Val);
5201 }
5202
5203 return Chain;
5204}
5205
5206SDValue PPCTargetLowering::FinishCall(
5207 CallingConv::ID CallConv, const SDLoc &dl, bool isTailCall, bool isVarArg,
5208 bool isPatchPoint, bool hasNest, SelectionDAG &DAG,
5209 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue InFlag,
5210 SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5211 unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5212 SmallVectorImpl<SDValue> &InVals, ImmutableCallSite CS) const {
5213 std::vector<EVT> NodeTys;
5214 SmallVector<SDValue, 8> Ops;
5215 unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl,
5216 SPDiff, isTailCall, isPatchPoint, hasNest,
5217 RegsToPass, Ops, NodeTys, CS, Subtarget);
5218
5219 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5220 if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
5221 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5222
5223 // When performing tail call optimization the callee pops its arguments off
5224 // the stack. Account for this here so these bytes can be pushed back on in
5225 // PPCFrameLowering::eliminateCallFramePseudoInstr.
5226 int BytesCalleePops =
5227 (CallConv == CallingConv::Fast &&
5228 getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
5229
5230 // Add a register mask operand representing the call-preserved registers.
5231 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5232 const uint32_t *Mask =
5233 TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
5234 assert(Mask && "Missing call preserved mask for calling convention");
5235 Ops.push_back(DAG.getRegisterMask(Mask));
5236
5237 if (InFlag.getNode())
5238 Ops.push_back(InFlag);
5239
5240 // Emit tail call.
5241 if (isTailCall) {
5242 assert(((Callee.getOpcode() == ISD::Register &&
5243 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5244 Callee.getOpcode() == ISD::TargetExternalSymbol ||
5245 Callee.getOpcode() == ISD::TargetGlobalAddress ||
5246 isa<ConstantSDNode>(Callee)) &&
5247 "Expecting an global address, external symbol, absolute value or register");
5248
5249 DAG.getMachineFunction().getFrameInfo().setHasTailCall();
5250 return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
5251 }
5252
5253 // Add a NOP immediately after the branch instruction when using the 64-bit
5254 // SVR4 or the AIX ABI.
5255 // At link time, if caller and callee are in different modules and
5256 // thus have a different TOC, the call will be replaced with a call to a stub
5257 // function which saves the current TOC, loads the TOC of the callee and
5258 // branches to the callee. The NOP will be replaced with a load instruction
5259 // which restores the TOC of the caller from the TOC save slot of the current
5260 // stack frame. If caller and callee belong to the same module (and have the
5261 // same TOC), the NOP will remain unchanged, or become some other NOP.
5262
5263 MachineFunction &MF = DAG.getMachineFunction();
5264 EVT PtrVT = getPointerTy(DAG.getDataLayout());
5265 if (!isTailCall && !isPatchPoint &&
5266 ((Subtarget.isSVR4ABI() && Subtarget.isPPC64()) ||
5267 Subtarget.isAIXABI())) {
5268 if (CallOpc == PPCISD::BCTRL) {
5269 if (Subtarget.isAIXABI())
5270 report_fatal_error("Indirect call on AIX is not implemented.");
5271
5272 // This is a call through a function pointer.
5273 // Restore the caller TOC from the save area into R2.
5274 // See PrepareCall() for more information about calls through function
5275 // pointers in the 64-bit SVR4 ABI.
5276 // We are using a target-specific load with r2 hard coded, because the
5277 // result of a target-independent load would never go directly into r2,
5278 // since r2 is a reserved register (which prevents the register allocator
5279 // from allocating it), resulting in an additional register being
5280 // allocated and an unnecessary move instruction being generated.
5281 CallOpc = PPCISD::BCTRL_LOAD_TOC;
5282
5283 SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
5284 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5285 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5286 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
5287
5288 // The address needs to go after the chain input but before the flag (or
5289 // any other variadic arguments).
5290 Ops.insert(std::next(Ops.begin()), AddTOC);
5291 } else if (CallOpc == PPCISD::CALL &&
5292 !callsShareTOCBase(&MF.getFunction(), Callee, DAG.getTarget())) {
5293 // Otherwise insert NOP for non-local calls.
5294 CallOpc = PPCISD::CALL_NOP;
5295 }
5296 }
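
[Editor's note] A simplified, illustrative model of the opcode selection made in the block above for the 64-bit SVR4 and AIX ABIs (ignoring the tail-call/patchpoint early-outs and the AIX indirect-call fatal error): indirect calls become BCTRL_LOAD_TOC so r2 can be restored from the TOC save slot, and direct calls that may not share a TOC with the caller get a NOP after the branch for the linker to patch. The enum and function below are a sketch, not LLVM API.

// Editor's sketch (not LLVM API): the opcode decision made above.
enum class PPCCallOpcSketch { CALL, CALL_NOP, BCTRL_LOAD_TOC };

static PPCCallOpcSketch chooseCallOpc(bool IsIndirect, bool SharesTOCWithCaller) {
  if (IsIndirect)
    return PPCCallOpcSketch::BCTRL_LOAD_TOC;   // reload r2 from the TOC save slot
  return SharesTOCWithCaller ? PPCCallOpcSketch::CALL
                             : PPCCallOpcSketch::CALL_NOP; // NOP patched by linker
}
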
5297
5298 if (Subtarget.isAIXABI() && isFunctionGlobalAddress(Callee)) {
5299 // On AIX, direct function calls reference the symbol for the function's
5300 // entry point, which is named by inserting a "." before the function's
5301 // C-linkage name.
5302 GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
5303 auto &Context = DAG.getMachineFunction().getMMI().getContext();
5304 MCSymbol *S = Context.getOrCreateSymbol(Twine(".") +
5305 Twine(G->getGlobal()->getName()));
5306 Callee = DAG.getMCSymbol(S, PtrVT);
5307 // Replace the GlobalAddressSDNode Callee with the MCSymbolSDNode.
5308 Ops[1] = Callee;
5309 }
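
[Editor's note] A minimal sketch of the AIX naming rule described above, assuming only the documented convention that the entry-point symbol is the C-linkage name with a "." prepended. The helper is illustrative, not LLVM API.

#include <string>

// Editor's sketch: the "." entry-point prefix applied above.
static std::string aixEntryPointSymbol(const std::string &CLinkageName) {
  return "." + CLinkageName;          // e.g. "foo" -> ".foo"
}
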
5310
5311 Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
5312 InFlag = Chain.getValue(1);
5313
5314 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
5315 DAG.getIntPtrConstant(BytesCalleePops, dl, true),
5316 InFlag, dl);
5317 if (!Ins.empty())
5318 InFlag = Chain.getValue(1);
5319
5320 return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
5321 Ins, dl, DAG, InVals);
5322}
5323
5324SDValue
5325PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5326 SmallVectorImpl<SDValue> &InVals) const {
5327 SelectionDAG &DAG = CLI.DAG;
5328 SDLoc &dl = CLI.DL;
5329 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5330 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5331 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
5332 SDValue Chain = CLI.Chain;
5333 SDValue Callee = CLI.Callee;
5334 bool &isTailCall = CLI.IsTailCall;
5335 CallingConv::ID CallConv = CLI.CallConv;
5336 bool isVarArg = CLI.IsVarArg;
5337 bool isPatchPoint = CLI.IsPatchPoint;
5338 ImmutableCallSite CS = CLI.CS;
5339
5340 if (isTailCall) {
5341 if (Subtarget.useLongCalls() && !(CS && CS.isMustTailCall()))
5342 isTailCall = false;
5343 else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5344 isTailCall =
5345 IsEligibleForTailCallOptimization_64SVR4(Callee, CallConv, CS,
5346 isVarArg, Outs, Ins, DAG);
5347 else
5348 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
5349 Ins, DAG);
5350 if (isTailCall) {
5351 ++NumTailCalls;
5352 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5353 ++NumSiblingCalls;
5354
5355 assert(isa<GlobalAddressSDNode>(Callee) &&
5356 "Callee should be an llvm::Function object.");
5357 LLVM_DEBUG(
5358 const GlobalValue *GV =
5359 cast<GlobalAddressSDNode>(Callee)->getGlobal();
5360 const unsigned Width =
5361 80 - strlen("TCO caller: ") - strlen(", callee linkage: 0, 0");
5362 dbgs() << "TCO caller: "
5363 << left_justify(DAG.getMachineFunction().getName(), Width)
5364 << ", callee linkage: " << GV->getVisibility() << ", "
5365 << GV->getLinkage() << "\n");
5366 }
5367 }
5368
5369 if (!isTailCall && CS && CS.isMustTailCall())
5370 report_fatal_error("failed to perform tail call elimination on a call "
5371 "site marked musttail");
5372
5373 // When long calls (i.e. indirect calls) are always used, calls are always
5374 // made via function pointer. If we have a function name, first translate it
5375 // into a pointer.
5376 if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5377 !isTailCall)
5378 Callee = LowerGlobalAddress(Callee, DAG);
5379
5380 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5381 return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
5382 isTailCall, isPatchPoint, Outs, OutVals, Ins,
5383 dl, DAG, InVals, CS);
5384
5385 if (Subtarget.isSVR4ABI())
5386 return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
5387 isTailCall, isPatchPoint, Outs, OutVals, Ins,
5388 dl, DAG, InVals, CS);
5389
5390 if (Subtarget.isAIXABI())
5391 return LowerCall_AIX(Chain, Callee, CallConv, isVarArg,
5392 isTailCall, isPatchPoint, Outs, OutVals, Ins,
5393 dl, DAG, InVals, CS);
5394
5395 return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
5396 isTailCall, isPatchPoint, Outs, OutVals, Ins,
5397 dl, DAG, InVals, CS);
5398}
5399
5400SDValue PPCTargetLowering::LowerCall_32SVR4(
5401 SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
5402 bool isTailCall, bool isPatchPoint,
5403 const SmallVectorImpl<ISD::OutputArg> &Outs,
5404 const SmallVectorImpl<SDValue> &OutVals,
5405 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5406 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5407 ImmutableCallSite CS) const {
5408 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5409 // of the 32-bit SVR4 ABI stack frame layout.
5410
5411 assert((CallConv == CallingConv::C ||
5412 CallConv == CallingConv::Cold ||
5413 CallConv == CallingConv::Fast) && "Unknown calling convention!");
5414
5415 unsigned PtrByteSize = 4;
5416
5417 MachineFunction &MF = DAG.getMachineFunction();
5418
5419 // Mark this function as potentially containing a function that contains a
5420 // tail call. As a consequence the frame pointer will be used for dynamic
5421 // stack allocation and for restoring the caller's stack pointer in this
5422 // function's epilogue. This is done because the tail-called function might
5423 // overwrite the value in this function's (MF) stack pointer stack slot 0(SP).
5424 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5425 CallConv == CallingConv::Fast)
5426 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5427
5428 // Count how many bytes are to be pushed on the stack, including the linkage
5429 // area, parameter list area and the part of the local variable space which
5430 // contains copies of aggregates which are passed by value.
5431
5432 // Assign locations to all of the outgoing arguments.
5433 SmallVector<CCValAssign, 16> ArgLocs;
5434 PPCCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
5435
5436 // Reserve space for the linkage area on the stack.
5437 CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
5438 PtrByteSize);
5439 if (useSoftFloat())
5440 CCInfo.PreAnalyzeCallOperands(Outs);
5441
5442 if (isVarArg) {
5443 // Handle fixed and variable vector arguments differently.
5444 // Fixed vector arguments go into registers as long as registers are
5445 // available. Variable vector arguments always go into memory.
5446 unsigned NumArgs = Outs.size();
5447
5448 for (unsigned i = 0; i != NumArgs; ++i) {
5449 MVT ArgVT = Outs[i].VT;
5450 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5451 bool Result;
5452
5453 if (Outs[i].IsFixed) {
5454 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
5455 CCInfo);
5456 } else {
5457 Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
5458 ArgFlags, CCInfo);
5459 }
5460
5461 if (Result) {
5462#ifndef NDEBUG
5463 errs() << "Call operand #" << i << " has unhandled type "
5464 << EVT(ArgVT).getEVTString() << "\n";
5465#endif
5466 llvm_unreachable(nullptr);
5467 }
5468 }
5469 } else {
5470 // All arguments are treated the same.
5471 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
5472 }
5473 CCInfo.clearWasPPCF128();
5474
5475 // Assign locations to all of the outgoing aggregate by value arguments.
5476 SmallVector<CCValAssign, 16> ByValArgLocs;
5477 CCState CCByValInfo(CallConv, isVarArg, MF, ByValArgLocs, *DAG.getContext());
5478
5479 // Reserve stack space for the allocations in CCInfo.
5480 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
5481
5482 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
5483
5484 // Size of the linkage area, parameter list area and the part of the local
5485 // space variable where copies of aggregates which are passed by value are
5486 // stored.
5487 unsigned NumBytes = CCByValInfo.getNextStackOffset();
5488
5489 // Calculate by how many bytes the stack has to be adjusted in case of tail
5490 // call optimization.
5491 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
5492
5493 // Adjust the stack pointer for the new arguments...
5494 // These operations are automatically eliminated by the prolog/epilog pass
5495 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5496 SDValue CallSeqStart = Chain;
5497
5498 // Load the return address and frame pointer so it can be moved somewhere else
5499 // later.
5500 SDValue LROp, FPOp;
5501 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5502
5503 // Set up a copy of the stack pointer for use loading and storing any
5504 // arguments that may not fit in the registers available for argument
5505 // passing.
5506 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5507
5508 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5509 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5510 SmallVector<SDValue, 8> MemOpChains;
5511
5512 bool seenFloatArg = false;
5513 // Walk the register/memloc assignments, inserting copies/loads.
5514 // i - Tracks the index into the list of registers allocated for the call
5515 // RealArgIdx - Tracks the index into the list of actual function arguments
5516 // j - Tracks the index into the list of byval arguments
5517 for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
5518 i != e;
5519 ++i, ++RealArgIdx) {
5520 CCValAssign &VA = ArgLocs[i];
5521 SDValue Arg = OutVals[RealArgIdx];
5522 ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
5523
5524 if (Flags.isByVal()) {
5525 // Argument is an aggregate which is passed by value, thus we need to
5526 // create a copy of it in the local variable space of the current stack
5527 // frame (which is the stack frame of the caller) and pass the address of
5528 // this copy to the callee.
5529 assert((j < ByValArgLocs.size()) && "Index out of bounds!");
5530 CCValAssign &ByValVA = ByValArgLocs[j++];
5531 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
5532
5533 // Memory reserved in the local variable space of the caller's stack frame.
5534 unsigned LocMemOffset = ByValVA.getLocMemOffset();
5535
5536 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5537 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5538 StackPtr, PtrOff);
5539
5540 // Create a copy of the argument in the local area of the current
5541 // stack frame.
5542 SDValue MemcpyCall =
5543 CreateCopyOfByValArgument(Arg, PtrOff,
5544 CallSeqStart.getNode()->getOperand(0),
5545 Flags, DAG, dl);
5546
5547 // This must go outside the CALLSEQ_START..END.
5548 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
5549 SDLoc(MemcpyCall));
5550 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5551 NewCallSeqStart.getNode());
5552 Chain = CallSeqStart = NewCallSeqStart;
5553
5554 // Pass the address of the aggregate copy on the stack either in a
5555 // physical register or in the parameter list area of the current stack
5556 // frame to the callee.
5557 Arg = PtrOff;
5558 }
5559
5560 // When useCRBits() is true, there can be i1 arguments.
5561 // It is because getRegisterType(MVT::i1) => MVT::i1,
5562 // and for other integer types getRegisterType() => MVT::i32.
5563 // Extend i1 and ensure callee will get i32.
5564 if (Arg.getValueType() == MVT::i1)
5565 Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
5566 dl, MVT::i32, Arg);
5567
5568 if (VA.isRegLoc()) {
5569 seenFloatArg |= VA.getLocVT().isFloatingPoint();
5570 // Put argument in a physical register.
5571 if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
5572 bool IsLE = Subtarget.isLittleEndian();
5573 SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5574 DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
5575 RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
5576 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5577 DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
5578 RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
5579 SVal.getValue(0)));
5580 } else
5581 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
5582 } else {
5583 // Put argument in the parameter list area of the current stack frame.
5584 assert(VA.isMemLoc());
5585 unsigned LocMemOffset = VA.getLocMemOffset();
5586
5587 if (!isTailCall) {
5588 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5589 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5590 StackPtr, PtrOff);
5591
5592 MemOpChains.push_back(
5593 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5594 } else {
5595 // Calculate and remember argument location.
5596 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
5597 TailCallArguments);
5598 }
5599 }
5600 }
5601
5602 if (!MemOpChains.empty())
5603 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
5604
5605 // Build a sequence of copy-to-reg nodes chained together with token chain
5606 // and flag operands which copy the outgoing args into the appropriate regs.
5607 SDValue InFlag;
5608 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5609 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5610 RegsToPass[i].second, InFlag);
5611 InFlag = Chain.getValue(1);
5612 }
5613
5614 // Set CR bit 6 to true if this is a vararg call with floating args passed in
5615 // registers.
5616 if (isVarArg) {
5617 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
5618 SDValue Ops[] = { Chain, InFlag };
5619
5620 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
5621 dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));
5622
5623 InFlag = Chain.getValue(1);
5624 }
5625
5626 if (isTailCall)
5627 PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
5628 TailCallArguments);
5629
5630 return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
5631 /* unused except on PPC64 ELFv1 */ false, DAG,
5632 RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
5633 NumBytes, Ins, InVals, CS);
5634}
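
[Editor's note] The CR6 convention used near the end of LowerCall_32SVR4 can be summarized by a small standalone predicate: for a 32-bit SVR4 vararg call, the caller sets CR bit 6 when at least one floating-point argument was passed in a register, so the callee knows whether its va_start prologue must spill the FP argument registers. This only models the CR6SET/CR6UNSET decision; it is a sketch, not LLVM API.

// Editor's sketch (not LLVM API): CR6SET vs. CR6UNSET decision for 32-bit SVR4.
static bool needsCR6Set(bool IsVarArg, bool SeenFloatArgInReg) {
  return IsVarArg && SeenFloatArgInReg;
}
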
5635
5636// Copy an argument into memory, being careful to do this outside the
5637// call sequence for the call to which the argument belongs.
5638SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
5639 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
5640 SelectionDAG &DAG, const SDLoc &dl) const {
5641 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
5642 CallSeqStart.getNode()->getOperand(0),
5643 Flags, DAG, dl);
5644 // The MEMCPY must go outside the CALLSEQ_START..END.
5645 int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
5646 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
5647 SDLoc(MemcpyCall));
5648 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5649 NewCallSeqStart.getNode());
5650 return NewCallSeqStart;
5651}
5652
5653SDValue PPCTargetLowering::LowerCall_64SVR4(
5654 SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
5655 bool isTailCall, bool isPatchPoint,
5656 const SmallVectorImpl<ISD::OutputArg> &Outs,
5657 const SmallVectorImpl<SDValue> &OutVals,
5658 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5659 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5660 ImmutableCallSite CS) const {
5661 bool isELFv2ABI = Subtarget.isELFv2ABI();
5662 bool isLittleEndian = Subtarget.isLittleEndian();
5663 unsigned NumOps = Outs.size();
5664 bool hasNest = false;
5665 bool IsSibCall = false;
5666
5667 EVT PtrVT = getPointerTy(DAG.getDataLayout());
5668 unsigned PtrByteSize = 8;
5669
5670 MachineFunction &MF = DAG.getMachineFunction();
5671
5672 if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
5673 IsSibCall = true;
5674
5675 // Mark this function as potentially containing a function that contains a
5676 // tail call. As a consequence the frame pointer will be used for dynamic
5677 // stack allocation and for restoring the caller's stack pointer in this
5678 // function's epilogue. This is done because the tail-called function might
5679 // overwrite the value in this function's (MF) stack pointer stack slot 0(SP).
5680 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5681 CallConv == CallingConv::Fast)
5682 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5683
5684 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
5685 "fastcc not supported on varargs functions");
5686
5687 // Count how many bytes are to be pushed on the stack, including the linkage
5688 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
5689 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
5690 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
5691 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
5692 unsigned NumBytes = LinkageSize;
5693 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
5694 unsigned &QFPR_idx = FPR_idx;
5695
5696 static const MCPhysReg GPR[] = {
5697 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
5698 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
5699 };
5700 static const MCPhysReg VR[] = {
5701 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
5702 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
5703 };
5704
5705 const unsigned NumGPRs = array_lengthof(GPR);
5706 const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
5707 const unsigned NumVRs = array_lengthof(VR);
5708 const unsigned NumQFPRs = NumFPRs;
5709
5710 // On ELFv2, we can avoid allocating the parameter area if all the arguments
5711 // can be passed to the callee in registers.
5712 // For the fast calling convention, there is another check below.
5713 // Note: We should keep consistent with LowerFormalArguments_64SVR4()
5714 bool HasParameterArea = !isELFv2ABI || isVarArg || CallConv == CallingConv::Fast;
5715 if (!HasParameterArea) {
5716 unsigned ParamAreaSize = NumGPRs * PtrByteSize;
5717 unsigned AvailableFPRs = NumFPRs;
5718 unsigned AvailableVRs = NumVRs;
5719 unsigned NumBytesTmp = NumBytes;
5720 for (unsigned i = 0; i != NumOps; ++i) {
5721 if (Outs[i].Flags.isNest()) continue;
5722 if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
5723 PtrByteSize, LinkageSize, ParamAreaSize,
5724 NumBytesTmp, AvailableFPRs, AvailableVRs,
5725 Subtarget.hasQPX()))
5726 HasParameterArea = true;
5727 }
5728 }
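
[Editor's note] A simplified model of the decision computed above: the parameter save area may be omitted only on ELFv2, for non-vararg calls whose arguments all fit in the available GPRs, FPRs and VRs (the fast calling convention is handled separately just below). The booleans are illustrative inputs, not LLVM API.

// Editor's sketch (not LLVM API): when the ELFv2 parameter save area is needed.
static bool needsParameterArea(bool IsELFv2, bool IsVarArg,
                               bool AllArgsFitInRegisters) {
  if (!IsELFv2 || IsVarArg)
    return true;                  // ELFv1 and vararg calls always get one
  return !AllArgsFitInRegisters;  // otherwise only if some argument spills
}
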
5729
5730 // When using the fast calling convention, we don't provide backing for
5731 // arguments that will be in registers.
5732 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
5733
5734 // Avoid allocating parameter area for fastcc functions if all the arguments
5735 // can be passed in the registers.
5736 if (CallConv == CallingConv::Fast)
5737 HasParameterArea = false;
5738
5739 // Add up all the space actually used.
5740 for (unsigned i = 0; i != NumOps; ++i) {
5741 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5742 EVT ArgVT = Outs[i].VT;
5743 EVT OrigVT = Outs[i].ArgVT;
5744
5745 if (Flags.isNest())
5746 continue;
5747
5748 if (CallConv == CallingConv::Fast) {
5749 if (Flags.isByVal()) {
5750 NumGPRsUsed += (Flags.getByValSize()+7)/8;
5751 if (NumGPRsUsed > NumGPRs)
5752 HasParameterArea = true;
5753 } else {
5754 switch (ArgVT.getSimpleVT().SimpleTy) {
5755 default: llvm_unreachable("Unexpected ValueType for argument!");
5756 case MVT::i1:
5757 case MVT::i32:
5758 case MVT::i64:
5759 if (++NumGPRsUsed <= NumGPRs)
5760 continue;
5761 break;
5762 case MVT::v4i32:
5763 case MVT::v8i16:
5764 case MVT::v16i8:
5765 case MVT::v2f64:
5766 case MVT::v2i64:
5767 case MVT::v1i128:
5768 case MVT::f128:
5769 if (++NumVRsUsed <= NumVRs)
5770 continue;
5771 break;
5772 case MVT::v4f32:
5773 // When using QPX, this is handled like a FP register, otherwise, it
5774 // is an Altivec register.
5775 if (Subtarget.hasQPX()) {
5776 if (++NumFPRsUsed <= NumFPRs)
5777 continue;
5778 } else {
5779 if (++NumVRsUsed <= NumVRs)
5780 continue;
5781 }
5782 break;
5783 case MVT::f32:
5784 case MVT::f64:
5785 case MVT::v4f64: // QPX
5786 case MVT::v4i1: // QPX
5787 if (++NumFPRsUsed <= NumFPRs)
5788 continue;
5789 break;
5790 }
5791 HasParameterArea = true;
5792 }
5793 }
5794
5795 /* Respect alignment of argument on the stack. */
5796 unsigned Align =
5797 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
5798 NumBytes = ((NumBytes + Align - 1) / Align) * Align;
5799
5800 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
5801 if (Flags.isInConsecutiveRegsLast())
5802 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
5803 }
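
[Editor's note] The size accounting in the loop above repeatedly rounds the running byte count up to each argument's stack-slot alignment with the expression ((NumBytes + Align - 1) / Align) * Align. A minimal standalone helper showing the same arithmetic (the name is illustrative):

#include <cassert>

// Editor's sketch: the stack-slot alignment rounding used in the loop above.
static unsigned alignToSlot(unsigned Offset, unsigned Align) {
  assert(Align != 0 && "alignment must be non-zero");
  return ((Offset + Align - 1) / Align) * Align;
}
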
5804
5805 unsigned NumBytesActuallyUsed = NumBytes;
5806
5807 // In the old ELFv1 ABI,
5808 // the prolog code of the callee may store up to 8 GPR argument registers to
5809 // the stack, allowing va_start to index over them in memory if it is varargs.
5810 // Because we cannot tell if this is needed on the caller side, we have to
5811 // conservatively assume that it is needed. As such, make sure we have at
5812 // least enough stack space for the caller to store the 8 GPRs.
5813 // In the ELFv2 ABI, we allocate the parameter area iff a callee
5814 // really requires memory operands, e.g. a vararg function.
5815 if (HasParameterArea)
5816 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
5817 else
5818 NumBytes = LinkageSize;
5819
5820 // Tail call needs the stack to be aligned.
5821 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5822 CallConv == CallingConv::Fast)
5823 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
5824
5825 int SPDiff = 0;
5826
5827 // Calculate by how many bytes the stack has to be adjusted in case of tail
5828 // call optimization.
5829 if (!IsSibCall)
5830 SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
5831
5832 // To protect arguments on the stack from being clobbered in a tail call,
5833 // force all the loads to happen before doing any other lowering.
5834 if (isTailCall)
5835 Chain = DAG.getStackArgumentTokenFactor(Chain);
5836
5837 // Adjust the stack pointer for the new arguments...
5838 // These operations are automatically eliminated by the prolog/epilog pass
5839 if (!IsSibCall)
5840 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5841 SDValue CallSeqStart = Chain;
5842
5843 // Load the return address and frame pointer so it can be moved somewhere else
5844 // later.
5845 SDValue LROp, FPOp;
5846 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5847
5848 // Set up a copy of the stack pointer for use loading and storing any
5849 // arguments that may not fit in the registers available for argument
5850 // passing.
5851 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5852
5853 // Figure out which arguments are going to go in registers, and which in
5854 // memory. Also, if this is a vararg function, floating point operations
5855 // must be stored to our stack, and loaded into integer regs as well, if
5856 // any integer regs are available for argument passing.
5857 unsigned ArgOffset = LinkageSize;
5858
5859 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5860 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5861
5862 SmallVector<SDValue, 8> MemOpChains;
5863 for (unsigned i = 0; i != NumOps; ++i) {
5864 SDValue Arg = OutVals[i];
5865 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5866 EVT ArgVT = Outs[i].VT;
5867 EVT OrigVT = Outs[i].ArgVT;
5868
5869 // PtrOff will be used to store the current argument to the stack if a
5870 // register cannot be found for it.
5871 SDValue PtrOff;
5872
5873 // We re-align the argument offset for each argument, except when using the
5874 // fast calling convention, when we need to make sure we do that only when
5875 // we'll actually use a stack slot.
5876 auto ComputePtrOff = [&]() {
5877 /* Respect alignment of argument on the stack. */
5878 unsigned Align =
5879 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
5880 ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
5881
5882 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
5883
5884 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
5885 };
5886
5887 if (CallConv != CallingConv::Fast) {
5888 ComputePtrOff();
5889
5890 /* Compute GPR index associated with argument offset. */
5891 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
5892 GPR_idx = std::min(GPR_idx, NumGPRs);
5893 }
5894
5895 // Promote integers to 64-bit values.
5896 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
5897 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
5898 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
5899 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
5900 }
5901
5902 // FIXME memcpy is used way more than necessary. Correctness first.
5903 // Note: "by value" is code for passing a structure by value, not
5904 // basic types.
5905 if (Flags.isByVal()) {
5906 // Note: Size includes alignment padding, so
5907 // struct x { short a; char b; }
5908 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
5909 // These are the proper values we need for right-justifying the
5910 // aggregate in a parameter register.
5911 unsigned Size = Flags.getByValSize();
5912
5913 // An empty aggregate parameter takes up no storage and no
5914 // registers.
5915 if (Size == 0)
5916 continue;
5917
5918 if (CallConv == CallingConv::Fast)
5919 ComputePtrOff();
5920
5921 // All aggregates smaller than 8 bytes must be passed right-justified.
5922 if (Size==1 || Size==2 || Size==4) {
5923 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
5924 if (GPR_idx != NumGPRs) {
5925 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
5926 MachinePointerInfo(), VT);
5927 MemOpChains.push_back(Load.getValue(1));
5928 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5929
5930 ArgOffset += PtrByteSize;
5931 continue;
5932 }
5933 }
5934
5935 if (GPR_idx == NumGPRs && Size < 8) {
5936 SDValue AddPtr = PtrOff;
5937 if (!isLittleEndian) {
5938 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
5939 PtrOff.getValueType());
5940 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
5941 }
5942 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
5943 CallSeqStart,
5944 Flags, DAG, dl);
5945 ArgOffset += PtrByteSize;
5946 continue;
5947 }
5948 // Copy entire object into memory. There are cases where gcc-generated
5949 // code assumes it is there, even if it could be put entirely into
5950 // registers. (This is not what the doc says.)
5951
5952 // FIXME: The above statement is likely due to a misunderstanding of the
5953 // documents. All arguments must be copied into the parameter area BY
5954 // THE CALLEE in the event that the callee takes the address of any
5955 // formal argument. That has not yet been implemented. However, it is
5956 // reasonable to use the stack area as a staging area for the register
5957 // load.
5958
5959 // Skip this for small aggregates, as we will use the same slot for a
5960 // right-justified copy, below.
5961 if (Size >= 8)
5962 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
5963 CallSeqStart,
5964 Flags, DAG, dl);
5965
5966 // When a register is available, pass a small aggregate right-justified.
5967 if (Size < 8 && GPR_idx != NumGPRs) {
5968 // The easiest way to get this right-justified in a register
5969 // is to copy the structure into the rightmost portion of a
5970 // local variable slot, then load the whole slot into the
5971 // register.
5972 // FIXME: The memcpy seems to produce pretty awful code for
5973 // small aggregates, particularly for packed ones.
5974 // FIXME: It would be preferable to use the slot in the
5975 // parameter save area instead of a new local variable.
5976 SDValue AddPtr = PtrOff;
5977 if (!isLittleEndian) {
5978 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
5979 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
5980 }
5981 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
5982 CallSeqStart,
5983 Flags, DAG, dl);
5984
5985 // Load the slot into the register.
5986 SDValue Load =
5987 DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
5988 MemOpChains.push_back(Load.getValue(1));
5989 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5990
5991 // Done with this argument.
5992 ArgOffset += PtrByteSize;
5993 continue;
5994 }
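
[Editor's note] A standalone sketch of the right-justification applied above: when a by-value aggregate smaller than a doubleword is passed in a GPR on a big-endian 64-bit target, it is copied into the rightmost bytes of its 8-byte slot, i.e. at offset 8 - Size, while on little-endian it stays at offset 0. The helper below only models that offset computation; it is not LLVM API.

// Editor's sketch (not LLVM API): offset within the 8-byte slot at which a
// small by-value aggregate is copied so it ends up right-justified in the GPR.
static unsigned byValCopyOffsetInSlot(unsigned Size, bool IsLittleEndian,
                                      unsigned PtrByteSize = 8) {
  return (!IsLittleEndian && Size < PtrByteSize) ? PtrByteSize - Size : 0;
}
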
5995
5996 // For aggregates larger than PtrByteSize, copy the pieces of the
5997 // object that fit into registers from the parameter save area.
5998 for (unsigned j=0; j<Size; j+=PtrByteSize) {
5999 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6000 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6001 if (GPR_idx != NumGPRs) {
6002 SDValue Load =
6003 DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
6004 MemOpChains.push_back(Load.getValue(1));
6005 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6006 ArgOffset += PtrByteSize;
6007 } else {
6008 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6009 break;
6010 }
6011 }
6012 continue;
6013 }
6014
6015 switch (Arg.getSimpleValueType().SimpleTy) {
6016 default: llvm_unreachable("Unexpected ValueType for argument!");
6017 case MVT::i1:
6018 case MVT::i32:
6019 case MVT::i64:
6020 if (Flags.isNest()) {
6021 // The 'nest' parameter, if any, is passed in R11.
6022 RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6023 hasNest = true;
6024 break;
6025 }
6026
6027 // These can be scalar arguments or elements of an integer array type
6028 // passed directly. Clang may use those instead of "byval" aggregate
6029 // types to avoid forcing arguments to memory unnecessarily.
6030 if (GPR_idx != NumGPRs) {
6031 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6032 } else {
6033 if (CallConv == CallingConv::Fast)
6034 ComputePtrOff();
6035
6036 assert(HasParameterArea &&
6037 "Parameter area must exist to pass an argument in memory.");
6038 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6039 true, isTailCall, false, MemOpChains,
6040 TailCallArguments, dl);
6041 if (CallConv == CallingConv::Fast)
6042 ArgOffset += PtrByteSize;
6043 }
6044 if (CallConv != CallingConv::Fast)
6045 ArgOffset += PtrByteSize;
6046 break;
6047 case MVT::f32:
6048 case MVT::f64: {
6049 // These can be scalar arguments or elements of a float array type
6050 // passed directly. The latter are used to implement ELFv2 homogenous
6051 // float aggregates.
6052
6053 // Named arguments go into FPRs first, and once they overflow, the
6054 // remaining arguments go into GPRs and then the parameter save area.
6055 // Unnamed arguments for vararg functions always go to GPRs and
6056 // then the parameter save area. For now, put all arguments to vararg
6057 // routines always in both locations (FPR *and* GPR or stack slot).
6058 bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs;
6059 bool NeededLoad = false;
6060
6061 // First load the argument into the next available FPR.
6062 if (FPR_idx != NumFPRs)
6063 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6064
6065 // Next, load the argument into GPR or stack slot if needed.
6066 if (!NeedGPROrStack)
6067 ;
6068 else if (GPR_idx != NumGPRs && CallConv != CallingConv::Fast) {
6069 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6070 // once we support fp <-> gpr moves.
6071
6072 // In the non-vararg case, this can only ever happen in the
6073 // presence of f32 array types, since otherwise we never run
6074 // out of FPRs before running out of GPRs.
6075 SDValue ArgVal;
6076
6077 // Double values are always passed in a single GPR.
6078 if (Arg.getValueType() != MVT::f32) {
6079 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6080
6081 // Non-array float values are extended and passed in a GPR.
6082 } else if (!Flags.isInConsecutiveRegs()) {
6083 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6084 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6085
6086 // If we have an array of floats, we collect every odd element
6087 // together with its predecessor into one GPR.
6088 } else if (ArgOffset % PtrByteSize != 0) {
6089 SDValue Lo, Hi;
6090 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6091 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6092 if (!isLittleEndian)
6093 std::swap(Lo, Hi);
6094 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6095
6096 // The final element, if even, goes into the first half of a GPR.
6097 } else if (Flags.isInConsecutiveRegsLast()) {
6098 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6099 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6100 if (!isLittleEndian)
6101 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6102 DAG.getConstant(32, dl, MVT::i32));
6103
6104 // Non-final even elements are skipped; they will be handled
6105 // together with the subsequent argument on the next go-around.
6106 } else
6107 ArgVal = SDValue();
6108
6109 if (ArgVal.getNode())
6110 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6111 } else {
6112 if (CallConv == CallingConv::Fast)
6113 ComputePtrOff();
6114
6115 // Single-precision floating-point values are mapped to the
6116 // second (rightmost) word of the stack doubleword.
6117 if (Arg.getValueType() == MVT::f32 &&
6118 !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6119 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6120 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6121 }
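
[Editor's note] A small model of the adjustment just above: a single-precision value passed in the parameter save area occupies one word of its 8-byte slot, and on big-endian (when it is not part of a packed float array) that is the second, rightmost word, hence the extra 4-byte offset. Illustrative only, not LLVM API.

// Editor's sketch (not LLVM API): extra offset applied to an f32 stack argument.
static unsigned f32SlotOffset(bool IsLittleEndian, bool IsConsecutiveFloatArray) {
  return (!IsLittleEndian && !IsConsecutiveFloatArray) ? 4u : 0u;
}
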
6122
6123 assert(HasParameterArea &&
6124 "Parameter area must exist to pass an argument in memory.");
6125 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6126 true, isTailCall, false, MemOpChains,
6127 TailCallArguments, dl);
6128
6129 NeededLoad = true;
6130 }
6131 // When passing an array of floats, the array occupies consecutive
6132 // space in the argument area; only round up to the next doubleword
6133 // at the end of the array. Otherwise, each float takes 8 bytes.
6134 if (CallConv != CallingConv::Fast || NeededLoad) {
6135 ArgOffset += (Arg.getValueType() == MVT::f32 &&
6136 Flags.isInConsecutiveRegs()) ? 4 : 8;
6137 if (Flags.isInConsecutiveRegsLast())
6138 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6139 }
6140 break;
6141 }
6142 case MVT::v4f32:
6143 case MVT::v4i32:
6144 case MVT::v8i16:
6145 case MVT::v16i8:
6146 case MVT::v2f64:
6147 case MVT::v2i64:
6148 case MVT::v1i128:
6149 case MVT::f128:
6150 if (!Subtarget.hasQPX()) {
6151 // These can be scalar arguments or elements of a vector array type
6152 // passed directly. The latter are used to implement ELFv2 homogenous
6153 // vector aggregates.
6154
6155 // For a varargs call, named arguments go into VRs or on the stack as
6156 // usual; unnamed arguments always go to the stack or the corresponding
6157 // GPRs when within range. For now, we always put the value in both
6158 // locations (or even all three).
6159 if (isVarArg) {
6160 assert(HasParameterArea &&
6161 "Parameter area must exist if we have a varargs call.");
6162 // We could elide this store in the case where the object fits
6163 // entirely in R registers. Maybe later.
6164 SDValue Store =
6165 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6166 MemOpChains.push_back(Store);
6167 if (VR_idx != NumVRs) {
6168 SDValue Load =
6169 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6170 MemOpChains.push_back(Load.getValue(1));
6171 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6172 }
6173 ArgOffset += 16;
6174 for (unsigned i=0; i<16; i+=PtrByteSize) {
6175 if (GPR_idx == NumGPRs)
6176 break;
6177 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6178 DAG.getConstant(i, dl, PtrVT));
6179 SDValue Load =
6180 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6181 MemOpChains.push_back(Load.getValue(1));
6182 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6183 }
6184 break;
6185 }
6186
6187 // Non-varargs Altivec params go into VRs or on the stack.
6188 if (VR_idx != NumVRs) {
6189 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6190 } else {
6191 if (CallConv == CallingConv::Fast)
6192 ComputePtrOff();
6193
6194 assert(HasParameterArea &&
6195 "Parameter area must exist to pass an argument in memory.");
6196 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6197 true, isTailCall, true, MemOpChains,
6198 TailCallArguments, dl);
6199 if (CallConv == CallingConv::Fast)
6200 ArgOffset += 16;
6201 }
6202
6203 if (CallConv != CallingConv::Fast)
6204 ArgOffset += 16;
6205 break;
6206 } // not QPX
6207
6208 assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 &&
6209 "Invalid QPX parameter type");
6210
6211 LLVM_FALLTHROUGH;
6212 case MVT::v4f64:
6213 case MVT::v4i1: {
6214 bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32;
6215 if (isVarArg) {
6216 assert(HasParameterArea &&
6217 "Parameter area must exist if we have a varargs call.");
6218 // We could elide this store in the case where the object fits
6219 // entirely in R registers. Maybe later.
6220 SDValue Store =
6221 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6222 MemOpChains.push_back(Store);
6223 if (QFPR_idx != NumQFPRs) {
6224 SDValue Load = DAG.getLoad(IsF32 ? MVT::v4f32 : MVT::v4f64, dl, Store,
6225 PtrOff, MachinePointerInfo());
6226 MemOpChains.push_back(Load.getValue(1));
6227 RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Load));
6228 }
6229 ArgOffset += (IsF32 ? 16 : 32);
6230 for (unsigned i = 0; i < (IsF32 ? 16U : 32U); i += PtrByteSize) {
6231 if (GPR_idx == NumGPRs)
6232 break;
6233 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6234 DAG.getConstant(i, dl, PtrVT));
6235 SDValue Load =
6236 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6237 MemOpChains.push_back(Load.getValue(1));
6238 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6239 }
6240 break;
6241 }
6242
6243 // Non-varargs QPX params go into registers or on the stack.
6244 if (QFPR_idx != NumQFPRs) {
6245 RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg));
6246 } else {
6247 if (CallConv == CallingConv::Fast)
6248 ComputePtrOff();
6249
6250        assert(HasParameterArea &&
6251               "Parameter area must exist to pass an argument in memory.");
6252 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6253 true, isTailCall, true, MemOpChains,
6254 TailCallArguments, dl);
6255 if (CallConv == CallingConv::Fast)
6256 ArgOffset += (IsF32 ? 16 : 32);
6257 }
6258
6259 if (CallConv != CallingConv::Fast)
6260 ArgOffset += (IsF32 ? 16 : 32);
6261 break;
6262 }
6263 }
6264 }
6265
6266  assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6267         "mismatch in size of parameter area");
6268 (void)NumBytesActuallyUsed;
6269
6270 if (!MemOpChains.empty())
6271 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6272
6273 // Check if this is an indirect call (MTCTR/BCTRL).
6274 // See PrepareCall() for more information about calls through function
6275 // pointers in the 64-bit SVR4 ABI.
6276 if (!isTailCall && !isPatchPoint &&
6277 !isFunctionGlobalAddress(Callee) &&
6278 !isa<ExternalSymbolSDNode>(Callee)) {
6279 // Load r2 into a virtual register and store it to the TOC save area.
6280 setUsesTOCBasePtr(DAG);
6281 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6282 // TOC save area offset.
6283 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6284 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6285 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6286 Chain = DAG.getStore(
6287 Val.getValue(1), dl, Val, AddPtr,
6288 MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
6289 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6290 // This does not mean the MTCTR instruction must use R12; it's easier
6291 // to model this as an extra parameter, so do that.
6292 if (isELFv2ABI && !isPatchPoint)
6293 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6294 }
6295
6296 // Build a sequence of copy-to-reg nodes chained together with token chain
6297 // and flag operands which copy the outgoing args into the appropriate regs.
6298 SDValue InFlag;
6299 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6300 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6301 RegsToPass[i].second, InFlag);
6302 InFlag = Chain.getValue(1);
6303 }
6304
6305 if (isTailCall && !IsSibCall)
6306 PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6307 TailCallArguments);
6308
6309 return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, hasNest,
6310 DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee,
6311 SPDiff, NumBytes, Ins, InVals, CS);
6312}
6313
6314SDValue PPCTargetLowering::LowerCall_Darwin(
6315 SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
6316 bool isTailCall, bool isPatchPoint,
6317 const SmallVectorImpl<ISD::OutputArg> &Outs,
6318 const SmallVectorImpl<SDValue> &OutVals,
6319 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6320 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6321 ImmutableCallSite CS) const {
6322 unsigned NumOps = Outs.size();
6323
6324 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6325 bool isPPC64 = PtrVT == MVT::i64;
6326 unsigned PtrByteSize = isPPC64 ? 8 : 4;
6327
6328 MachineFunction &MF = DAG.getMachineFunction();
6329
6330  // Mark this function as potentially containing a function that contains a
6331  // tail call. As a consequence, the frame pointer will be used for dynamic
6332  // allocation and for restoring the caller's stack pointer in this function's
6333  // epilog. This is done because the tail-called function might overwrite the
6334  // value in this function's (MF) stack pointer stack slot 0(SP).
6335 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
6336 CallConv == CallingConv::Fast)
6337 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6338
6339 // Count how many bytes are to be pushed on the stack, including the linkage
6340 // area, and parameter passing area. We start with 24/48 bytes, which is
6341 // prereserved space for [SP][CR][LR][3 x unused].
6342 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6343 unsigned NumBytes = LinkageSize;
6344
6345 // Add up all the space actually used.
6346 // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
6347 // they all go in registers, but we must reserve stack space for them for
6348 // possible use by the caller. In varargs or 64-bit calls, parameters are
6349 // assigned stack space in order, with padding so Altivec parameters are
6350 // 16-byte aligned.
6351 unsigned nAltivecParamsAtEnd = 0;
6352 for (unsigned i = 0; i != NumOps; ++i) {
6353 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6354 EVT ArgVT = Outs[i].VT;
6355 // Varargs Altivec parameters are padded to a 16 byte boundary.
6356 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
6357 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
6358 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
6359 if (!isVarArg && !isPPC64) {
6360 // Non-varargs Altivec parameters go after all the non-Altivec
6361 // parameters; handle those later so we know how much padding we need.
6362 nAltivecParamsAtEnd++;
6363 continue;
6364 }
6365 // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
6366 NumBytes = ((NumBytes+15)/16)*16;
6367 }
6368 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6369 }
6370
6371 // Allow for Altivec parameters at the end, if needed.
6372 if (nAltivecParamsAtEnd) {
6373 NumBytes = ((NumBytes+15)/16)*16;
6374 NumBytes += 16*nAltivecParamsAtEnd;
6375 }
6376
6377 // The prolog code of the callee may store up to 8 GPR argument registers to
6378  // the stack, allowing va_start to index over them in memory if it is varargs.
6379 // Because we cannot tell if this is needed on the caller side, we have to
6380 // conservatively assume that it is needed. As such, make sure we have at
6381 // least enough stack space for the caller to store the 8 GPRs.
6382 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6383
6384 // Tail call needs the stack to be aligned.
6385 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
6386 CallConv == CallingConv::Fast)
6387 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6388
6389 // Calculate by how many bytes the stack has to be adjusted in case of tail
6390 // call optimization.
6391 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
6392
6393 // To protect arguments on the stack from being clobbered in a tail call,
6394 // force all the loads to happen before doing any other lowering.
6395 if (isTailCall)
6396 Chain = DAG.getStackArgumentTokenFactor(Chain);
6397
6398 // Adjust the stack pointer for the new arguments...
6399 // These operations are automatically eliminated by the prolog/epilog pass
6400 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6401 SDValue CallSeqStart = Chain;
6402
6403  // Load the return address and frame pointer so they can be moved somewhere
6404  // else later.
6405 SDValue LROp, FPOp;
6406 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6407
6408 // Set up a copy of the stack pointer for use loading and storing any
6409 // arguments that may not fit in the registers available for argument
6410 // passing.
6411 SDValue StackPtr;
6412 if (isPPC64)
6413 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6414 else
6415 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
6416
6417 // Figure out which arguments are going to go in registers, and which in
6418 // memory. Also, if this is a vararg function, floating point operations
6419 // must be stored to our stack, and loaded into integer regs as well, if
6420 // any integer regs are available for argument passing.
6421 unsigned ArgOffset = LinkageSize;
6422 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6423
6424 static const MCPhysReg GPR_32[] = { // 32-bit registers.
6425 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6426 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
6427 };
6428 static const MCPhysReg GPR_64[] = { // 64-bit registers.
6429 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6430 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6431 };
6432 static const MCPhysReg VR[] = {
6433 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6434 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6435 };
6436 const unsigned NumGPRs = array_lengthof(GPR_32);
6437 const unsigned NumFPRs = 13;
6438 const unsigned NumVRs = array_lengthof(VR);
6439
6440 const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
6441
6442 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6443 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6444
6445 SmallVector<SDValue, 8> MemOpChains;
6446 for (unsigned i = 0; i != NumOps; ++i) {
6447 SDValue Arg = OutVals[i];
6448 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6449
6450 // PtrOff will be used to store the current argument to the stack if a
6451 // register cannot be found for it.
6452 SDValue PtrOff;
6453
6454 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6455
6456 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6457
6458 // On PPC64, promote integers to 64-bit values.
6459 if (isPPC64 && Arg.getValueType() == MVT::i32) {
6460 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6461 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6462 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6463 }
6464
6465 // FIXME memcpy is used way more than necessary. Correctness first.
6466 // Note: "by value" is code for passing a structure by value, not
6467 // basic types.
6468 if (Flags.isByVal()) {
6469 unsigned Size = Flags.getByValSize();
6470 // Very small objects are passed right-justified. Everything else is
6471 // passed left-justified.
6472 if (Size==1 || Size==2) {
6473 EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
6474 if (GPR_idx != NumGPRs) {
6475 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6476 MachinePointerInfo(), VT);
6477 MemOpChains.push_back(Load.getValue(1));
6478 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6479
6480 ArgOffset += PtrByteSize;
6481 } else {
6482 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6483 PtrOff.getValueType());
6484 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6485 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6486 CallSeqStart,
6487 Flags, DAG, dl);
6488 ArgOffset += PtrByteSize;
6489 }
6490 continue;
6491 }
6492 // Copy entire object into memory. There are cases where gcc-generated
6493 // code assumes it is there, even if it could be put entirely into
6494 // registers. (This is not what the doc says.)
6495 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6496 CallSeqStart,
6497 Flags, DAG, dl);
6498
6499 // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
6500 // copy the pieces of the object that fit into registers from the
6501 // parameter save area.
6502 for (unsigned j=0; j<Size; j+=PtrByteSize) {
6503 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6504 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6505 if (GPR_idx != NumGPRs) {
6506 SDValue Load =
6507 DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
6508 MemOpChains.push_back(Load.getValue(1));
6509 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6510 ArgOffset += PtrByteSize;
6511 } else {
6512 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6513 break;
6514 }
6515 }
6516 continue;
6517 }
6518
6519 switch (Arg.getSimpleValueType().SimpleTy) {
6520    default: llvm_unreachable("Unexpected ValueType for argument!");
6521 case MVT::i1:
6522 case MVT::i32:
6523 case MVT::i64:
6524 if (GPR_idx != NumGPRs) {
6525 if (Arg.getValueType() == MVT::i1)
6526 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);
6527
6528 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6529 } else {
6530 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6531 isPPC64, isTailCall, false, MemOpChains,
6532 TailCallArguments, dl);
6533 }
6534 ArgOffset += PtrByteSize;
6535 break;
6536 case MVT::f32:
6537 case MVT::f64:
6538 if (FPR_idx != NumFPRs) {
6539 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6540
6541 if (isVarArg) {
6542 SDValue Store =
6543 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6544 MemOpChains.push_back(Store);
6545
6546 // Float varargs are always shadowed in available integer registers
6547 if (GPR_idx != NumGPRs) {
6548 SDValue Load =
6549 DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
6550 MemOpChains.push_back(Load.getValue(1));
6551 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6552 }
6553 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
6554 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6555 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6556 SDValue Load =
6557 DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
6558 MemOpChains.push_back(Load.getValue(1));
6559 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6560 }
6561 } else {
6562 // If we have any FPRs remaining, we may also have GPRs remaining.
6563 // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
6564 // GPRs.
6565 if (GPR_idx != NumGPRs)
6566 ++GPR_idx;
6567 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
6568 !isPPC64) // PPC64 has 64-bit GPR's obviously :)
6569 ++GPR_idx;
6570 }
6571 } else
6572 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6573 isPPC64, isTailCall, false, MemOpChains,
6574 TailCallArguments, dl);
6575 if (isPPC64)
6576 ArgOffset += 8;
6577 else
6578 ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
6579 break;
6580 case MVT::v4f32:
6581 case MVT::v4i32:
6582 case MVT::v8i16:
6583 case MVT::v16i8:
6584 if (isVarArg) {
6585 // These go aligned on the stack, or in the corresponding R registers
6586 // when within range. The Darwin PPC ABI doc claims they also go in
6587 // V registers; in fact gcc does this only for arguments that are
6588        // prototyped, not for those that match the ellipsis (...). We do it
6589        // for all arguments; this seems to work.
6590 while (ArgOffset % 16 !=0) {
6591 ArgOffset += PtrByteSize;
6592 if (GPR_idx != NumGPRs)
6593 GPR_idx++;
6594 }
6595 // We could elide this store in the case where the object fits
6596 // entirely in R registers. Maybe later.
6597 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
6598 DAG.getConstant(ArgOffset, dl, PtrVT));
6599 SDValue Store =
6600 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6601 MemOpChains.push_back(Store);
6602 if (VR_idx != NumVRs) {
6603 SDValue Load =
6604 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6605 MemOpChains.push_back(Load.getValue(1));
6606 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6607 }
6608 ArgOffset += 16;
6609 for (unsigned i=0; i<16; i+=PtrByteSize) {
6610 if (GPR_idx == NumGPRs)
6611 break;
6612 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6613 DAG.getConstant(i, dl, PtrVT));
6614 SDValue Load =
6615 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6616 MemOpChains.push_back(Load.getValue(1));
6617 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6618 }
6619 break;
6620 }
6621
6622 // Non-varargs Altivec params generally go in registers, but have
6623 // stack space allocated at the end.
6624 if (VR_idx != NumVRs) {
6625 // Doesn't have GPR space allocated.
6626 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6627 } else if (nAltivecParamsAtEnd==0) {
6628 // We are emitting Altivec params in order.
6629 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6630 isPPC64, isTailCall, true, MemOpChains,
6631 TailCallArguments, dl);
6632 ArgOffset += 16;
6633 }
6634 break;
6635 }
6636 }
6637 // If all Altivec parameters fit in registers, as they usually do,
6638 // they get stack space following the non-Altivec parameters. We
6639 // don't track this here because nobody below needs it.
6640  // If there are more Altivec parameters than fit in registers, emit
6641  // the stores here.
6642 if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
6643 unsigned j = 0;
6644 // Offset is aligned; skip 1st 12 params which go in V registers.
6645 ArgOffset = ((ArgOffset+15)/16)*16;
6646 ArgOffset += 12*16;
6647 for (unsigned i = 0; i != NumOps; ++i) {
6648 SDValue Arg = OutVals[i];
6649 EVT ArgType = Outs[i].VT;
6650 if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
6651 ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
6652 if (++j > NumVRs) {
6653 SDValue PtrOff;
6654 // We are emitting Altivec params in order.
6655 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6656 isPPC64, isTailCall, true, MemOpChains,
6657 TailCallArguments, dl);
6658 ArgOffset += 16;
6659 }
6660 }
6661 }
6662 }
6663
6664 if (!MemOpChains.empty())
6665 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6666
6667 // On Darwin, R12 must contain the address of an indirect callee. This does
6668 // not mean the MTCTR instruction must use R12; it's easier to model this as
6669 // an extra parameter, so do that.
6670 if (!isTailCall &&
6671 !isFunctionGlobalAddress(Callee) &&
6672 !isa<ExternalSymbolSDNode>(Callee) &&
6673 !isBLACompatibleAddress(Callee, DAG))
6674 RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
6675 PPC::R12), Callee));
6676
6677 // Build a sequence of copy-to-reg nodes chained together with token chain
6678 // and flag operands which copy the outgoing args into the appropriate regs.
6679 SDValue InFlag;
6680 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6681 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6682 RegsToPass[i].second, InFlag);
6683 InFlag = Chain.getValue(1);
6684 }
6685
6686 if (isTailCall)
6687 PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6688 TailCallArguments);
6689
6690 return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
6691 /* unused except on PPC64 ELFv1 */ false, DAG,
6692 RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
6693 NumBytes, Ins, InVals, CS);
6694}
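// Illustrative sketch, not part of the analyzed source: a standalone model of
// the parameter-area sizing performed at the top of LowerCall_Darwin for a
// 32-bit non-varargs call. The argument sizes and the 24-byte linkage area
// below are assumed values for demonstration only.
#include <algorithm>
#include <cstdio>
#include <utility>
#include <vector>

int main() {
  const unsigned LinkageSize = 24;  // [SP][CR][LR][3 x unused] on 32-bit Darwin
  const unsigned PtrByteSize = 4;
  // (byte size, is Altivec?) -- Altivec args are deferred to the end of the
  // parameter area in 32-bit non-varargs calls, padded to 16 bytes each.
  std::vector<std::pair<unsigned, bool>> Args = {{4, false}, {8, false}, {16, true}};

  unsigned NumBytes = LinkageSize, AltivecAtEnd = 0;
  for (const auto &A : Args) {
    if (A.second) { ++AltivecAtEnd; continue; }
    NumBytes += A.first;
  }
  if (AltivecAtEnd) {
    NumBytes = ((NumBytes + 15) / 16) * 16;  // 16-byte align the Altivec block
    NumBytes += 16 * AltivecAtEnd;
  }
  // The callee's prolog may spill all 8 GPR argument registers, so reserve at
  // least that much space after the linkage area.
  NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
  std::printf("parameter area: %u bytes\n", NumBytes);  // prints 64
}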
6695
6696
6697SDValue PPCTargetLowering::LowerCall_AIX(
6698 SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
6699 bool isTailCall, bool isPatchPoint,
6700 const SmallVectorImpl<ISD::OutputArg> &Outs,
6701 const SmallVectorImpl<SDValue> &OutVals,
6702 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6703 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6704 ImmutableCallSite CS) const {
6705
6706  assert((CallConv == CallingConv::C || CallConv == CallingConv::Fast) &&
6707         "Unimplemented calling convention!");
6708 if (isVarArg || isPatchPoint)
6709 report_fatal_error("This call type is unimplemented on AIX.");
6710
6711 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6712 bool isPPC64 = PtrVT == MVT::i64;
6713 unsigned PtrByteSize = isPPC64 ? 8 : 4;
6714 unsigned NumOps = Outs.size();
6715
6716
6717 // Count how many bytes are to be pushed on the stack, including the linkage
6718 // area, parameter list area.
6719 // On XCOFF, we start with 24/48, which is reserved space for
6720 // [SP][CR][LR][2 x reserved][TOC].
6721 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6722
6723 // The prolog code of the callee may store up to 8 GPR argument registers to
6724 // the stack, allowing va_start to index over them in memory if the callee
6725 // is variadic.
6726 // Because we cannot tell if this is needed on the caller side, we have to
6727 // conservatively assume that it is needed. As such, make sure we have at
6728 // least enough stack space for the caller to store the 8 GPRs.
6729 unsigned NumBytes = LinkageSize + 8 * PtrByteSize;
6730
6731 // Adjust the stack pointer for the new arguments...
6732 // These operations are automatically eliminated by the prolog/epilog
6733 // inserter pass.
6734 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6735 SDValue CallSeqStart = Chain;
6736
6737 static const MCPhysReg GPR_32[] = { // 32-bit registers.
6738 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6739 PPC::R7, PPC::R8, PPC::R9, PPC::R10
6740 };
6741 static const MCPhysReg GPR_64[] = { // 64-bit registers.
6742 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6743 PPC::X7, PPC::X8, PPC::X9, PPC::X10
6744 };
6745
6746 const unsigned NumGPRs = isPPC64 ? array_lengthof(GPR_64)
6747 : array_lengthof(GPR_32);
6748 const unsigned NumFPRs = array_lengthof(FPR);
6749  assert(NumFPRs == 13 && "Only FPR 1-13 could be used for parameter passing "
6750                          "on AIX");
6751
6752 const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
6753 unsigned GPR_idx = 0, FPR_idx = 0;
6754
6755 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6756
6757 if (isTailCall)
6758 report_fatal_error("Handling of tail call is unimplemented!");
6759 int SPDiff = 0;
6760
6761 for (unsigned i = 0; i != NumOps; ++i) {
6762 SDValue Arg = OutVals[i];
6763 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6764
6765 // Promote integers if needed.
6766 if (Arg.getValueType() == MVT::i1 ||
6767 (isPPC64 && Arg.getValueType() == MVT::i32)) {
6768 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6769 Arg = DAG.getNode(ExtOp, dl, PtrVT, Arg);
6770 }
6771
6772 // Note: "by value" is code for passing a structure by value, not
6773 // basic types.
6774 if (Flags.isByVal())
6775 report_fatal_error("Passing structure by value is unimplemented!");
6776
6777 switch (Arg.getSimpleValueType().SimpleTy) {
6778    default: llvm_unreachable("Unexpected ValueType for argument!");
6779 case MVT::i1:
6780 case MVT::i32:
6781 case MVT::i64:
6782 if (GPR_idx != NumGPRs)
6783 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6784 else
6785 report_fatal_error("Handling of placing parameters on the stack is "
6786 "unimplemented!");
6787 break;
6788 case MVT::f32:
6789 case MVT::f64:
6790 if (FPR_idx != NumFPRs) {
6791 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6792
6793 // If we have any FPRs remaining, we may also have GPRs remaining.
6794 // Args passed in FPRs consume 1 or 2 (f64 in 32 bit mode) available
6795 // GPRs.
6796 if (GPR_idx != NumGPRs)
6797 ++GPR_idx;
6798 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64)
6799 ++GPR_idx;
6800 } else
6801 report_fatal_error("Handling of placing parameters on the stack is "
6802 "unimplemented!");
6803 break;
6804 case MVT::v4f32:
6805 case MVT::v4i32:
6806 case MVT::v8i16:
6807 case MVT::v16i8:
6808 case MVT::v2f64:
6809 case MVT::v2i64:
6810 case MVT::v1i128:
6811 case MVT::f128:
6812 case MVT::v4f64:
6813 case MVT::v4i1:
6814 report_fatal_error("Handling of this parameter type is unimplemented!");
6815 }
6816 }
6817
6818 if (!isFunctionGlobalAddress(Callee) &&
6819 !isa<ExternalSymbolSDNode>(Callee))
6820 report_fatal_error("Handling of indirect call is unimplemented!");
6821
6822 // Build a sequence of copy-to-reg nodes chained together with token chain
6823 // and flag operands which copy the outgoing args into the appropriate regs.
6824 SDValue InFlag;
6825 for (auto Reg : RegsToPass) {
6826 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InFlag);
6827 InFlag = Chain.getValue(1);
6828 }
6829
6830 return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
6831 /* unused except on PPC64 ELFv1 */ false, DAG,
6832 RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
6833 NumBytes, Ins, InVals, CS);
6834}
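// Illustrative sketch, not part of the analyzed source: the GPR "shadowing"
// rule applied in the f32/f64 cases of LowerCall_Darwin and LowerCall_AIX. An
// argument that lands in an FPR still consumes one GPR slot (two for f64 in
// 32-bit mode), so later integer arguments skip those registers. The argument
// list below is hypothetical.
#include <cstdio>

int main() {
  const unsigned NumGPRs = 8, NumFPRs = 13;
  const bool isPPC64 = false;
  unsigned GPR_idx = 0, FPR_idx = 0;
  const char Args[] = {'i', 'd', 'f', 'i'};  // 'd' = f64, 'f' = f32, 'i' = int

  for (char A : Args) {
    if (A == 'i') {
      if (GPR_idx != NumGPRs)
        std::printf("int arg -> r%u\n", 3 + GPR_idx++);  // GPRs start at r3
      continue;
    }
    if (FPR_idx != NumFPRs)
      std::printf("fp arg  -> f%u\n", 1 + FPR_idx++);    // FPRs start at f1
    if (GPR_idx != NumGPRs)
      ++GPR_idx;                                         // shadow one GPR
    if (A == 'd' && !isPPC64 && GPR_idx != NumGPRs)
      ++GPR_idx;                                         // f64 shadows a second
  }
  // Output: r3, f1, f2, r7 -- the float args consumed r4-r6 as shadows.
}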
6835
6836bool
6837PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
6838 MachineFunction &MF, bool isVarArg,
6839 const SmallVectorImpl<ISD::OutputArg> &Outs,
6840 LLVMContext &Context) const {
6841 SmallVector<CCValAssign, 16> RVLocs;
6842 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
6843 return CCInfo.CheckReturn(
6844 Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
6845 ? RetCC_PPC_Cold
6846 : RetCC_PPC);
6847}
6848
6849SDValue
6850PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
6851 bool isVarArg,
6852 const SmallVectorImpl<ISD::OutputArg> &Outs,
6853 const SmallVectorImpl<SDValue> &OutVals,
6854 const SDLoc &dl, SelectionDAG &DAG) const {
6855 SmallVector<CCValAssign, 16> RVLocs;
6856 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
6857 *DAG.getContext());
6858 CCInfo.AnalyzeReturn(Outs,
6859 (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
6860 ? RetCC_PPC_Cold
6861 : RetCC_PPC);
6862
6863 SDValue Flag;
6864 SmallVector<SDValue, 4> RetOps(1, Chain);
6865
6866 // Copy the result values into the output registers.
6867 for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
6868 CCValAssign &VA = RVLocs[i];
6869    assert(VA.isRegLoc() && "Can only return in registers!");
6870
6871 SDValue Arg = OutVals[RealResIdx];
6872
6873 switch (VA.getLocInfo()) {
6874    default: llvm_unreachable("Unknown loc info!");
6875 case CCValAssign::Full: break;
6876 case CCValAssign::AExt:
6877 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
6878 break;
6879 case CCValAssign::ZExt:
6880 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
6881 break;
6882 case CCValAssign::SExt:
6883 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
6884 break;
6885 }
6886 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
6887 bool isLittleEndian = Subtarget.isLittleEndian();
6888 // Legalize ret f64 -> ret 2 x i32.
6889 SDValue SVal =
6890 DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6891 DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
6892 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
6893 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
6894 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6895 DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
6896 Flag = Chain.getValue(1);
6897 VA = RVLocs[++i]; // skip ahead to next loc
6898 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
6899 } else
6900 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
6901 Flag = Chain.getValue(1);
6902 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
6903 }
6904
6905 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
6906 const MCPhysReg *I =
6907 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
6908 if (I) {
6909 for (; *I; ++I) {
6910
6911 if (PPC::G8RCRegClass.contains(*I))
6912 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
6913 else if (PPC::F8RCRegClass.contains(*I))
6914 RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
6915 else if (PPC::CRRCRegClass.contains(*I))
6916 RetOps.push_back(DAG.getRegister(*I, MVT::i1));
6917 else if (PPC::VRRCRegClass.contains(*I))
6918 RetOps.push_back(DAG.getRegister(*I, MVT::Other));
6919 else
6920        llvm_unreachable("Unexpected register class in CSRsViaCopy!");
6921 }
6922 }
6923
6924 RetOps[0] = Chain; // Update chain.
6925
6926 // Add the flag if we have it.
6927 if (Flag.getNode())
6928 RetOps.push_back(Flag);
6929
6930 return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
6931}
6932
6933SDValue
6934PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
6935 SelectionDAG &DAG) const {
6936 SDLoc dl(Op);
6937
6938 // Get the correct type for integers.
6939 EVT IntVT = Op.getValueType();
6940
6941 // Get the inputs.
6942 SDValue Chain = Op.getOperand(0);
6943 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
6944 // Build a DYNAREAOFFSET node.
6945 SDValue Ops[2] = {Chain, FPSIdx};
6946 SDVTList VTs = DAG.getVTList(IntVT);
6947 return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
6948}
6949
6950SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
6951 SelectionDAG &DAG) const {
6952 // When we pop the dynamic allocation we need to restore the SP link.
6953 SDLoc dl(Op);
6954
6955 // Get the correct type for pointers.
6956 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6957
6958 // Construct the stack pointer operand.
6959 bool isPPC64 = Subtarget.isPPC64();
6960 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
6961 SDValue StackPtr = DAG.getRegister(SP, PtrVT);
6962
6963 // Get the operands for the STACKRESTORE.
6964 SDValue Chain = Op.getOperand(0);
6965 SDValue SaveSP = Op.getOperand(1);
6966
6967 // Load the old link SP.
6968 SDValue LoadLinkSP =
6969 DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
6970
6971 // Restore the stack pointer.
6972 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
6973
6974 // Store the old link SP.
6975 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
6976}
6977
6978SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
6979 MachineFunction &MF = DAG.getMachineFunction();
6980 bool isPPC64 = Subtarget.isPPC64();
6981 EVT PtrVT = getPointerTy(MF.getDataLayout());
6982
6983  // Get the current return address save index. It is created lazily the
6984  // first time it is needed below.
6985 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
6986 int RASI = FI->getReturnAddrSaveIndex();
6987
6988  // If the return address save index hasn't been defined yet.
6989  if (!RASI) {
6990    // Find out the fixed offset of the return address save area.
6991    int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
6992    // Allocate the frame index for the return address save area.
6993 RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
6994 // Save the result.
6995 FI->setReturnAddrSaveIndex(RASI);
6996 }
6997 return DAG.getFrameIndex(RASI, PtrVT);
6998}
6999
7000SDValue
7001PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
7002 MachineFunction &MF = DAG.getMachineFunction();
7003 bool isPPC64 = Subtarget.isPPC64();
7004 EVT PtrVT = getPointerTy(MF.getDataLayout());
7005
7006 // Get current frame pointer save index. The users of this index will be
7007 // primarily DYNALLOC instructions.
7008 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7009 int FPSI = FI->getFramePointerSaveIndex();
7010
7011 // If the frame pointer save index hasn't been defined yet.
7012 if (!FPSI) {
7013    // Find out the fixed offset of the frame pointer save area.
7014 int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
7015 // Allocate the frame index for frame pointer save area.
7016 FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
7017 // Save the result.
7018 FI->setFramePointerSaveIndex(FPSI);
7019 }
7020 return DAG.getFrameIndex(FPSI, PtrVT);
7021}
7022
7023SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
7024 SelectionDAG &DAG) const {
7025 // Get the inputs.
7026 SDValue Chain = Op.getOperand(0);
7027 SDValue Size = Op.getOperand(1);
7028 SDLoc dl(Op);
7029
7030 // Get the correct type for pointers.
7031 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7032 // Negate the size.
7033 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
7034 DAG.getConstant(0, dl, PtrVT), Size);
7035 // Construct a node for the frame pointer save index.
7036 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7037 // Build a DYNALLOC node.
7038 SDValue Ops[3] = { Chain, NegSize, FPSIdx };
7039 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
7040 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
7041}
7042
7043SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
7044 SelectionDAG &DAG) const {
7045 MachineFunction &MF = DAG.getMachineFunction();
7046
7047 bool isPPC64 = Subtarget.isPPC64();
7048 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7049
7050 int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
7051 return DAG.getFrameIndex(FI, PtrVT);
7052}
7053
7054SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
7055 SelectionDAG &DAG) const {
7056 SDLoc DL(Op);
7057 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
7058 DAG.getVTList(MVT::i32, MVT::Other),
7059 Op.getOperand(0), Op.getOperand(1));
7060}
7061
7062SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
7063 SelectionDAG &DAG) const {
7064 SDLoc DL(Op);
7065 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
7066 Op.getOperand(0), Op.getOperand(1));
7067}
7068
7069SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
7070 if (Op.getValueType().isVector())
7071 return LowerVectorLoad(Op, DAG);
7072
7073  assert(Op.getValueType() == MVT::i1 &&
7074         "Custom lowering only for i1 loads");
7075
7076 // First, load 8 bits into 32 bits, then truncate to 1 bit.
7077
7078 SDLoc dl(Op);
7079 LoadSDNode *LD = cast<LoadSDNode>(Op);
7080
7081 SDValue Chain = LD->getChain();
7082 SDValue BasePtr = LD->getBasePtr();
7083 MachineMemOperand *MMO = LD->getMemOperand();
7084
7085 SDValue NewLD =
7086 DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
7087 BasePtr, MVT::i8, MMO);
7088 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
7089
7090 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
7091 return DAG.getMergeValues(Ops, dl);
7092}
7093
7094SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
7095 if (Op.getOperand(1).getValueType().isVector())
7096 return LowerVectorStore(Op, DAG);
7097
7098  assert(Op.getOperand(1).getValueType() == MVT::i1 &&
7099         "Custom lowering only for i1 stores");
7100
7101 // First, zero extend to 32 bits, then use a truncating store to 8 bits.
7102
7103 SDLoc dl(Op);
7104 StoreSDNode *ST = cast<StoreSDNode>(Op);
7105
7106 SDValue Chain = ST->getChain();
7107 SDValue BasePtr = ST->getBasePtr();
7108 SDValue Value = ST->getValue();
7109 MachineMemOperand *MMO = ST->getMemOperand();
7110
7111 Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
7112 Value);
7113 return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
7114}
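// Illustrative sketch, not part of the analyzed source: a scalar model of the
// custom i1 load/store lowering above. An i1 occupies a full byte in memory;
// the store zero-extends the bit and performs a truncating 8-bit store, and
// the load extends that byte back into a wide register before truncating to
// the single live bit.
#include <cstdint>
#include <cstdio>

int main() {
  bool In = true;
  uint8_t Mem = (uint8_t)(uint32_t)In;  // STORE: zext to 32 bits, trunc-store 8
  uint32_t Wide = (uint32_t)Mem;        // LOAD:  ext-load 8 bits into 32
  bool Out = (Wide & 1u) != 0;          // truncate back to i1
  std::printf("%d\n", (int)Out);        // prints 1
}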
7115
7116// FIXME: Remove this once the ANDI glue bug is fixed:
7117SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
7118  assert(Op.getValueType() == MVT::i1 &&
7119         "Custom lowering only for i1 results");
7120
7121 SDLoc DL(Op);
7122 return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1,
7123 Op.getOperand(0));
7124}
7125
7126SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
7127 SelectionDAG &DAG) const {
7128
7129 // Implements a vector truncate that fits in a vector register as a shuffle.
7130 // We want to legalize vector truncates down to where the source fits in
7131 // a vector register (and target is therefore smaller than vector register
7132 // size). At that point legalization will try to custom lower the sub-legal
7133 // result and get here - where we can contain the truncate as a single target
7134 // operation.
7135
7136 // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
7137 // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
7138 //
7139 // We will implement it for big-endian ordering as this (where x denotes
7140 // undefined):
7141 // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
7142 // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
7143 //
7144 // The same operation in little-endian ordering will be:
7145 // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
7146 // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
7147
7148  assert(Op.getValueType().isVector() && "Vector type expected.");
7149
7150 SDLoc DL(Op);
7151 SDValue N1 = Op.getOperand(0);
7152 unsigned SrcSize = N1.getValueType().getSizeInBits();
7153  assert(SrcSize <= 128 && "Source must fit in an Altivec/VSX vector");
7154 SDValue WideSrc = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
7155
7156 EVT TrgVT = Op.getValueType();
7157 unsigned TrgNumElts = TrgVT.getVectorNumElements();
7158 EVT EltVT = TrgVT.getVectorElementType();
7159 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
7160 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
7161
7162 // First list the elements we want to keep.
7163 unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
7164 SmallVector<int, 16> ShuffV;
7165 if (Subtarget.isLittleEndian())
7166 for (unsigned i = 0; i < TrgNumElts; ++i)
7167 ShuffV.push_back(i * SizeMult);
7168 else
7169 for (unsigned i = 1; i <= TrgNumElts; ++i)
7170 ShuffV.push_back(i * SizeMult - 1);
7171
7172 // Populate the remaining elements with undefs.
7173 for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
7174 // ShuffV.push_back(i + WideNumElts);
7175 ShuffV.push_back(WideNumElts + 1);
7176
7177 SDValue Conv = DAG.getNode(ISD::BITCAST, DL, WideVT, WideSrc);
7178 return DAG.getVectorShuffle(WideVT, DL, Conv, DAG.getUNDEF(WideVT), ShuffV);
7179}
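// Illustrative sketch, not part of the analyzed source: the shuffle mask that
// LowerTRUNCATEVector builds for the trunc <2 x i16> -> <2 x i8> example in
// the comment above, once the source has been widened into a 128-bit register
// of 16 byte lanes. Padding lanes are shown as -1 (undef) for readability.
#include <cstdio>
#include <vector>

int main() {
  const unsigned SrcBits = 32;     // <2 x i16> occupies 32 bits of the register
  const unsigned TrgNumElts = 2;   // <2 x i8>
  const unsigned EltBits = 8;
  const unsigned WideNumElts = 128 / EltBits;                  // 16 byte lanes
  const unsigned SizeMult = SrcBits / (TrgNumElts * EltBits);  // 2

  for (bool LE : {true, false}) {
    std::vector<int> ShuffV;
    if (LE)
      for (unsigned i = 0; i < TrgNumElts; ++i)
        ShuffV.push_back(i * SizeMult);      // LSB of each i16 on LE: 0, 2
    else
      for (unsigned i = 1; i <= TrgNumElts; ++i)
        ShuffV.push_back(i * SizeMult - 1);  // LSB of each i16 on BE: 1, 3
    for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
      ShuffV.push_back(-1);                  // remaining lanes are undef
    std::printf("%s mask:", LE ? "LE" : "BE");
    for (int M : ShuffV)
      std::printf(" %d", M);
    std::printf("\n");
  }
}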
7180
7181/// LowerSELECT_CC - Lower floating-point select_cc's into an fsel instruction
7182/// when possible.
7183SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
7184 // Not FP? Not a fsel.
7185 if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
7186 !Op.getOperand(2).getValueType().isFloatingPoint())
7187 return Op;
7188
7189 // We might be able to do better than this under some circumstances, but in
7190 // general, fsel-based lowering of select is a finite-math-only optimization.
7191 // For more information, see section F.3 of the 2.06 ISA specification.
7192 if (!DAG.getTarget().Options.NoInfsFPMath ||
7193 !DAG.getTarget().Options.NoNaNsFPMath)
7194 return Op;
7195 // TODO: Propagate flags from the select rather than global settings.
7196 SDNodeFlags Flags;
7197 Flags.setNoInfs(true);
7198 Flags.setNoNaNs(true);
7199
7200 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
7201
7202 EVT ResVT = Op.getValueType();
7203 EVT CmpVT = Op.getOperand(0).getValueType();
7204 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
7205 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
7206 SDLoc dl(Op);
7207
7208 // If the RHS of the comparison is a 0.0, we don't need to do the
7209 // subtraction at all.
7210 SDValue Sel1;
7211 if (isFloatingPointZero(RHS))
7212 switch (CC) {
7213 default: break; // SETUO etc aren't handled by fsel.
7214 case ISD::SETNE:
7215 std::swap(TV, FV);
7216      LLVM_FALLTHROUGH;
7217 case ISD::SETEQ:
7218 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
7219 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
7220 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
7221 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
7222 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
7223 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
7224 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
7225 case ISD::SETULT:
7226 case ISD::SETLT:
7227 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
7228      LLVM_FALLTHROUGH;
7229 case ISD::SETOGE:
7230 case ISD::SETGE:
7231 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
7232 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
7233 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
7234 case ISD::SETUGT:
7235 case ISD::SETGT:
7236 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
7237      LLVM_FALLTHROUGH;
7238 case ISD::SETOLE:
7239 case ISD::SETLE:
7240 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
7241 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
7242 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
7243 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
7244 }
7245
7246 SDValue Cmp;
7247 switch (CC) {
7248 default: break; // SETUO etc aren't handled by fsel.
7249 case ISD::SETNE:
7250 std::swap(TV, FV);
7251    LLVM_FALLTHROUGH;
7252 case ISD::SETEQ:
7253 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
7254 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
7255 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7256 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
7257 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
7258 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
7259 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
7260 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
7261 case ISD::SETULT:
7262 case ISD::SETLT:
7263 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
7264 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
7265 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7266 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
7267 case ISD::SETOGE:
7268 case ISD::SETGE:
7269 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
7270 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
7271 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7272 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
7273 case ISD::SETUGT:
7274 case ISD::SETGT:
7275 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
7276 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
7277 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7278 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
7279 case ISD::SETOLE:
7280 case ISD::SETLE:
7281 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
7282 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
7283 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7284 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
7285 }
7286 return Op;
7287}
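// Illustrative sketch, not part of the analyzed source: a scalar model of the
// fsel identity that LowerSELECT_CC relies on. fsel(x, t, f) selects t when
// x >= 0.0 and f otherwise, so "a >= b ? t : f" becomes fsel(a - b, t, f) and
// the setlt/setle forms are handled by swapping t and f (or negating the
// operand). As the comment above notes, this is only valid under
// no-NaNs/no-infs assumptions.
#include <cstdio>

static double fsel(double x, double t, double f) { return x >= 0.0 ? t : f; }

int main() {
  double a = 2.0, b = 3.0, t = 1.0, f = -1.0;
  double ge = fsel(a - b, t, f);  // a >= b ? t : f  -> -1
  double lt = fsel(a - b, f, t);  // a <  b ? t : f  ->  1
  std::printf("setge: %g  setlt: %g\n", ge, lt);
}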
7288
7289void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
7290 SelectionDAG &DAG,
7291 const SDLoc &dl) const {
7292  assert(Op.getOperand(0).getValueType().isFloatingPoint());
7293 SDValue Src = Op.getOperand(0);
7294 if (Src.getValueType() == MVT::f32)
7295 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
7296
7297 SDValue Tmp;
7298 switch (Op.getSimpleValueType().SimpleTy) {
7299  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
7300 case MVT::i32:
7301 Tmp = DAG.getNode(
7302 Op.getOpcode() == ISD::FP_TO_SINT
7303 ? PPCISD::FCTIWZ
7304 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
7305 dl, MVT::f64, Src);
7306 break;
7307 case MVT::i64:
7308    assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
7309           "i64 FP_TO_UINT is supported only with FPCVT");
7310 Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
7311 PPCISD::FCTIDUZ,
7312 dl, MVT::f64, Src);
7313 break;
7314 }
7315
7316 // Convert the FP value to an int value through memory.
7317 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
7318 (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT());
7319 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
7320 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
7321 MachinePointerInfo MPI =
7322 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
7323
7324 // Emit a store to the stack slot.
7325 SDValue Chain;
7326 if (i32Stack) {
7327 MachineFunction &MF = DAG.getMachineFunction();
7328 MachineMemOperand *MMO =
7329 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4);
7330 SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
7331 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
7332 DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
7333 } else
7334 Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, MPI);
7335
7336 // Result is a load from the stack slot. If loading 4 bytes, make sure to
7337 // add in a bias on big endian.
7338 if (Op.getValueType() == MVT::i32 && !i32Stack) {
7339 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
7340 DAG.getConstant(4, dl, FIPtr.getValueType()));
7341 MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
7342 }
7343
7344 RLI.Chain = Chain;
7345 RLI.Ptr = FIPtr;
7346 RLI.MPI = MPI;
7347}
7348
7349/// Custom lowers floating point to integer conversions to use
7350/// the direct move instructions available in ISA 2.07 to avoid the
7351/// need for load/store combinations.
7352SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
7353 SelectionDAG &DAG,
7354 const SDLoc &dl) const {
7355  assert(Op.getOperand(0).getValueType().isFloatingPoint());
7356 SDValue Src = Op.getOperand(0);
7357
7358 if (Src.getValueType() == MVT::f32)
7359 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
7360
7361 SDValue Tmp;
7362 switch (Op.getSimpleValueType().SimpleTy) {
7363  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
7364 case MVT::i32:
7365 Tmp = DAG.getNode(
7366 Op.getOpcode() == ISD::FP_TO_SINT
7367 ? PPCISD::FCTIWZ
7368 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
7369 dl, MVT::f64, Src);
7370 Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp);
7371 break;
7372 case MVT::i64:
7373    assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
7374           "i64 FP_TO_UINT is supported only with FPCVT");
7375 Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
7376 PPCISD::FCTIDUZ,
7377 dl, MVT::f64, Src);
7378 Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp);
7379 break;
7380 }
7381 return Tmp;
7382}
7383
7384SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
7385 const SDLoc &dl) const {
7386
7387 // FP to INT conversions are legal for f128.
7388 if (EnableQuadPrecision && (Op->getOperand(0).getValueType() == MVT::f128))
7389 return Op;
7390
7391 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
7392 // PPC (the libcall is not available).
7393 if (Op.getOperand(0).getValueType() == MVT::ppcf128) {
7394 if (Op.getValueType() == MVT::i32) {
7395 if (Op.getOpcode() == ISD::FP_TO_SINT) {
7396 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
7397 MVT::f64, Op.getOperand(0),
7398 DAG.getIntPtrConstant(0, dl));
7399 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
7400 MVT::f64, Op.getOperand(0),
7401 DAG.getIntPtrConstant(1, dl));
7402
7403 // Add the two halves of the long double in round-to-zero mode.
7404 SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
7405
7406 // Now use a smaller FP_TO_SINT.
7407 return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
7408 }
7409 if (Op.getOpcode() == ISD::FP_TO_UINT) {
7410 const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
7411 APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
7412 SDValue Tmp = DAG.getConstantFP(APF, dl, MVT::ppcf128);
7413 // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
7414 // FIXME: generated code sucks.
7415 // TODO: Are there fast-math-flags to propagate to this FSUB?
7416 SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128,
7417 Op.getOperand(0), Tmp);
7418 True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
7419 True = DAG.getNode(ISD::ADD, dl, MVT::i32, True,
7420 DAG.getConstant(0x80000000, dl, MVT::i32));
7421 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32,
7422 Op.getOperand(0));
7423 return DAG.getSelectCC(dl, Op.getOperand(0), Tmp, True, False,
7424 ISD::SETGE);
7425 }
7426 }
7427
7428 return SDValue();
7429 }
7430
7431 if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
7432 return LowerFP_TO_INTDirectMove(Op, DAG, dl);
7433
7434 ReuseLoadInfo RLI;
7435 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
7436
7437 return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
7438 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
7439}
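// Illustrative sketch, not part of the analyzed source: the
// "X >= 2^31 ? (int)(X - 2^31) + 0x80000000 : (int)X" select emitted above for
// ppcf128 FP_TO_UINT, shown here on a plain double to make the arithmetic
// concrete. Only signed FP-to-int conversions are needed.
#include <cstdint>
#include <cstdio>

static uint32_t fptoui32(double X) {
  const double TwoE31 = 2147483648.0;  // 2^31
  if (X >= TwoE31)
    return (uint32_t)(int32_t)(X - TwoE31) + 0x80000000u;
  return (uint32_t)(int32_t)X;
}

int main() {
  std::printf("%u %u\n", fptoui32(7.0), fptoui32(3000000000.0));
  // prints: 7 3000000000
}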
7440
7441// We're trying to insert a regular store, S, and then a load, L. If the
7442// incoming value, O, is a load, we might just be able to have our load use the
7443// address used by O. However, we don't know if anything else will store to
7444// that address before we can load from it. To prevent this situation, we need
7445// to insert our load, L, into the chain as a peer of O. To do this, we give L
7446// the same chain operand as O, we create a token factor from the chain results
7447// of O and L, and we replace all uses of O's chain result with that token
7448// factor (see spliceIntoChain below for this last part).
7449bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
7450 ReuseLoadInfo &RLI,
7451 SelectionDAG &DAG,
7452 ISD::LoadExtType ET) const {
7453 SDLoc dl(Op);
7454 if (ET == ISD::NON_EXTLOAD &&
7455 (Op.getOpcode() == ISD::FP_TO_UINT ||
7456 Op.getOpcode() == ISD::FP_TO_SINT) &&
7457 isOperationLegalOrCustom(Op.getOpcode(),
7458 Op.getOperand(0).getValueType())) {
7459
7460 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
7461 return true;
7462 }
7463
7464 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
7465 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
7466 LD->isNonTemporal())
7467 return false;
7468 if (LD->getMemoryVT() != MemVT)
7469 return false;
7470
7471 RLI.Ptr = LD->getBasePtr();
7472 if (LD->isIndexed() && !LD->getOffset().isUndef()) {
7473    assert(LD->getAddressingMode() == ISD::PRE_INC &&
7474           "Non-pre-inc AM on PPC?");
7475 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
7476 LD->getOffset());
7477 }
7478
7479 RLI.Chain = LD->getChain();
7480 RLI.MPI = LD->getPointerInfo();
7481 RLI.IsDereferenceable = LD->isDereferenceable();
7482 RLI.IsInvariant = LD->isInvariant();
7483 RLI.Alignment = LD->getAlignment();
7484 RLI.AAInfo = LD->getAAInfo();
7485 RLI.Ranges = LD->getRanges();
7486
7487 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
7488 return true;
7489}
7490
7491// Given the head of the old chain, ResChain, insert a token factor containing
7492// it and NewResChain, and make users of ResChain now be users of that token
7493// factor.
7494// TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
7495void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
7496 SDValue NewResChain,
7497 SelectionDAG &DAG) const {
7498 if (!ResChain)
7499 return;
7500
7501 SDLoc dl(NewResChain);
7502
7503 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
7504 NewResChain, DAG.getUNDEF(MVT::Other));
7505  assert(TF.getNode() != NewResChain.getNode() &&
7506         "A new TF really is required here");
7507
7508 DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
7509 DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
7510}
7511
7512/// Analyze the profitability of a direct move:
7513/// prefer a float load over an int load plus a direct move
7514/// when there is no integer use of the int load.
7515bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
7516 SDNode *Origin = Op.getOperand(0).getNode();
7517 if (Origin->getOpcode() != ISD::LOAD)
7518 return true;
7519
7520  // If there is no LXSIBZX/LXSIHZX (as on Power8),
7521  // prefer a direct move if the memory size is 1 or 2 bytes.
7522 MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
7523 if (!Subtarget.hasP9Vector() && MMO->getSize() <= 2)
7524 return true;
7525
7526 for (SDNode::use_iterator UI = Origin->use_begin(),
7527 UE = Origin->use_end();
7528 UI != UE; ++UI) {
7529
7530 // Only look at the users of the loaded value.
7531 if (UI.getUse().get().getResNo() != 0)
7532 continue;
7533
7534 if (UI->getOpcode() != ISD::SINT_TO_FP &&
7535 UI->getOpcode() != ISD::UINT_TO_FP)
7536 return true;
7537 }
7538
7539 return false;
7540}
7541
7542/// Custom lowers integer to floating point conversions to use
7543/// the direct move instructions available in ISA 2.07 to avoid the
7544/// need for load/store combinations.
7545SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
7546 SelectionDAG &DAG,
7547 const SDLoc &dl) const {
7548  assert((Op.getValueType() == MVT::f32 ||
7549          Op.getValueType() == MVT::f64) &&
7550         "Invalid floating point type as target of conversion");
7551  assert(Subtarget.hasFPCVT() &&
7552         "Int to FP conversions with direct moves require FPCVT");
7553 SDValue FP;
7554 SDValue Src = Op.getOperand(0);
7555 bool SinglePrec = Op.getValueType() == MVT::f32;
7556 bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
7557 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP;
7558 unsigned ConvOp = Signed ? (SinglePrec ? PPCISD::FCFIDS : PPCISD::FCFID) :
7559 (SinglePrec ? PPCISD::FCFIDUS : PPCISD::FCFIDU);
7560
7561 if (WordInt) {
7562 FP = DAG.getNode(Signed ? PPCISD::MTVSRA : PPCISD::MTVSRZ,
7563 dl, MVT::f64, Src);
7564 FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
7565 }
7566 else {
7567 FP = DAG.getNode(PPCISD::MTVSRA, dl, MVT::f64, Src);
7568 FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
7569 }
7570
7571 return FP;
7572}
7573
7574static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
7575
7576 EVT VecVT = Vec.getValueType();
7577  assert(VecVT.isVector() && "Expected a vector type.");
7578  assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
7579
7580 EVT EltVT = VecVT.getVectorElementType();
7581 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
7582 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
7583
7584 unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
7585 SmallVector<SDValue, 16> Ops(NumConcat);
7586 Ops[0] = Vec;
7587 SDValue UndefVec = DAG.getUNDEF(VecVT);
7588 for (unsigned i = 1; i < NumConcat; ++i)
7589 Ops[i] = UndefVec;
7590
7591 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
7592}
7593
7594SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
7595 const SDLoc &dl) const {
7596
7597 unsigned Opc = Op.getOpcode();
7598  assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP) &&
7599         "Unexpected conversion type");
7600  assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
7601         "Supports conversions to v2f64/v4f32 only.");
7602
7603 bool SignedConv = Opc == ISD::SINT_TO_FP;
7604 bool FourEltRes = Op.getValueType() == MVT::v4f32;
7605
7606 SDValue Wide = widenVec(DAG, Op.getOperand(0), dl);
7607 EVT WideVT = Wide.getValueType();
7608 unsigned WideNumElts = WideVT.getVectorNumElements();
7609 MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
7610
7611 SmallVector<int, 16> ShuffV;
7612 for (unsigned i = 0; i < WideNumElts; ++i)
7613 ShuffV.push_back(i + WideNumElts);
7614
7615 int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
7616 int SaveElts = FourEltRes ? 4 : 2;
7617 if (Subtarget.isLittleEndian())
7618 for (int i = 0; i < SaveElts; i++)
7619 ShuffV[i * Stride] = i;
7620 else
7621 for (int i = 1; i <= SaveElts; i++)
7622 ShuffV[i * Stride - 1] = i - 1;
7623
7624 SDValue ShuffleSrc2 =
7625 SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
7626 SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
7627 unsigned ExtendOp =
7628 SignedConv ? (unsigned)PPCISD::SExtVElems : (unsigned)ISD::BITCAST;
7629
7630 SDValue Extend;
7631 if (!Subtarget.hasP9Altivec() && SignedConv) {
7632 Arrange = DAG.getBitcast(IntermediateVT, Arrange);
7633 Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
7634 DAG.getValueType(Op.getOperand(0).getValueType()));
7635 } else
7636 Extend = DAG.getNode(ExtendOp, dl, IntermediateVT, Arrange);
7637
7638 return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
7639}
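
As an illustration of the shuffle mask built in LowerINT_TO_FPVector, the following standalone sketch (not part of the LLVM source; it assumes a hypothetical v4i16 source widened to v8i16 and converted to v4f32, so WideNumElts = 8, Stride = 2, SaveElts = 4) reproduces the mask computation for both endiannesses:

#include <cstdio>
#include <vector>

int main() {
  const unsigned WideNumElts = 8;      // v8i16 after widenVec (assumed case)
  const bool FourEltRes = true;        // result type is v4f32
  const bool IsLittleEndian = true;    // flip to model big-endian

  // Start with every element taken from ShuffleSrc2 (zeros for unsigned,
  // undef for signed conversions), exactly as the loop above does.
  std::vector<int> ShuffV;
  for (unsigned i = 0; i < WideNumElts; ++i)
    ShuffV.push_back(i + WideNumElts);

  int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
  int SaveElts = FourEltRes ? 4 : 2;
  if (IsLittleEndian)
    for (int i = 0; i < SaveElts; i++)
      ShuffV[i * Stride] = i;
  else
    for (int i = 1; i <= SaveElts; i++)
      ShuffV[i * Stride - 1] = i - 1;

  // Little-endian prints 0 9 1 11 2 13 3 15: each source half-word is paired
  // with a half-word from ShuffleSrc2; in the unsigned case those are zeros,
  // so reinterpreting the result as v4i32 gives zero-extended elements.
  for (int M : ShuffV)
    printf("%d ", M);
  printf("\n");
}
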
7640
7641SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
7642 SelectionDAG &DAG) const {
7643 SDLoc dl(Op);
7644
7645 EVT InVT = Op.getOperand(0).getValueType();
7646 EVT OutVT = Op.getValueType();
7647 if (OutVT.isVector() && OutVT.isFloatingPoint() &&
7648 isOperationCustom(Op.getOpcode(), InVT))
7649 return LowerINT_TO_FPVector(Op, DAG, dl);
7650
7651 // Conversions to f128 are legal.
7652 if (EnableQuadPrecision && (Op.getValueType() == MVT::f128))
7653 return Op;
7654
7655 if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) {
7656 if (Op.getValueType() != MVT::v4f32 && Op.getValueType() != MVT::v4f64)
7657 return SDValue();
7658
7659 SDValue Value = Op.getOperand(0);
7660 // The values are now known to be -1 (false) or 1 (true). To convert this
7661 // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
7662 // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
7663 Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
7664
7665 SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);
7666
7667 Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
7668
7669 if (Op.getValueType() != MVT::v4f64)
7670 Value = DAG.getNode(ISD::FP_ROUND, dl,
7671 Op.getValueType(), Value,
7672 DAG.getIntPtrConstant(1, dl));
7673 return Value;
7674 }
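
A quick scalar check (illustrative only, not part of the source) of the fma identity used in the QPX block above:

#include <cassert>
#include <cmath>

int main() {
  // Booleans encoded as -1.0 (false) / 1.0 (true) are remapped to 0.0 / 1.0
  // by a single fused multiply-add: 0.5*V + 0.5 == (V + 1.0) * 0.5.
  assert(std::fma(-1.0, 0.5, 0.5) == 0.0);
  assert(std::fma( 1.0, 0.5, 0.5) == 1.0);
}
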
7675
7676 // Don't handle ppc_fp128 here; let it be lowered to a libcall.
7677 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
7678 return SDValue();
7679
7680 if (Op.getOperand(0).getValueType() == MVT::i1)
7681 return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0),
7682 DAG.getConstantFP(1.0, dl, Op.getValueType()),
7683 DAG.getConstantFP(0.0, dl, Op.getValueType()));
7684
7685  // If we have direct moves, we can do the entire conversion and skip the
7686  // store/load; however, without FPCVT we can't do most conversions.
7687 if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
7688 Subtarget.isPPC64() && Subtarget.hasFPCVT())
7689 return LowerINT_TO_FPDirectMove(Op, DAG, dl);
7690
7691  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
7692         "UINT_TO_FP is supported only with FPCVT");
7693
7694 // If we have FCFIDS, then use it when converting to single-precision.
7695 // Otherwise, convert to double-precision and then round.
7696 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
7697 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
7698 : PPCISD::FCFIDS)
7699 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
7700 : PPCISD::FCFID);
7701 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
7702 ? MVT::f32
7703 : MVT::f64;
7704
7705 if (Op.getOperand(0).getValueType() == MVT::i64) {
7706 SDValue SINT = Op.getOperand(0);
7707 // When converting to single-precision, we actually need to convert
7708 // to double-precision first and then round to single-precision.
7709 // To avoid double-rounding effects during that operation, we have
7710 // to prepare the input operand. Bits that might be truncated when
7711 // converting to double-precision are replaced by a bit that won't
7712 // be lost at this stage, but is below the single-precision rounding
7713 // position.
7714 //
7715 // However, if -enable-unsafe-fp-math is in effect, accept double
7716 // rounding to avoid the extra overhead.
7717 if (Op.getValueType() == MVT::f32 &&
7718 !Subtarget.hasFPCVT() &&
7719 !DAG.getTarget().Options.UnsafeFPMath) {
7720
7721 // Twiddle input to make sure the low 11 bits are zero. (If this
7722 // is the case, we are guaranteed the value will fit into the 53 bit
7723 // mantissa of an IEEE double-precision value without rounding.)
7724 // If any of those low 11 bits were not zero originally, make sure
7725 // bit 12 (value 2048) is set instead, so that the final rounding
7726 // to single-precision gets the correct result.
7727 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
7728 SINT, DAG.getConstant(2047, dl, MVT::i64));
7729 Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
7730 Round, DAG.getConstant(2047, dl, MVT::i64));
7731 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
7732 Round = DAG.getNode(ISD::AND, dl, MVT::i64,
7733 Round, DAG.getConstant(-2048, dl, MVT::i64));
7734
7735 // However, we cannot use that value unconditionally: if the magnitude
7736 // of the input value is small, the bit-twiddling we did above might
7737 // end up visibly changing the output. Fortunately, in that case, we
7738 // don't need to twiddle bits since the original input will convert
7739 // exactly to double-precision floating-point already. Therefore,
7740 // construct a conditional to use the original value if the top 11
7741 // bits are all sign-bit copies, and use the rounded value computed
7742 // above otherwise.
7743 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
7744 SINT, DAG.getConstant(53, dl, MVT::i32));
7745 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
7746 Cond, DAG.getConstant(1, dl, MVT::i64));
7747 Cond = DAG.getSetCC(dl, MVT::i32,
7748 Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
7749
7750 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
7751 }
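
The bit manipulation above can be hard to follow in DAG form; here is an equivalent scalar sketch (illustrative, assuming ordinary C++ integer arithmetic and arithmetic right shift stand in for the ISD nodes; prepareForSinglePrecision is a hypothetical helper name):

#include <cassert>
#include <cstdint>

static int64_t prepareForSinglePrecision(int64_t SInt) {
  // Clear the low 11 bits, but record in bit 11 whether any of them (or bit 11
  // itself) was set, so the later f64 -> f32 rounding still sees a sticky bit
  // below the single-precision rounding position.
  int64_t Round = SInt & 2047;
  Round += 2047;                  // becomes >= 2048 iff any low 11 bits were set
  Round |= SInt;
  Round &= ~int64_t(2047);

  // Use the twiddled value only when the top 11 bits are not all copies of the
  // sign bit, i.e. when the i64 -> f64 step could actually lose low bits.
  uint64_t Cond = uint64_t(SInt >> 53) + 1;
  return Cond > 1 ? Round : SInt;
}

int main() {
  // Small magnitudes are left untouched; large ones get the sticky bit.
  assert(prepareForSinglePrecision(100) == 100);
  assert(prepareForSinglePrecision((int64_t(1) << 60) + 1) ==
         (int64_t(1) << 60) + 2048);
}
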
7752
7753 ReuseLoadInfo RLI;
7754 SDValue Bits;
7755
7756 MachineFunction &MF = DAG.getMachineFunction();
7757 if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
7758 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
7759 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
7760 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
7761 } else if (Subtarget.hasLFIWAX() &&
7762 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
7763 MachineMemOperand *MMO =
7764 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
7765 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
7766 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
7767 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
7768 DAG.getVTList(MVT::f64, MVT::Other),
7769 Ops, MVT::i32, MMO);
7770 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
7771 } else if (Subtarget.hasFPCVT() &&
7772 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
7773 MachineMemOperand *MMO =
7774 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
7775 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
7776 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
7777 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
7778 DAG.getVTList(MVT::f64, MVT::Other),
7779 Ops, MVT::i32, MMO);
7780 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
7781 } else if (((Subtarget.hasLFIWAX() &&
7782 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
7783 (Subtarget.hasFPCVT() &&
7784 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
7785 SINT.getOperand(0).getValueType() == MVT::i32) {
7786 MachineFrameInfo &MFI = MF.getFrameInfo();
7787 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7788
7789 int FrameIdx = MFI.CreateStackObject(4, 4, false);
7790 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
7791
7792 SDValue Store =
7793 DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx,
7794 MachinePointerInfo::getFixedStack(
7795 DAG.getMachineFunction(), FrameIdx));
7796
7797      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
7798             "Expected an i32 store");
7799
7800 RLI.Ptr = FIdx;
7801 RLI.Chain = Store;
7802 RLI.MPI =
7803 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
7804 RLI.Alignment = 4;
7805
7806 MachineMemOperand *MMO =
7807 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
7808 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
7809 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
7810 Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
7811 PPCISD::LFIWZX : PPCISD::LFIWAX,
7812 dl, DAG.getVTList(MVT::f64, MVT::Other),
7813 Ops, MVT::i32, MMO);
7814 } else
7815 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
7816
7817 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);
7818
7819 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
7820 FP = DAG.getNode(ISD::FP_ROUND, dl,
7821 MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
7822 return FP;
7823 }
7824
7825  assert(Op.getOperand(0).getValueType() == MVT::i32 &&
7826         "Unhandled INT_TO_FP type in custom expander!");
7827 // Since we only generate this in 64-bit mode, we can take advantage of
7828 // 64-bit registers. In particular, sign extend the input value into the
7829 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
7830 // then lfd it and fcfid it.
7831 MachineFunction &MF = DAG.getMachineFunction();
7832 MachineFrameInfo &MFI = MF.getFrameInfo();
7833 EVT PtrVT = getPointerTy(MF.getDataLayout());
7834
7835 SDValue Ld;
7836 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
7837 ReuseLoadInfo RLI;
7838 bool ReusingLoad;
7839 if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI,
7840 DAG))) {
7841 int FrameIdx = MFI.CreateStackObject(4, 4, false);
7842 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
7843
7844 SDValue Store =
7845 DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
7846 MachinePointerInfo::getFixedStack(
7847 DAG.getMachineFunction(), FrameIdx));
7848
7849      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
7850             "Expected an i32 store");
7851
7852 RLI.Ptr = FIdx;
7853 RLI.Chain = Store;
7854 RLI.MPI =
7855 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
7856 RLI.Alignment = 4;
7857 }
7858
7859 MachineMemOperand *MMO =
7860 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
7861 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
7862 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
7863 Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
7864 PPCISD::LFIWZX : PPCISD::LFIWAX,
7865 dl, DAG.getVTList(MVT::f64, MVT::Other),
7866 Ops, MVT::i32, MMO);
7867 if (ReusingLoad)
7868 spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
7869 } else {
7870    assert(Subtarget.isPPC64() &&
7871           "i32->FP without LFIWAX supported only on PPC64");
7872
7873 int FrameIdx = MFI.CreateStackObject(8, 8, false);
7874 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
7875
7876 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
7877 Op.getOperand(0));
7878
7879 // STD the extended value into the stack slot.
7880 SDValue Store = DAG.getStore(
7881 DAG.getEntryNode(), dl, Ext64, FIdx,
7882 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
7883
7884 // Load the value as a double.
7885 Ld = DAG.getLoad(
7886 MVT::f64, dl, Store, FIdx,
7887 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
7888 }
7889
7890 // FCFID it and return it.
7891 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
7892 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
7893 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
7894 DAG.getIntPtrConstant(0, dl));
7895 return FP;
7896}
7897
7898SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
7899 SelectionDAG &DAG) const {
7900 SDLoc dl(Op);
7901 /*
7902  The rounding mode is in bits 30:31 of FPSCR, and has the following
7903 settings:
7904 00 Round to nearest
7905 01 Round to 0
7906 10 Round to +inf
7907 11 Round to -inf
7908
7909 FLT_ROUNDS, on the other hand, expects the following:
7910 -1 Undefined
7911 0 Round to 0
7912 1 Round to nearest
7913 2 Round to +inf
7914 3 Round to -inf
7915
7916 To perform the conversion, we do:
7917 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
7918 */
7919
7920 MachineFunction &MF = DAG.getMachineFunction();
7921 EVT VT = Op.getValueType();
7922 EVT PtrVT = getPointerTy(MF.getDataLayout());
7923
7924 // Save FP Control Word to register
7925 EVT NodeTys[] = {
7926 MVT::f64, // return register
7927 MVT::Glue // unused in this context
7928 };
7929 SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None);
7930
7931 // Save FP register to stack slot
7932 int SSFI = MF.getFrameInfo().CreateStackObject(8, 8, false);
7933 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
7934 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, StackSlot,
7935 MachinePointerInfo());
7936
7937 // Load FP Control Word from low 32 bits of stack slot.
7938 SDValue Four = DAG.getConstant(4, dl, PtrVT);
7939 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
7940 SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo());
7941
7942 // Transform as necessary
7943 SDValue CWD1 =
7944 DAG.getNode(ISD::AND, dl, MVT::i32,
7945 CWD, DAG.getConstant(3, dl, MVT::i32));
7946 SDValue CWD2 =
7947 DAG.getNode(ISD::SRL, dl, MVT::i32,
7948 DAG.getNode(ISD::AND, dl, MVT::i32,
7949 DAG.getNode(ISD::XOR, dl, MVT::i32,
7950 CWD, DAG.getConstant(3, dl, MVT::i32)),
7951 DAG.getConstant(3, dl, MVT::i32)),
7952 DAG.getConstant(1, dl, MVT::i32));
7953
7954 SDValue RetVal =
7955 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
7956
7957 return DAG.getNode((VT.getSizeInBits() < 16 ?
7958 ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
7959}
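
The mapping described in the comment above can be checked directly; a minimal standalone sketch (illustrative, not part of the source):

#include <cassert>

int main() {
  auto ToFltRounds = [](unsigned FPSCR) {
    return (FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1);
  };
  assert(ToFltRounds(0) == 1); // 00 round to nearest -> 1
  assert(ToFltRounds(1) == 0); // 01 round to zero    -> 0
  assert(ToFltRounds(2) == 2); // 10 round to +inf    -> 2
  assert(ToFltRounds(3) == 3); // 11 round to -inf    -> 3
}
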
7960
7961SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
7962 EVT VT = Op.getValueType();
7963 unsigned BitWidth = VT.getSizeInBits();
7964 SDLoc dl(Op);
7965  assert(Op.getNumOperands() == 3 &&
7966         VT == Op.getOperand(1).getValueType() &&
7967         "Unexpected SHL!");
7968
7969 // Expand into a bunch of logical ops. Note that these ops
7970 // depend on the PPC behavior for oversized shift amounts.
7971 SDValue Lo = Op.getOperand(0);
7972 SDValue Hi = Op.getOperand(1);
7973 SDValue Amt = Op.getOperand(2);
7974 EVT AmtVT = Amt.getValueType();
7975
7976 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
7977 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
7978 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
7979 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
7980 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
7981 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
7982 DAG.getConstant(-BitWidth, dl, AmtVT));
7983 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
7984 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
7985 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
7986 SDValue OutOps[] = { OutLo, OutHi };
7987 return DAG.getMergeValues(OutOps, dl);
7988}
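
For readers unfamiliar with the PARTS expansions, here is a scalar model (illustrative only; it assumes 32-bit parts, shift amounts in [0,63], and mimics PPC's behaviour of producing 0 for 6-bit shift amounts >= 32, which the sequence above relies on):

#include <cassert>
#include <cstdint>

static uint32_t ppcShl(uint32_t V, uint32_t Amt) {
  return (Amt & 63) < 32 ? V << (Amt & 31) : 0;   // oversized shift -> 0
}
static uint32_t ppcSrl(uint32_t V, uint32_t Amt) {
  return (Amt & 63) < 32 ? V >> (Amt & 31) : 0;   // oversized shift -> 0
}

static void shlParts(uint32_t Lo, uint32_t Hi, uint32_t Amt,
                     uint32_t &OutLo, uint32_t &OutHi) {
  uint32_t Tmp4 = ppcShl(Hi, Amt) | ppcSrl(Lo, 32 - Amt); // high part, Amt < 32
  uint32_t Tmp6 = ppcShl(Lo, Amt - 32);                   // high part, Amt >= 32
  OutHi = Tmp4 | Tmp6;
  OutLo = ppcShl(Lo, Amt);
}

int main() {
  uint32_t Lo, Hi;
  shlParts(0x80000001u, 0x0u, 1, Lo, Hi);
  assert(Lo == 0x2u && Hi == 0x1u);        // top bit of Lo carried into Hi
  shlParts(0xFFu, 0x0u, 40, Lo, Hi);
  assert(Lo == 0x0u && Hi == 0xFF00u);     // Amt >= 32: Lo lands in Hi, shifted by 8
}

The SRL_PARTS and SRA_PARTS expansions below follow the same pattern with the roles of Lo and Hi swapped.
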
7989
7990SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
7991 EVT VT = Op.getValueType();
7992 SDLoc dl(Op);
7993 unsigned BitWidth = VT.getSizeInBits();
7994  assert(Op.getNumOperands() == 3 &&
7995         VT == Op.getOperand(1).getValueType() &&
7996         "Unexpected SRL!");
7997
7998 // Expand into a bunch of logical ops. Note that these ops
7999 // depend on the PPC behavior for oversized shift amounts.
8000 SDValue Lo = Op.getOperand(0);
8001 SDValue Hi = Op.getOperand(1);
8002 SDValue Amt = Op.getOperand(2);
8003 EVT AmtVT = Amt.getValueType();
8004
8005 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8006 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8007 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
8008 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
8009 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
8010 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8011 DAG.getConstant(-BitWidth, dl, AmtVT));
8012 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
8013 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
8014 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
8015 SDValue OutOps[] = { OutLo, OutHi };
8016 return DAG.getMergeValues(OutOps, dl);
8017}
8018
8019SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
8020 SDLoc dl(Op);
8021 EVT VT = Op.getValueType();
8022 unsigned BitWidth = VT.getSizeInBits();
8023  assert(Op.getNumOperands() == 3 &&
8024         VT == Op.getOperand(1).getValueType() &&
8025         "Unexpected SRA!");
8026
8027 // Expand into a bunch of logical ops, followed by a select_cc.
8028 SDValue Lo = Op.getOperand(0);
8029 SDValue Hi = Op.getOperand(1);
8030 SDValue Amt = Op.getOperand(2);
8031 EVT AmtVT = Amt.getValueType();
8032
8033 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8034 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8035 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
8036 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
8037 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
8038 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8039 DAG.getConstant(-BitWidth, dl, AmtVT));
8040 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
8041 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
8042 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
8043 Tmp4, Tmp6, ISD::SETLE);
8044 SDValue OutOps[] = { OutLo, OutHi };
8045 return DAG.getMergeValues(OutOps, dl);
8046}
8047
8048//===----------------------------------------------------------------------===//
8049// Vector related lowering.
8050//
8051
8052/// BuildSplatI - Build a canonical splati of Val with an element size of
8053/// SplatSize. Cast the result to VT.
8054static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
8055 SelectionDAG &DAG, const SDLoc &dl) {
8056  assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
8057
8058 static const MVT VTys[] = { // canonical VT to use for each size.
8059 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
8060 };
8061
8062 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
8063
8064 // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
8065 if (Val == -1)
8066 SplatSize = 1;
8067
8068 EVT CanonicalVT = VTys[SplatSize-1];
8069
8070 // Build a canonical splat for this value.
8071 return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
8072}
8073
8074/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
8075/// specified intrinsic ID.
8076static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
8077 const SDLoc &dl, EVT DestVT = MVT::Other) {
8078 if (DestVT == MVT::Other) DestVT = Op.getValueType();
8079 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8080 DAG.getConstant(IID, dl, MVT::i32), Op);
8081}
8082
8083/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
8084/// specified intrinsic ID.
8085static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
8086 SelectionDAG &DAG, const SDLoc &dl,
8087 EVT DestVT = MVT::Other) {
8088 if (DestVT == MVT::Other) DestVT = LHS.getValueType();
8089 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8090 DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
8091}
8092
8093/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
8094/// specified intrinsic ID.
8095static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
8096 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
8097 EVT DestVT = MVT::Other) {
8098 if (DestVT == MVT::Other) DestVT = Op0.getValueType();
8099 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8100 DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
8101}
8102
8103/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
8104/// amount. The result has the specified value type.
8105static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
8106 SelectionDAG &DAG, const SDLoc &dl) {
8107 // Force LHS/RHS to be the right type.
8108 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
8109 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
8110
8111 int Ops[16];
8112 for (unsigned i = 0; i != 16; ++i)
8113 Ops[i] = i + Amt;
8114 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
8115 return DAG.getNode(ISD::BITCAST, dl, VT, T);
8116}
8117
8118/// Do we have an efficient pattern in a .td file for this node?
8119///
8120/// \param V - pointer to the BuildVectorSDNode being matched
8121/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
8122///
8123/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
8124/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
8125/// the opposite is true (expansion is beneficial) are:
8126/// - The node builds a vector out of integers that are not 32 or 64-bits
8127/// - The node builds a vector out of constants
8128/// - The node is a "load-and-splat"
8129/// In all other cases, we will choose to keep the BUILD_VECTOR.
8130static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
8131 bool HasDirectMove,
8132 bool HasP8Vector) {
8133 EVT VecVT = V->getValueType(0);
8134 bool RightType = VecVT == MVT::v2f64 ||
8135 (HasP8Vector && VecVT == MVT::v4f32) ||
8136 (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
8137 if (!RightType)
8138 return false;
8139
8140 bool IsSplat = true;
8141 bool IsLoad = false;
8142 SDValue Op0 = V->getOperand(0);
8143
8144 // This function is called in a block that confirms the node is not a constant
8145 // splat. So a constant BUILD_VECTOR here means the vector is built out of
8146 // different constants.
8147 if (V->isConstant())
8148 return false;
8149 for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
8150 if (V->getOperand(i).isUndef())
8151 return false;
8152 // We want to expand nodes that represent load-and-splat even if the
8153 // loaded value is a floating point truncation or conversion to int.
8154 if (V->getOperand(i).getOpcode() == ISD::LOAD ||
8155 (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
8156 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
8157 (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
8158 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
8159 (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
8160 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
8161 IsLoad = true;
8162 // If the operands are different or the input is not a load and has more
8163 // uses than just this BV node, then it isn't a splat.
8164 if (V->getOperand(i) != Op0 ||
8165 (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
8166 IsSplat = false;
8167 }
8168 return !(IsSplat && IsLoad);
8169}
8170
8171// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
8172SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
8173
8174 SDLoc dl(Op);
8175 SDValue Op0 = Op->getOperand(0);
8176
8177 if (!EnableQuadPrecision ||
8178 (Op.getValueType() != MVT::f128 ) ||
8179 (Op0.getOpcode() != ISD::BUILD_PAIR) ||
8180 (Op0.getOperand(0).getValueType() != MVT::i64) ||
8181 (Op0.getOperand(1).getValueType() != MVT::i64))
8182 return SDValue();
8183
8184 return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0),
8185 Op0.getOperand(1));
8186}
8187
8188// If this is a case we can't handle, return null and let the default
8189// expansion code take care of it. If we CAN select this case, and if it
8190// selects to a single instruction, return Op. Otherwise, if we can codegen
8191// this case more efficiently than a constant pool load, lower it to the
8192// sequence of ops that should be used.
8193SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
8194 SelectionDAG &DAG) const {
8195 SDLoc dl(Op);
8196 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
8197  assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
8198
8199 if (Subtarget.hasQPX() && Op.getValueType() == MVT::v4i1) {
8200 // We first build an i32 vector, load it into a QPX register,
8201 // then convert it to a floating-point vector and compare it
8202 // to a zero vector to get the boolean result.
8203 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
8204 int FrameIdx = MFI.CreateStackObject(16, 16, false);
8205 MachinePointerInfo PtrInfo =
8206 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8207 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8208 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8209
8210    assert(BVN->getNumOperands() == 4 &&
8211           "BUILD_VECTOR for v4i1 does not have 4 operands");
8212
8213 bool IsConst = true;
8214 for (unsigned i = 0; i < 4; ++i) {
8215 if (BVN->getOperand(i).isUndef()) continue;
8216 if (!isa<ConstantSDNode>(BVN->getOperand(i))) {
8217 IsConst = false;
8218 break;
8219 }
8220 }
8221
8222 if (IsConst) {
8223 Constant *One =
8224 ConstantFP::get(Type::getFloatTy(*DAG.getContext()), 1.0);
8225 Constant *NegOne =
8226 ConstantFP::get(Type::getFloatTy(*DAG.getContext()), -1.0);
8227
8228 Constant *CV[4];
8229 for (unsigned i = 0; i < 4; ++i) {
8230 if (BVN->getOperand(i).isUndef())
8231 CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext()));
8232 else if (isNullConstant(BVN->getOperand(i)))
8233 CV[i] = NegOne;
8234 else
8235 CV[i] = One;
8236 }
8237
8238 Constant *CP = ConstantVector::get(CV);
8239 SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(DAG.getDataLayout()),
8240 16 /* alignment */);
8241
8242 SDValue Ops[] = {DAG.getEntryNode(), CPIdx};
8243 SDVTList VTs = DAG.getVTList({MVT::v4i1, /*chain*/ MVT::Other});
8244 return DAG.getMemIntrinsicNode(
8245 PPCISD::QVLFSb, dl, VTs, Ops, MVT::v4f32,
8246 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
8247 }
8248
8249 SmallVector<SDValue, 4> Stores;
8250 for (unsigned i = 0; i < 4; ++i) {
8251 if (BVN->getOperand(i).isUndef()) continue;
8252
8253 unsigned Offset = 4*i;
8254 SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
8255 Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
8256
8257 unsigned StoreSize = BVN->getOperand(i).getValueType().getStoreSize();
8258 if (StoreSize > 4) {
8259 Stores.push_back(
8260 DAG.getTruncStore(DAG.getEntryNode(), dl, BVN->getOperand(i), Idx,
8261 PtrInfo.getWithOffset(Offset), MVT::i32));
8262 } else {
8263 SDValue StoreValue = BVN->getOperand(i);
8264 if (StoreSize < 4)
8265 StoreValue = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, StoreValue);
8266
8267 Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, StoreValue, Idx,
8268 PtrInfo.getWithOffset(Offset)));
8269 }
8270 }
8271
8272 SDValue StoreChain;
8273 if (!Stores.empty())
8274 StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
8275 else
8276 StoreChain = DAG.getEntryNode();
8277
8278 // Now load from v4i32 into the QPX register; this will extend it to
8279 // v4i64 but not yet convert it to a floating point. Nevertheless, this
8280 // is typed as v4f64 because the QPX register integer states are not
8281 // explicitly represented.
8282
8283 SDValue Ops[] = {StoreChain,
8284 DAG.getConstant(Intrinsic::ppc_qpx_qvlfiwz, dl, MVT::i32),
8285 FIdx};
8286 SDVTList VTs = DAG.getVTList({MVT::v4f64, /*chain*/ MVT::Other});
8287
8288 SDValue LoadedVect = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN,
8289 dl, VTs, Ops, MVT::v4i32, PtrInfo);
8290 LoadedVect = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
8291 DAG.getConstant(Intrinsic::ppc_qpx_qvfcfidu, dl, MVT::i32),
8292 LoadedVect);
8293
8294 SDValue FPZeros = DAG.getConstantFP(0.0, dl, MVT::v4f64);
8295
8296 return DAG.getSetCC(dl, MVT::v4i1, LoadedVect, FPZeros, ISD::SETEQ);
8297 }
8298
8299 // All other QPX vectors are handled by generic code.
8300 if (Subtarget.hasQPX())
8301 return SDValue();
8302
8303 // Check if this is a splat of a constant value.
8304 APInt APSplatBits, APSplatUndef;
8305 unsigned SplatBitSize;
8306 bool HasAnyUndefs;
8307 if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
8308 HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
8309 SplatBitSize > 32) {
8310 // BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be
8311 // lowered to VSX instructions under certain conditions.
8312 // Without VSX, there is no pattern more efficient than expanding the node.
8313 if (Subtarget.hasVSX() &&
8314 haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
8315 Subtarget.hasP8Vector()))
8316 return Op;
8317 return SDValue();
8318 }
8319
8320 unsigned SplatBits = APSplatBits.getZExtValue();
8321 unsigned SplatUndef = APSplatUndef.getZExtValue();
8322 unsigned SplatSize = SplatBitSize / 8;
8323
8324 // First, handle single instruction cases.
8325
8326 // All zeros?
8327 if (SplatBits == 0) {
8328 // Canonicalize all zero vectors to be v4i32.
8329 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
8330 SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
8331 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
8332 }
8333 return Op;
8334 }
8335
8336 // We have XXSPLTIB for constant splats one byte wide
8337 if (Subtarget.hasP9Vector() && SplatSize == 1) {
8338 // This is a splat of 1-byte elements with some elements potentially undef.
8339 // Rather than trying to match undef in the SDAG patterns, ensure that all
8340 // elements are the same constant.
8341 if (HasAnyUndefs || ISD::isBuildVectorAllOnes(BVN)) {
8342 SmallVector<SDValue, 16> Ops(16, DAG.getConstant(SplatBits,
8343 dl, MVT::i32));
8344 SDValue NewBV = DAG.getBuildVector(MVT::v16i8, dl, Ops);
8345 if (Op.getValueType() != MVT::v16i8)
8346 return DAG.getBitcast(Op.getValueType(), NewBV);
8347 return NewBV;
8348 }
8349
8350 // BuildVectorSDNode::isConstantSplat() is actually pretty smart. It'll
8351 // detect that constant splats like v8i16: 0xABAB are really just splats
8352 // of a 1-byte constant. In this case, we need to convert the node to a
8353 // splat of v16i8 and a bitcast.
8354 if (Op.getValueType() != MVT::v16i8)
8355 return DAG.getBitcast(Op.getValueType(),
8356 DAG.getConstant(SplatBits, dl, MVT::v16i8));
8357
8358 return Op;
8359 }
8360
8361 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
8362 int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
8363 (32-SplatBitSize));
8364 if (SextVal >= -16 && SextVal <= 15)
8365 return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);
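
As a worked example (not from the source), a v8i16 splat of the constant 0xFFFE has SplatBits = 0xFFFE and SplatBitSize = 16; the sign extension above yields -2, which is in [-16,15] and is handled by the single-instruction path just shown:

#include <cassert>
#include <cstdint>

int main() {
  unsigned SplatBits = 0xFFFE, SplatBitSize = 16;
  int32_t SextVal =
      int32_t(SplatBits << (32 - SplatBitSize)) >> (32 - SplatBitSize);
  assert(SextVal == -2);   // a single vspltish -2 suffices
}

A value such as 0x001E instead sign-extends to 30, which falls outside [-16,15] and is handled by the multi-instruction sequences described next.
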
8366
8367 // Two instruction sequences.
8368
8369 // If this value is in the range [-32,30] and is even, use:
8370 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
8371 // If this value is in the range [17,31] and is odd, use:
8372 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
8373 // If this value is in the range [-31,-17] and is odd, use:
8374 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
8375 // Note the last two are three-instruction sequences.
8376 if (SextVal >= -32 && SextVal <= 31) {
8377 // To avoid having these optimizations undone by constant folding,
8378 // we convert to a pseudo that will be expanded later into one of
8379 // the above forms.
8380 SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
8381 EVT VT = (SplatSize == 1 ? MVT::v16i8 :
8382 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
8383 SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
8384 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
8385 if (VT == Op.getValueType())
8386 return RetVal;
8387 else
8388 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
8389 }
8390
8391 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
8392 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
8393 // for fneg/fabs.
8394 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
8395 // Make -1 and vspltisw -1:
8396 SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);
8397
8398 // Make the VSLW intrinsic, computing 0x8000_0000.
8399 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
8400 OnesV, DAG, dl);
8401
8402 // xor by OnesV to invert it.
8403 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
8404 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8405 }
8406
8407 // Check to see if this is a wide variety of vsplti*, binop self cases.
8408 static const signed char SplatCsts[] = {
8409 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
8410 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
8411 };
8412
8413 for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
8414 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
8415 // cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1'
8416 int i = SplatCsts[idx];
8417
8418 // Figure out what shift amount will be used by altivec if shifted by i in
8419 // this splat size.
8420 unsigned TypeShiftAmt = i & (SplatBitSize-1);
8421
8422 // vsplti + shl self.
8423 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
8424 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
8425 static const unsigned IIDs[] = { // Intrinsic to use for each size.
8426 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
8427 Intrinsic::ppc_altivec_vslw
8428 };
8429 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
8430 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8431 }
8432
8433 // vsplti + srl self.
8434 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
8435 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
8436 static const unsigned IIDs[] = { // Intrinsic to use for each size.
8437 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
8438 Intrinsic::ppc_altivec_vsrw
8439 };
8440 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
8441 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8442 }
8443
8444 // vsplti + sra self.
8445 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
8446 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
8447 static const unsigned IIDs[] = { // Intrinsic to use for each size.
8448 Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
8449 Intrinsic::ppc_altivec_vsraw
8450 };
8451 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
8452 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8453 }
8454
8455 // vsplti + rol self.
8456 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
8457 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
8458 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
8459 static const unsigned IIDs[] = { // Intrinsic to use for each size.
8460 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
8461 Intrinsic::ppc_altivec_vrlw
8462 };
8463 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
8464 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8465 }
8466
8467 // t = vsplti c, result = vsldoi t, t, 1
8468 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
8469 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
8470 unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
8471 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
8472 }
8473 // t = vsplti c, result = vsldoi t, t, 2
8474 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
8475 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
8476 unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
8477 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
8478 }
8479 // t = vsplti c, result = vsldoi t, t, 3
8480 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
8481 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
8482 unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
8483 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
8484 }
8485 }
8486
8487 return SDValue();
8488}
8489
8490/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
8491/// the specified operations to build the shuffle.
8492static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
8493 SDValue RHS, SelectionDAG &DAG,
8494 const SDLoc &dl) {
8495 unsigned OpNum = (PFEntry >> 26) & 0x0F;
8496 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
8497 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
8498
8499 enum {
8500 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
8501 OP_VMRGHW,
8502 OP_VMRGLW,
8503 OP_VSPLTISW0,
8504 OP_VSPLTISW1,
8505 OP_VSPLTISW2,
8506 OP_VSPLTISW3,
8507 OP_VSLDOI4,
8508 OP_VSLDOI8,
8509 OP_VSLDOI12
8510 };
8511
8512 if (OpNum == OP_COPY) {
8513 if (LHSID == (1*9+2)*9+3) return LHS;
8514    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
8515 return RHS;
8516 }
8517
8518 SDValue OpLHS, OpRHS;
8519 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
8520 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
8521
8522 int ShufIdxs[16];
8523 switch (OpNum) {
8524  default: llvm_unreachable("Unknown i32 permute!");
8525 case OP_VMRGHW:
8526 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
8527 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
8528 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
8529 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
8530 break;
8531 case OP_VMRGLW:
8532 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
8533 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
8534 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
8535 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
8536 break;
8537 case OP_VSPLTISW0:
8538 for (unsigned i = 0; i != 16; ++i)
8539 ShufIdxs[i] = (i&3)+0;
8540 break;
8541 case OP_VSPLTISW1:
8542 for (unsigned i = 0; i != 16; ++i)
8543 ShufIdxs[i] = (i&3)+4;
8544 break;
8545 case OP_VSPLTISW2:
8546 for (unsigned i = 0; i != 16; ++i)
8547 ShufIdxs[i] = (i&3)+8;
8548 break;
8549 case OP_VSPLTISW3:
8550 for (unsigned i = 0; i != 16; ++i)
8551 ShufIdxs[i] = (i&3)+12;
8552 break;
8553 case OP_VSLDOI4:
8554 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
8555 case OP_VSLDOI8:
8556 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
8557 case OP_VSLDOI12:
8558 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
8559 }
8560 EVT VT = OpLHS.getValueType();
8561 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
8562 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
8563 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
8564 return DAG.getNode(ISD::BITCAST, dl, VT, T);
8565}
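For reference, here is a small standalone sketch (not LLVM code; decodePFEntry and the sample value are made up for illustration) of how the bit fields used above unpack from a 32-bit perfect-shuffle table entry, including the 2-bit cost field consulted later by LowerVECTOR_SHUFFLE:

#include <cstdio>

// Decode one perfect-shuffle table entry: bits 30..31 hold the cost,
// bits 26..29 the opcode, bits 13..25 the LHS id, bits 0..12 the RHS id.
static void decodePFEntry(unsigned PFEntry) {
  unsigned Cost  = PFEntry >> 30;
  unsigned OpNum = (PFEntry >> 26) & 0x0F;
  unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
  unsigned RHSID = PFEntry & ((1 << 13) - 1);
  std::printf("cost=%u op=%u lhs=%u rhs=%u\n", Cost, OpNum, LHSID, RHSID);
}

int main() {
  decodePFEntry((2u << 30) | (1u << 26) | (1234u << 13) | 567u); // sample entry
  return 0;
}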
8566
8567/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
8568/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
8569/// SDValue.
8570SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
8571 SelectionDAG &DAG) const {
8572 const unsigned BytesInVector = 16;
8573 bool IsLE = Subtarget.isLittleEndian();
8574 SDLoc dl(N);
8575 SDValue V1 = N->getOperand(0);
8576 SDValue V2 = N->getOperand(1);
8577 unsigned ShiftElts = 0, InsertAtByte = 0;
8578 bool Swap = false;
8579
8580 // Shifts required to get the byte we want at element 7.
8581 unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
8582 0, 15, 14, 13, 12, 11, 10, 9};
8583 unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
8584 1, 2, 3, 4, 5, 6, 7, 8};
8585
8586 ArrayRef<int> Mask = N->getMask();
8587 int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
8588
8589 // For each mask element, find out if we're just inserting something
8590 // from V2 into V1 or vice versa.
8591 // Possible permutations inserting an element from V2 into V1:
8592 // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
8593 // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
8594 // ...
8595 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
8596 // Inserting from V1 into V2 will be similar, except mask range will be
8597 // [16,31].
8598
8599 bool FoundCandidate = false;
8600 // If both vector operands for the shuffle are the same vector, the mask
8601 // will contain only elements from the first one and the second one will be
8602 // undef.
8603 unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
8604  // Go through the byte mask to find an element that's being moved
8605 // from one vector to the other.
8606 for (unsigned i = 0; i < BytesInVector; ++i) {
8607 unsigned CurrentElement = Mask[i];
8608 // If 2nd operand is undefined, we should only look for element 7 in the
8609 // Mask.
8610 if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
8611 continue;
8612
8613 bool OtherElementsInOrder = true;
8614 // Examine the other elements in the Mask to see if they're in original
8615 // order.
8616 for (unsigned j = 0; j < BytesInVector; ++j) {
8617 if (j == i)
8618 continue;
8619      // If CurrentElement is from V1 [0,15], then we expect the rest of the Mask
8620      // to be from V2 [16,31] and vice versa. Unless the 2nd operand is undefined,
8621      // in which case we assume we're always picking from the 1st operand.
8622 int MaskOffset =
8623 (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
8624 if (Mask[j] != OriginalOrder[j] + MaskOffset) {
8625 OtherElementsInOrder = false;
8626 break;
8627 }
8628 }
8629 // If other elements are in original order, we record the number of shifts
8630 // we need to get the element we want into element 7. Also record which byte
8631 // in the vector we should insert into.
8632 if (OtherElementsInOrder) {
8633 // If 2nd operand is undefined, we assume no shifts and no swapping.
8634 if (V2.isUndef()) {
8635 ShiftElts = 0;
8636 Swap = false;
8637 } else {
8638 // Only need the last 4-bits for shifts because operands will be swapped if CurrentElement is >= 2^4.
8639 ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
8640 : BigEndianShifts[CurrentElement & 0xF];
8641 Swap = CurrentElement < BytesInVector;
8642 }
8643 InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
8644 FoundCandidate = true;
8645 break;
8646 }
8647 }
8648
8649 if (!FoundCandidate)
8650 return SDValue();
8651
8652 // Candidate found, construct the proper SDAG sequence with VINSERTB,
8653 // optionally with VECSHL if shift is required.
8654 if (Swap)
8655 std::swap(V1, V2);
8656 if (V2.isUndef())
8657 V2 = V1;
8658 if (ShiftElts) {
8659 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
8660 DAG.getConstant(ShiftElts, dl, MVT::i32));
8661 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
8662 DAG.getConstant(InsertAtByte, dl, MVT::i32));
8663 }
8664 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
8665 DAG.getConstant(InsertAtByte, dl, MVT::i32));
8666}
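The two shift tables in lowerToVINSERTB admit simple closed forms. This standalone check (not LLVM code) verifies them against the goal stated in the comment, namely rotating source byte k into byte position 7:

#include <cstdio>

int main() {
  unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
                                   0, 15, 14, 13, 12, 11, 10, 9};
  unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
                                1, 2, 3, 4, 5, 6, 7, 8};
  bool OK = true;
  for (unsigned k = 0; k < 16; ++k) {
    OK &= LittleEndianShifts[k] == (8 + 16 - k) % 16; // LE: (8 - k) mod 16
    OK &= BigEndianShifts[k] == (k + 9) % 16;         // BE: (k + 9) mod 16
  }
  std::printf("%s\n", OK ? "tables match the closed forms" : "mismatch");
  return 0;
}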
8667
8668/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
8669/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
8670/// SDValue.
8671SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
8672 SelectionDAG &DAG) const {
8673 const unsigned NumHalfWords = 8;
8674 const unsigned BytesInVector = NumHalfWords * 2;
8675 // Check that the shuffle is on half-words.
8676 if (!isNByteElemShuffleMask(N, 2, 1))
8677 return SDValue();
8678
8679 bool IsLE = Subtarget.isLittleEndian();
8680 SDLoc dl(N);
8681 SDValue V1 = N->getOperand(0);
8682 SDValue V2 = N->getOperand(1);
8683 unsigned ShiftElts = 0, InsertAtByte = 0;
8684 bool Swap = false;
8685
8686 // Shifts required to get the half-word we want at element 3.
8687 unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
8688 unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
8689
8690 uint32_t Mask = 0;
8691 uint32_t OriginalOrderLow = 0x1234567;
8692 uint32_t OriginalOrderHigh = 0x89ABCDEF;
8693 // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
8694 // 32-bit space, only need 4-bit nibbles per element.
8695 for (unsigned i = 0; i < NumHalfWords; ++i) {
8696 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
8697 Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
8698 }
8699
8700 // For each mask element, find out if we're just inserting something
8701 // from V2 into V1 or vice versa. Possible permutations inserting an element
8702 // from V2 into V1:
8703 // X, 1, 2, 3, 4, 5, 6, 7
8704 // 0, X, 2, 3, 4, 5, 6, 7
8705 // 0, 1, X, 3, 4, 5, 6, 7
8706 // 0, 1, 2, X, 4, 5, 6, 7
8707 // 0, 1, 2, 3, X, 5, 6, 7
8708 // 0, 1, 2, 3, 4, X, 6, 7
8709 // 0, 1, 2, 3, 4, 5, X, 7
8710 // 0, 1, 2, 3, 4, 5, 6, X
8711 // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
8712
8713 bool FoundCandidate = false;
8714 // Go through the mask of half-words to find an element that's being moved
8715 // from one vector to the other.
8716 for (unsigned i = 0; i < NumHalfWords; ++i) {
8717 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
8718 uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
8719 uint32_t MaskOtherElts = ~(0xF << MaskShift);
8720 uint32_t TargetOrder = 0x0;
8721
8722 // If both vector operands for the shuffle are the same vector, the mask
8723 // will contain only elements from the first one and the second one will be
8724 // undef.
8725 if (V2.isUndef()) {
8726 ShiftElts = 0;
8727 unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
8728 TargetOrder = OriginalOrderLow;
8729 Swap = false;
8730      // Skip if this is not the correct element, or if the mask of the other
8731      // elements doesn't match our expected order.
8732 if (MaskOneElt == VINSERTHSrcElem &&
8733 (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
8734 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
8735 FoundCandidate = true;
8736 break;
8737 }
8738 } else { // If both operands are defined.
8739 // Target order is [8,15] if the current mask is between [0,7].
8740 TargetOrder =
8741 (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
8742      // Skip if the mask of the other elements doesn't match our expected order.
8743 if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
8744 // We only need the last 3 bits for the number of shifts.
8745 ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
8746 : BigEndianShifts[MaskOneElt & 0x7];
8747 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
8748 Swap = MaskOneElt < NumHalfWords;
8749 FoundCandidate = true;
8750 break;
8751 }
8752 }
8753 }
8754
8755 if (!FoundCandidate)
8756 return SDValue();
8757
8758 // Candidate found, construct the proper SDAG sequence with VINSERTH,
8759 // optionally with VECSHL if shift is required.
8760 if (Swap)
8761 std::swap(V1, V2);
8762 if (V2.isUndef())
8763 V2 = V1;
8764 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
8765 if (ShiftElts) {
8766 // Double ShiftElts because we're left shifting on v16i8 type.
8767 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
8768 DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
8769 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
8770 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
8771 DAG.getConstant(InsertAtByte, dl, MVT::i32));
8772 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
8773 }
8774 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
8775 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
8776 DAG.getConstant(InsertAtByte, dl, MVT::i32));
8777 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
8778}
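To make the nibble packing at the top of lowerToVINSERTH concrete, here is a standalone sketch (not LLVM code; packHalfWordMask is a made-up helper) that packs eight half-word source indices into one 32-bit word, element 0 in the most significant nibble, the same encoding compared against OriginalOrderLow/OriginalOrderHigh above:

#include <cstdint>
#include <cstdio>

// Pack eight half-word indices (0..15) into a 32-bit word, 4 bits each.
static uint32_t packHalfWordMask(const int Elts[8]) {
  uint32_t Mask = 0;
  for (unsigned i = 0; i < 8; ++i) {
    unsigned MaskShift = (8 - 1 - i) * 4;
    Mask |= (uint32_t)(Elts[i] & 0xF) << MaskShift;
  }
  return Mask;
}

int main() {
  int Identity[8] = {0, 1, 2, 3, 4, 5, 6, 7};
  std::printf("0x%08X\n", packHalfWordMask(Identity)); // 0x01234567 == OriginalOrderLow
  return 0;
}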
8779
8780/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
8781/// is a shuffle we can handle in a single instruction, return it. Otherwise,
8782/// return the code it can be lowered into. Worst case, it can always be
8783/// lowered into a vperm.
8784SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
8785 SelectionDAG &DAG) const {
8786 SDLoc dl(Op);
8787 SDValue V1 = Op.getOperand(0);
8788 SDValue V2 = Op.getOperand(1);
8789 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
8790 EVT VT = Op.getValueType();
8791 bool isLittleEndian = Subtarget.isLittleEndian();
8792
8793 unsigned ShiftElts, InsertAtByte;
1. 'ShiftElts' declared without an initial value
8794 bool Swap = false;
8795 if (Subtarget.hasP9Vector() &&
2. Assuming the condition is false
8796 PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
8797 isLittleEndian)) {
8798 if (Swap)
8799 std::swap(V1, V2);
8800 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
8801 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
8802 if (ShiftElts) {
8803 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
8804 DAG.getConstant(ShiftElts, dl, MVT::i32));
8805 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
8806 DAG.getConstant(InsertAtByte, dl, MVT::i32));
8807 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
8808 }
8809 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
8810 DAG.getConstant(InsertAtByte, dl, MVT::i32));
8811 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
8812 }
8813
8814 if (Subtarget.hasP9Altivec()) {
3. Assuming the condition is false
4. Taking false branch
8815 SDValue NewISDNode;
8816 if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
8817 return NewISDNode;
8818
8819 if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
8820 return NewISDNode;
8821 }
8822
8823 if (Subtarget.hasVSX() &&
5. Assuming the condition is true
37. Taking true branch
8824 PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
6. Calling 'isXXSLDWIShuffleMask'
36. Returning from 'isXXSLDWIShuffleMask'
8825     if (Swap)
37.1. 'Swap' is false
38. Taking false branch
8826 std::swap(V1, V2);
8827 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
8828 SDValue Conv2 =
8829 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
39. '?' condition is false
8830
8831 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
8832 DAG.getConstant(ShiftElts, dl, MVT::i32));
40. 1st function call argument is an uninitialized value
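// Editor's note (hedged): along this path isXXSLDWIShuffleMask() returns true
// (steps 6 and 36 above) without having stored to 'ShiftElts', so the value
// passed to DAG.getConstant() here is still the uninitialized one declared at
// line 8793. A minimal defensive sketch, assuming callers do not rely on the
// variables staying uninitialized, is to give both outputs a value at their
// declaration:
//   unsigned ShiftElts = 0, InsertAtByte = 0;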
8833 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
8834 }
8835
8836 if (Subtarget.hasVSX() &&
8837 PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
8838 if (Swap)
8839 std::swap(V1, V2);
8840 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
8841 SDValue Conv2 =
8842 DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
8843
8844 SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
8845 DAG.getConstant(ShiftElts, dl, MVT::i32));
8846 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
8847 }
8848
8849 if (Subtarget.hasP9Vector()) {
8850 if (PPC::isXXBRHShuffleMask(SVOp)) {
8851 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
8852 SDValue ReveHWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v8i16, Conv);
8853 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
8854 } else if (PPC::isXXBRWShuffleMask(SVOp)) {
8855 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
8856 SDValue ReveWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v4i32, Conv);
8857 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
8858 } else if (PPC::isXXBRDShuffleMask(SVOp)) {
8859 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
8860 SDValue ReveDWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Conv);
8861 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
8862 } else if (PPC::isXXBRQShuffleMask(SVOp)) {
8863 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
8864 SDValue ReveQWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v1i128, Conv);
8865 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
8866 }
8867 }
8868
8869 if (Subtarget.hasVSX()) {
8870 if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
8871 int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG);
8872
8873 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
8874 SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
8875 DAG.getConstant(SplatIdx, dl, MVT::i32));
8876 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
8877 }
8878
8879 // Left shifts of 8 bytes are actually swaps. Convert accordingly.
8880 if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
8881 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
8882 SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
8883 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
8884 }
8885 }
8886
8887 if (Subtarget.hasQPX()) {
8888 if (VT.getVectorNumElements() != 4)
8889 return SDValue();
8890
8891 if (V2.isUndef()) V2 = V1;
8892
8893 int AlignIdx = PPC::isQVALIGNIShuffleMask(SVOp);
8894 if (AlignIdx != -1) {
8895 return DAG.getNode(PPCISD::QVALIGNI, dl, VT, V1, V2,
8896 DAG.getConstant(AlignIdx, dl, MVT::i32));
8897 } else if (SVOp->isSplat()) {
8898 int SplatIdx = SVOp->getSplatIndex();
8899 if (SplatIdx >= 4) {
8900 std::swap(V1, V2);
8901 SplatIdx -= 4;
8902 }
8903
8904 return DAG.getNode(PPCISD::QVESPLATI, dl, VT, V1,
8905 DAG.getConstant(SplatIdx, dl, MVT::i32));
8906 }
8907
8908 // Lower this into a qvgpci/qvfperm pair.
8909
8910 // Compute the qvgpci literal
8911 unsigned idx = 0;
8912 for (unsigned i = 0; i < 4; ++i) {
8913 int m = SVOp->getMaskElt(i);
8914 unsigned mm = m >= 0 ? (unsigned) m : i;
8915 idx |= mm << (3-i)*3;
8916 }
8917
8918 SDValue V3 = DAG.getNode(PPCISD::QVGPCI, dl, MVT::v4f64,
8919 DAG.getConstant(idx, dl, MVT::i32));
8920 return DAG.getNode(PPCISD::QVFPERM, dl, VT, V1, V2, V3);
8921 }
8922
8923 // Cases that are handled by instructions that take permute immediates
8924 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
8925 // selected by the instruction selector.
8926 if (V2.isUndef()) {
8927 if (PPC::isSplatShuffleMask(SVOp, 1) ||
8928 PPC::isSplatShuffleMask(SVOp, 2) ||
8929 PPC::isSplatShuffleMask(SVOp, 4) ||
8930 PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
8931 PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
8932 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
8933 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
8934 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
8935 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
8936 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
8937 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
8938 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
8939 (Subtarget.hasP8Altivec() && (
8940 PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
8941 PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
8942 PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
8943 return Op;
8944 }
8945 }
8946
8947 // Altivec has a variety of "shuffle immediates" that take two vector inputs
8948 // and produce a fixed permutation. If any of these match, do not lower to
8949 // VPERM.
8950 unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
8951 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
8952 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
8953 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
8954 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
8955 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
8956 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
8957 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
8958 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
8959 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
8960 (Subtarget.hasP8Altivec() && (
8961 PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
8962 PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
8963 PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
8964 return Op;
8965
8966 // Check to see if this is a shuffle of 4-byte values. If so, we can use our
8967 // perfect shuffle table to emit an optimal matching sequence.
8968 ArrayRef<int> PermMask = SVOp->getMask();
8969
8970 unsigned PFIndexes[4];
8971 bool isFourElementShuffle = true;
8972 for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
8973 unsigned EltNo = 8; // Start out undef.
8974 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
8975 if (PermMask[i*4+j] < 0)
8976 continue; // Undef, ignore it.
8977
8978 unsigned ByteSource = PermMask[i*4+j];
8979 if ((ByteSource & 3) != j) {
8980 isFourElementShuffle = false;
8981 break;
8982 }
8983
8984 if (EltNo == 8) {
8985 EltNo = ByteSource/4;
8986 } else if (EltNo != ByteSource/4) {
8987 isFourElementShuffle = false;
8988 break;
8989 }
8990 }
8991 PFIndexes[i] = EltNo;
8992 }
8993
8994 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
8995 // perfect shuffle vector to determine if it is cost effective to do this as
8996 // discrete instructions, or whether we should use a vperm.
8997 // For now, we skip this for little endian until such time as we have a
8998 // little-endian perfect shuffle table.
8999 if (isFourElementShuffle && !isLittleEndian) {
9000 // Compute the index in the perfect shuffle table.
9001 unsigned PFTableIndex =
9002 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
9003
9004 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
9005 unsigned Cost = (PFEntry >> 30);
9006
9007 // Determining when to avoid vperm is tricky. Many things affect the cost
9008 // of vperm, particularly how many times the perm mask needs to be computed.
9009 // For example, if the perm mask can be hoisted out of a loop or is already
9010 // used (perhaps because there are multiple permutes with the same shuffle
9011 // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of
9012 // the loop requires an extra register.
9013 //
9014 // As a compromise, we only emit discrete instructions if the shuffle can be
9015 // generated in 3 or fewer operations. When we have loop information
9016 // available, if this block is within a loop, we should avoid using vperm
9017 // for 3-operation perms and use a constant pool load instead.
9018 if (Cost < 3)
9019 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
9020 }
9021
9022 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
9023 // vector that will get spilled to the constant pool.
9024 if (V2.isUndef()) V2 = V1;
9025
9026 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
9027 // that it is in input element units, not in bytes. Convert now.
9028
9029 // For little endian, the order of the input vectors is reversed, and
9030 // the permutation mask is complemented with respect to 31. This is
9031 // necessary to produce proper semantics with the big-endian-biased vperm
9032 // instruction.
9033 EVT EltVT = V1.getValueType().getVectorElementType();
9034 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
9035
9036 SmallVector<SDValue, 16> ResultMask;
9037 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
9038 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
9039
9040 for (unsigned j = 0; j != BytesPerElement; ++j)
9041 if (isLittleEndian)
9042 ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j),
9043 dl, MVT::i32));
9044 else
9045 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl,
9046 MVT::i32));
9047 }
9048
9049 SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
9050 if (isLittleEndian)
9051 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
9052 V2, V1, VPermMask);
9053 else
9054 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
9055 V1, V2, VPermMask);
9056}
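The tail of LowerVECTOR_SHUFFLE above builds the vperm byte selector from the element-level mask. Here is a standalone sketch of that conversion (not LLVM code; buildVPermMask and the sample mask are illustrative), including the little-endian complement with respect to 31 that pairs with swapping the two operands:

#include <cstdio>
#include <vector>

// Expand each element index into BytesPerElement byte indices; on little
// endian, complement each byte index with respect to 31.
static std::vector<unsigned> buildVPermMask(const std::vector<int> &PermMask,
                                            unsigned BytesPerElement,
                                            bool IsLittleEndian) {
  std::vector<unsigned> Result;
  for (int Elt : PermMask) {
    unsigned SrcElt = Elt < 0 ? 0 : (unsigned)Elt;
    for (unsigned j = 0; j != BytesPerElement; ++j)
      Result.push_back(IsLittleEndian ? 31 - (SrcElt * BytesPerElement + j)
                                      : SrcElt * BytesPerElement + j);
  }
  return Result;
}

int main() {
  std::vector<int> Mask = {4, 5, 0, 1}; // hypothetical v4i32 shuffle mask
  for (unsigned B : buildVPermMask(Mask, 4, /*IsLittleEndian=*/true))
    std::printf("%u ", B);
  std::printf("\n");
  return 0;
}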
9057
9058/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
9059/// vector comparison. If it is, return true and fill in Opc/isDot with
9060/// information about the intrinsic.
9061static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
9062 bool &isDot, const PPCSubtarget &Subtarget) {
9063 unsigned IntrinsicID =
9064 cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
9065 CompareOpc = -1;
9066 isDot = false;
9067 switch (IntrinsicID) {
9068 default:
9069 return false;
9070 // Comparison predicates.
9071 case Intrinsic::ppc_altivec_vcmpbfp_p:
9072 CompareOpc = 966;
9073 isDot = true;
9074 break;
9075 case Intrinsic::ppc_altivec_vcmpeqfp_p:
9076 CompareOpc = 198;
9077 isDot = true;
9078 break;
9079 case Intrinsic::ppc_altivec_vcmpequb_p:
9080 CompareOpc = 6;
9081 isDot = true;
9082 break;
9083 case Intrinsic::ppc_altivec_vcmpequh_p:
9084 CompareOpc = 70;
9085 isDot = true;
9086 break;
9087 case Intrinsic::ppc_altivec_vcmpequw_p:
9088 CompareOpc = 134;
9089 isDot = true;
9090 break;
9091 case Intrinsic::ppc_altivec_vcmpequd_p:
9092 if (Subtarget.hasP8Altivec()) {
9093 CompareOpc = 199;
9094 isDot = true;
9095 } else
9096 return false;
9097 break;
9098 case Intrinsic::ppc_altivec_vcmpneb_p:
9099 case Intrinsic::ppc_altivec_vcmpneh_p:
9100 case Intrinsic::ppc_altivec_vcmpnew_p:
9101 case Intrinsic::ppc_altivec_vcmpnezb_p:
9102 case Intrinsic::ppc_altivec_vcmpnezh_p:
9103 case Intrinsic::ppc_altivec_vcmpnezw_p:
9104 if (Subtarget.hasP9Altivec()) {
9105 switch (IntrinsicID) {
9106 default:
9107       llvm_unreachable("Unknown comparison intrinsic.");
9108 case Intrinsic::ppc_altivec_vcmpneb_p:
9109 CompareOpc = 7;
9110 break;
9111 case Intrinsic::ppc_altivec_vcmpneh_p:
9112 CompareOpc = 71;
9113 break;
9114 case Intrinsic::ppc_altivec_vcmpnew_p:
9115 CompareOpc = 135;
9116 break;
9117 case Intrinsic::ppc_altivec_vcmpnezb_p:
9118 CompareOpc = 263;
9119 break;
9120 case Intrinsic::ppc_altivec_vcmpnezh_p:
9121 CompareOpc = 327;
9122 break;
9123 case Intrinsic::ppc_altivec_vcmpnezw_p:
9124 CompareOpc = 391;
9125 break;
9126 }
9127 isDot = true;
9128 } else
9129 return false;
9130 break;
9131 case Intrinsic::ppc_altivec_vcmpgefp_p:
9132 CompareOpc = 454;
9133 isDot = true;
9134 break;
9135 case Intrinsic::ppc_altivec_vcmpgtfp_p:
9136 CompareOpc = 710;
9137 isDot = true;
9138 break;
9139 case Intrinsic::ppc_altivec_vcmpgtsb_p:
9140 CompareOpc = 774;
9141 isDot = true;
9142 break;
9143 case Intrinsic::ppc_altivec_vcmpgtsh_p:
9144 CompareOpc = 838;
9145 isDot = true;
9146 break;
9147 case Intrinsic::ppc_altivec_vcmpgtsw_p:
9148 CompareOpc = 902;
9149 isDot = true;
9150 break;
9151 case Intrinsic::ppc_altivec_vcmpgtsd_p:
9152 if (Subtarget.hasP8Altivec()) {
9153 CompareOpc = 967;
9154 isDot = true;
9155 } else
9156 return false;
9157 break;
9158 case Intrinsic::ppc_altivec_vcmpgtub_p:
9159 CompareOpc = 518;
9160 isDot = true;
9161 break;
9162 case Intrinsic::ppc_altivec_vcmpgtuh_p:
9163 CompareOpc = 582;
9164 isDot = true;
9165 break;
9166 case Intrinsic::ppc_altivec_vcmpgtuw_p:
9167 CompareOpc = 646;
9168 isDot = true;
9169 break;
9170 case Intrinsic::ppc_altivec_vcmpgtud_p:
9171 if (Subtarget.hasP8Altivec()) {
9172 CompareOpc = 711;
9173 isDot = true;
9174 } else
9175 return false;
9176 break;
9177
9178 // VSX predicate comparisons use the same infrastructure
9179 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
9180 case Intrinsic::ppc_vsx_xvcmpgedp_p:
9181 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
9182 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
9183 case Intrinsic::ppc_vsx_xvcmpgesp_p:
9184 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
9185 if (Subtarget.hasVSX()) {
9186 switch (IntrinsicID) {
9187 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
9188 CompareOpc = 99;
9189 break;
9190 case Intrinsic::ppc_vsx_xvcmpgedp_p:
9191 CompareOpc = 115;
9192 break;
9193 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
9194 CompareOpc = 107;
9195 break;
9196 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
9197 CompareOpc = 67;
9198 break;
9199 case Intrinsic::ppc_vsx_xvcmpgesp_p:
9200 CompareOpc = 83;
9201 break;
9202 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
9203 CompareOpc = 75;
9204 break;
9205 }
9206 isDot = true;
9207 } else
9208 return false;
9209 break;
9210
9211 // Normal Comparisons.
9212 case Intrinsic::ppc_altivec_vcmpbfp:
9213 CompareOpc = 966;
9214 break;
9215 case Intrinsic::ppc_altivec_vcmpeqfp:
9216 CompareOpc = 198;
9217 break;
9218 case Intrinsic::ppc_altivec_vcmpequb:
9219 CompareOpc = 6;
9220 break;
9221 case Intrinsic::ppc_altivec_vcmpequh:
9222 CompareOpc = 70;
9223 break;
9224 case Intrinsic::ppc_altivec_vcmpequw:
9225 CompareOpc = 134;
9226 break;
9227 case Intrinsic::ppc_altivec_vcmpequd:
9228 if (Subtarget.hasP8Altivec())
9229 CompareOpc = 199;
9230 else
9231 return false;
9232 break;
9233 case Intrinsic::ppc_altivec_vcmpneb:
9234 case Intrinsic::ppc_altivec_vcmpneh:
9235 case Intrinsic::ppc_altivec_vcmpnew:
9236 case Intrinsic::ppc_altivec_vcmpnezb:
9237 case Intrinsic::ppc_altivec_vcmpnezh:
9238 case Intrinsic::ppc_altivec_vcmpnezw:
9239 if (Subtarget.hasP9Altivec())
9240 switch (IntrinsicID) {
9241 default:
9242       llvm_unreachable("Unknown comparison intrinsic.");
9243 case Intrinsic::ppc_altivec_vcmpneb:
9244 CompareOpc = 7;
9245 break;
9246 case Intrinsic::ppc_altivec_vcmpneh:
9247 CompareOpc = 71;
9248 break;
9249 case Intrinsic::ppc_altivec_vcmpnew:
9250 CompareOpc = 135;
9251 break;
9252 case Intrinsic::ppc_altivec_vcmpnezb:
9253 CompareOpc = 263;
9254 break;
9255 case Intrinsic::ppc_altivec_vcmpnezh:
9256 CompareOpc = 327;
9257 break;
9258 case Intrinsic::ppc_altivec_vcmpnezw:
9259 CompareOpc = 391;
9260 break;
9261 }
9262 else
9263 return false;
9264 break;
9265 case Intrinsic::ppc_altivec_vcmpgefp:
9266 CompareOpc = 454;
9267 break;
9268 case Intrinsic::ppc_altivec_vcmpgtfp:
9269 CompareOpc = 710;
9270 break;
9271 case Intrinsic::ppc_altivec_vcmpgtsb:
9272 CompareOpc = 774;
9273 break;
9274 case Intrinsic::ppc_altivec_vcmpgtsh:
9275 CompareOpc = 838;
9276 break;
9277 case Intrinsic::ppc_altivec_vcmpgtsw:
9278 CompareOpc = 902;
9279 break;
9280 case Intrinsic::ppc_altivec_vcmpgtsd:
9281 if (Subtarget.hasP8Altivec())
9282 CompareOpc = 967;
9283 else
9284 return false;
9285 break;
9286 case Intrinsic::ppc_altivec_vcmpgtub:
9287 CompareOpc = 518;
9288 break;
9289 case Intrinsic::ppc_altivec_vcmpgtuh:
9290 CompareOpc = 582;
9291 break;
9292 case Intrinsic::ppc_altivec_vcmpgtuw:
9293 CompareOpc = 646;
9294 break;
9295 case Intrinsic::ppc_altivec_vcmpgtud:
9296 if (Subtarget.hasP8Altivec())
9297 CompareOpc = 711;
9298 else
9299 return false;
9300 break;
9301 }
9302 return true;
9303}
9304
9305/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
9306/// lower, do it, otherwise return null.
9307SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
9308 SelectionDAG &DAG) const {
9309 unsigned IntrinsicID =
9310 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
9311
9312 SDLoc dl(Op);
9313
9314 if (IntrinsicID == Intrinsic::thread_pointer) {
9315 // Reads the thread pointer register, used for __builtin_thread_pointer.
9316 if (Subtarget.isPPC64())
9317 return DAG.getRegister(PPC::X13, MVT::i64);
9318 return DAG.getRegister(PPC::R2, MVT::i32);
9319 }
9320
9321 // If this is a lowered altivec predicate compare, CompareOpc is set to the
9322 // opcode number of the comparison.
9323 int CompareOpc;
9324 bool isDot;
9325 if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
9326 return SDValue(); // Don't custom lower most intrinsics.
9327
9328 // If this is a non-dot comparison, make the VCMP node and we are done.
9329 if (!isDot) {
9330 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
9331 Op.getOperand(1), Op.getOperand(2),
9332 DAG.getConstant(CompareOpc, dl, MVT::i32));
9333 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
9334 }
9335
9336 // Create the PPCISD altivec 'dot' comparison node.
9337 SDValue Ops[] = {
9338 Op.getOperand(2), // LHS
9339 Op.getOperand(3), // RHS
9340 DAG.getConstant(CompareOpc, dl, MVT::i32)
9341 };
9342 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
9343 SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
9344
9345 // Now that we have the comparison, emit a copy from the CR to a GPR.
9346 // This is flagged to the above dot comparison.
9347 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
9348 DAG.getRegister(PPC::CR6, MVT::i32),
9349 CompNode.getValue(1));
9350
9351 // Unpack the result based on how the target uses it.
9352 unsigned BitNo; // Bit # of CR6.
9353 bool InvertBit; // Invert result?
9354 switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
9355 default: // Can't happen, don't crash on invalid number though.
9356 case 0: // Return the value of the EQ bit of CR6.
9357 BitNo = 0; InvertBit = false;
9358 break;
9359 case 1: // Return the inverted value of the EQ bit of CR6.
9360 BitNo = 0; InvertBit = true;
9361 break;
9362 case 2: // Return the value of the LT bit of CR6.
9363 BitNo = 2; InvertBit = false;
9364 break;
9365 case 3: // Return the inverted value of the LT bit of CR6.
9366 BitNo = 2; InvertBit = true;
9367 break;
9368 }
9369
9370 // Shift the bit into the low position.
9371 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
9372 DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
9373 // Isolate the bit.
9374 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
9375 DAG.getConstant(1, dl, MVT::i32));
9376
9377 // If we are supposed to, toggle the bit.
9378 if (InvertBit)
9379 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
9380 DAG.getConstant(1, dl, MVT::i32));
9381 return Flags;
9382}
9383
9384SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
9385 SelectionDAG &DAG) const {
9386 // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
9387 // the beginning of the argument list.
9388 int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
9389 SDLoc DL(Op);
9390 switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
9391 case Intrinsic::ppc_cfence: {
9392     assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
9393     assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
9394 return SDValue(DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
9395 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
9396 Op.getOperand(ArgStart + 1)),
9397 Op.getOperand(0)),
9398 0);
9399 }
9400 default:
9401 break;
9402 }
9403 return SDValue();
9404}
9405
9406SDValue PPCTargetLowering::LowerREM(SDValue Op, SelectionDAG &DAG) const {
9407 // Check for a DIV with the same operands as this REM.
9408 for (auto UI : Op.getOperand(1)->uses()) {
9409 if ((Op.getOpcode() == ISD::SREM && UI->getOpcode() == ISD::SDIV) ||
9410 (Op.getOpcode() == ISD::UREM && UI->getOpcode() == ISD::UDIV))
9411 if (UI->getOperand(0) == Op.getOperand(0) &&
9412 UI->getOperand(1) == Op.getOperand(1))
9413 return SDValue();
9414 }
9415 return Op;
9416}
9417
9418// Lower scalar BSWAP64 to xxbrd.
9419SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
9420 SDLoc dl(Op);
9421 // MTVSRDD
9422 Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
9423 Op.getOperand(0));
9424 // XXBRD
9425 Op = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Op);
9426 // MFVSRD
9427 int VectorIndex = 0;
9428 if (Subtarget.isLittleEndian())
9429 VectorIndex = 1;
9430 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
9431 DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
9432 return Op;
9433}
9434
9435// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
9436// compared to a value that is atomically loaded (atomic loads zero-extend).
9437SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
9438 SelectionDAG &DAG) const {
9439   assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
9440          "Expecting an atomic compare-and-swap here.");
9441 SDLoc dl(Op);
9442 auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
9443 EVT MemVT = AtomicNode->getMemoryVT();
9444 if (MemVT.getSizeInBits() >= 32)
9445 return Op;
9446
9447 SDValue CmpOp = Op.getOperand(2);
9448 // If this is already correctly zero-extended, leave it alone.
9449 auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
9450 if (DAG.MaskedValueIsZero(CmpOp, HighBits))
9451 return Op;
9452
9453 // Clear the high bits of the compare operand.
9454 unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
9455 SDValue NewCmpOp =
9456 DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
9457 DAG.getConstant(MaskVal, dl, MVT::i32));
9458
9459 // Replace the existing compare operand with the properly zero-extended one.
9460 SmallVector<SDValue, 4> Ops;
9461 for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
9462 Ops.push_back(AtomicNode->getOperand(i));
9463 Ops[2] = NewCmpOp;
9464 MachineMemOperand *MMO = AtomicNode->getMemOperand();
9465 SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
9466 auto NodeTy =
9467 (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
9468 return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
9469}
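As a scalar illustration of the masking above (a sketch, not LLVM code; zeroExtendCompare is a made-up name): the compare operand of an i8/i16 cmpxchg is masked down to the memory width so that it compares equal to what a zero-extending atomic load produces:

#include <cstdint>
#include <cstdio>

// Mirror the MaskVal computation: (1 << bits) - 1, i.e. 0xFF or 0xFFFF.
static uint32_t zeroExtendCompare(uint32_t CmpOp, unsigned MemBits) {
  uint32_t MaskVal = (1u << MemBits) - 1;
  return CmpOp & MaskVal;
}

int main() {
  std::printf("0x%X\n", zeroExtendCompare(0xFFFFFF80u, 8)); // prints 0x80
  return 0;
}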
9470
9471SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
9472 SelectionDAG &DAG) const {
9473 SDLoc dl(Op);
9474 // Create a stack slot that is 16-byte aligned.
9475 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
9476 int FrameIdx = MFI.CreateStackObject(16, 16, false);
9477 EVT PtrVT = getPointerTy(DAG.getDataLayout());
9478 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
9479
9480 // Store the input value into Value#0 of the stack slot.
9481 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
9482 MachinePointerInfo());
9483 // Load it out.
9484 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
9485}
9486
9487SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
9488 SelectionDAG &DAG) const {
9489   assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
9490          "Should only be called for ISD::INSERT_VECTOR_ELT");
9491
9492 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
9493 // We have legal lowering for constant indices but not for variable ones.
9494 if (!C)
9495 return SDValue();
9496
9497 EVT VT = Op.getValueType();
9498 SDLoc dl(Op);
9499 SDValue V1 = Op.getOperand(0);
9500 SDValue V2 = Op.getOperand(1);
9501 // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
9502 if (VT == MVT::v8i16 || VT == MVT::v16i8) {
9503 SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
9504 unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
9505 unsigned InsertAtElement = C->getZExtValue();
9506 unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
9507 if (Subtarget.isLittleEndian()) {
9508 InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
9509 }
9510 return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
9511 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9512 }
9513 return Op;
9514}
9515
9516SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
9517 SelectionDAG &DAG) const {
9518 SDLoc dl(Op);
9519 SDNode *N = Op.getNode();
9520
9521   assert(N->getOperand(0).getValueType() == MVT::v4i1 &&
9522          "Unknown extract_vector_elt type");
9523
9524 SDValue Value = N->getOperand(0);
9525
9526 // The first part of this is like the store lowering except that we don't
9527 // need to track the chain.
9528
9529 // The values are now known to be -1 (false) or 1 (true). To convert this
9530 // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
9531 // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
9532 Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
9533
9534 // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
9535 // understand how to form the extending load.
9536 SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);
9537
9538 Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
9539
9540 // Now convert to an integer and store.
9541 Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
9542 DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
9543 Value);
9544
9545 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
9546 int FrameIdx = MFI.CreateStackObject(16, 16, false);
9547 MachinePointerInfo PtrInfo =
9548 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
9549 EVT PtrVT = getPointerTy(DAG.getDataLayout());
9550 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
9551
9552 SDValue StoreChain = DAG.getEntryNode();
9553 SDValue Ops[] = {StoreChain,
9554 DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32),
9555 Value, FIdx};
9556 SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other);
9557
9558 StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
9559 dl, VTs, Ops, MVT::v4i32, PtrInfo);
9560
9561 // Extract the value requested.
9562 unsigned Offset = 4*cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
9563 SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
9564 Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
9565
9566 SDValue IntVal =
9567 DAG.getLoad(MVT::i32, dl, StoreChain, Idx, PtrInfo.getWithOffset(Offset));
9568
9569 if (!Subtarget.useCRBits())
9570 return IntVal;
9571
9572 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, IntVal);
9573}
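The -1/+1 to 0/1 conversion used above is just the affine map described in the comment; a tiny standalone check (not LLVM code):

#include <cstdio>

// QPX booleans are -1.0 (false) and 1.0 (true); 0.5*V + 0.5 maps them to
// 0.0 and 1.0, which is what the FMA with the 0.5 splat computes per lane.
int main() {
  for (double V : {-1.0, 1.0})
    std::printf("%g -> %g\n", V, 0.5 * V + 0.5);
  return 0;
}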
9574
9575/// Lowering for QPX v4i1 loads
9576SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
9577 SelectionDAG &DAG) const {
9578 SDLoc dl(Op);
9579 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
9580 SDValue LoadChain = LN->getChain();
9581 SDValue BasePtr = LN->getBasePtr();
9582
9583 if (Op.getValueType() == MVT::v4f64 ||
9584 Op.getValueType() == MVT::v4f32) {
9585 EVT MemVT = LN->getMemoryVT();
9586 unsigned Alignment = LN->getAlignment();
9587
9588 // If this load is properly aligned, then it is legal.
9589 if (Alignment >= MemVT.getStoreSize())
9590 return Op;
9591
9592 EVT ScalarVT = Op.getValueType().getScalarType(),
9593 ScalarMemVT = MemVT.getScalarType();
9594 unsigned Stride = ScalarMemVT.getStoreSize();
9595
9596 SDValue Vals[4], LoadChains[4];
9597 for (unsigned Idx = 0; Idx < 4; ++Idx) {
9598 SDValue Load;
9599 if (ScalarVT != ScalarMemVT)
9600 Load = DAG.getExtLoad(LN->getExtensionType(), dl, ScalarVT, LoadChain,
9601 BasePtr,
9602 LN->getPointerInfo().getWithOffset(Idx * Stride),
9603 ScalarMemVT, MinAlign(Alignment, Idx * Stride),
9604 LN->getMemOperand()->getFlags(), LN->getAAInfo());
9605 else
9606 Load = DAG.getLoad(ScalarVT, dl, LoadChain, BasePtr,
9607 LN->getPointerInfo().getWithOffset(Idx * Stride),
9608 MinAlign(Alignment, Idx * Stride),
9609 LN->getMemOperand()->getFlags(), LN->getAAInfo());
9610
9611 if (Idx == 0 && LN->isIndexed()) {
9612         assert(LN->getAddressingMode() == ISD::PRE_INC &&
9613                "Unknown addressing mode on vector load");
9614 Load = DAG.getIndexedLoad(Load, dl, BasePtr, LN->getOffset(),
9615 LN->getAddressingMode());
9616 }
9617
9618 Vals[Idx] = Load;
9619 LoadChains[Idx] = Load.getValue(1);
9620
9621 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
9622 DAG.getConstant(Stride, dl,
9623 BasePtr.getValueType()));
9624 }
9625
9626 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
9627 SDValue Value = DAG.getBuildVector(Op.getValueType(), dl, Vals);
9628
9629 if (LN->isIndexed()) {
9630 SDValue RetOps[] = { Value, Vals[0].getValue(1), TF };
9631 return DAG.getMergeValues(RetOps, dl);
9632 }
9633
9634 SDValue RetOps[] = { Value, TF };
9635 return DAG.getMergeValues(RetOps, dl);
9636 }
9637
9638   assert(Op.getValueType() == MVT::v4i1 && "Unknown load to lower");
9639   assert(LN->isUnindexed() && "Indexed v4i1 loads are not supported");
9640
9641 // To lower v4i1 from a byte array, we load the byte elements of the
9642 // vector and then reuse the BUILD_VECTOR logic.
9643
9644 SDValue VectElmts[4], VectElmtChains[4];
9645 for (unsigned i = 0; i < 4; ++i) {
9646 SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
9647 Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);
9648
9649 VectElmts[i] = DAG.getExtLoad(
9650 ISD::EXTLOAD, dl, MVT::i32, LoadChain, Idx,
9651 LN->getPointerInfo().getWithOffset(i), MVT::i8,
9652 /* Alignment = */ 1, LN->getMemOperand()->getFlags(), LN->getAAInfo());
9653 VectElmtChains[i] = VectElmts[i].getValue(1);
9654 }
9655
9656 LoadChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, VectElmtChains);
9657 SDValue Value = DAG.getBuildVector(MVT::v4i1, dl, VectElmts);
9658
9659 SDValue RVals[] = { Value, LoadChain };
9660 return DAG.getMergeValues(RVals, dl);
9661}
9662
9663/// Lowering for QPX v4i1 stores
9664SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
9665 SelectionDAG &DAG) const {
9666 SDLoc dl(Op);
9667 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
9668 SDValue StoreChain = SN->getChain();
9669 SDValue BasePtr = SN->getBasePtr();
9670 SDValue Value = SN->getValue();
9671
9672 if (Value.getValueType() == MVT::v4f64 ||
9673 Value.getValueType() == MVT::v4f32) {
9674 EVT MemVT = SN->getMemoryVT();
9675 unsigned Alignment = SN->getAlignment();
9676
9677 // If this store is properly aligned, then it is legal.
9678 if (Alignment >= MemVT.getStoreSize())
9679 return Op;
9680
9681 EVT ScalarVT = Value.getValueType().getScalarType(),
9682 ScalarMemVT = MemVT.getScalarType();
9683 unsigned Stride = ScalarMemVT.getStoreSize();
9684
9685 SDValue Stores[4];
9686 for (unsigned Idx = 0; Idx < 4; ++Idx) {
9687 SDValue Ex = DAG.getNode(
9688 ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value,
9689 DAG.getConstant(Idx, dl, getVectorIdxTy(DAG.getDataLayout())));
9690 SDValue Store;
9691 if (ScalarVT != ScalarMemVT)
9692 Store =
9693 DAG.getTruncStore(StoreChain, dl, Ex, BasePtr,
9694 SN->getPointerInfo().getWithOffset(Idx * Stride),
9695 ScalarMemVT, MinAlign(Alignment, Idx * Stride),
9696 SN->getMemOperand()->getFlags(), SN->getAAInfo());
9697 else
9698 Store = DAG.getStore(StoreChain, dl, Ex, BasePtr,
9699 SN->getPointerInfo().getWithOffset(Idx * Stride),
9700 MinAlign(Alignment, Idx * Stride),
9701 SN->getMemOperand()->getFlags(), SN->getAAInfo());
9702
9703 if (Idx == 0 && SN->isIndexed()) {
9704         assert(SN->getAddressingMode() == ISD::PRE_INC &&
9705                "Unknown addressing mode on vector store");
9706 Store = DAG.getIndexedStore(Store, dl, BasePtr, SN->getOffset(),
9707 SN->getAddressingMode());
9708 }
9709
9710 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
9711 DAG.getConstant(Stride, dl,
9712 BasePtr.getValueType()));
9713 Stores[Idx] = Store;
9714 }
9715
9716 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
9717
9718 if (SN->isIndexed()) {
9719 SDValue RetOps[] = { TF, Stores[0].getValue(1) };
9720 return DAG.getMergeValues(RetOps, dl);
9721 }
9722
9723 return TF;
9724 }
9725
9726   assert(SN->isUnindexed() && "Indexed v4i1 stores are not supported");
9727   assert(Value.getValueType() == MVT::v4i1 && "Unknown store to lower");
9728
9729 // The values are now known to be -1 (false) or 1 (true). To convert this
9730 // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
9731 // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
9732 Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
9733
9734 // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
9735 // understand how to form the extending load.
9736 SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);
9737
9738 Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
9739
9740 // Now convert to an integer and store.
9741 Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
9742 DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
9743 Value);
9744
9745 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
9746 int FrameIdx = MFI.CreateStackObject(16, 16, false);
9747 MachinePointerInfo PtrInfo =
9748 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
9749 EVT PtrVT = getPointerTy(DAG.getDataLayout());
9750 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
9751
9752 SDValue Ops[] = {StoreChain,
9753 DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32),
9754 Value, FIdx};
9755 SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other);
9756
9757 StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
9758 dl, VTs, Ops, MVT::v4i32, PtrInfo);
9759
9760 // Move data into the byte array.
9761 SDValue Loads[4], LoadChains[4];
9762 for (unsigned i = 0; i < 4; ++i) {
9763 unsigned Offset = 4*i;
9764 SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
9765 Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
9766
9767 Loads[i] = DAG.getLoad(MVT::i32, dl, StoreChain, Idx,
9768 PtrInfo.getWithOffset(Offset));
9769 LoadChains[i] = Loads[i].getValue(1);
9770 }
9771
9772 StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
9773
9774 SDValue Stores[4];
9775 for (unsigned i = 0; i < 4; ++i) {
9776 SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
9777 Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);
9778
9779 Stores[i] = DAG.getTruncStore(
9780 StoreChain, dl, Loads[i], Idx, SN->getPointerInfo().getWithOffset(i),
9781 MVT::i8, /* Alignment = */ 1, SN->getMemOperand()->getFlags(),
9782 SN->getAAInfo());
9783 }
9784
9785 StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
9786
9787 return StoreChain;
9788}
9789
9790SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
9791 SDLoc dl(Op);
9792 if (Op.getValueType() == MVT::v4i32) {
9793 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
9794
9795 SDValue Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG, dl);
9796 SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt.
9797
9798 SDValue RHSSwap = // = vrlw RHS, 16
9799 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
9800
9801 // Shrinkify inputs to v8i16.
9802 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
9803 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
9804 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
9805
9806 // Low parts multiplied together, generating 32-bit results (we ignore the
9807 // top parts).
9808 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
9809 LHS, RHS, DAG, dl, MVT::v4i32);
9810
9811 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
9812 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
9813 // Shift the high parts up 16 bits.
9814 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
9815 Neg16, DAG, dl);
9816 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
9817 } else if (Op.getValueType() == MVT::v8i16) {
9818 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
9819
9820 SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl);
9821
9822 return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
9823 LHS, RHS, Zero, DAG, dl);
9824 } else if (Op.getValueType() == MVT::v16i8) {
9825 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
9826 bool isLittleEndian = Subtarget.isLittleEndian();
9827
9828 // Multiply the even 8-bit parts, producing 16-bit sums.
9829 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
9830 LHS, RHS, DAG, dl, MVT::v8i16);
9831 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
9832
9833 // Multiply the odd 8-bit parts, producing 16-bit sums.
9834 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
9835 LHS, RHS, DAG, dl, MVT::v8i16);
9836 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
9837
9838 // Merge the results together. Because vmuleub and vmuloub are
9839 // instructions with a big-endian bias, we must reverse the
9840 // element numbering and reverse the meaning of "odd" and "even"
9841 // when generating little endian code.
9842 int Ops[16];
9843 for (unsigned i = 0; i != 8; ++i) {
9844 if (isLittleEndian) {
9845 Ops[i*2 ] = 2*i;
9846 Ops[i*2+1] = 2*i+16;
9847 } else {
9848 Ops[i*2 ] = 2*i+1;
9849 Ops[i*2+1] = 2*i+1+16;
9850 }
9851 }
9852 if (isLittleEndian)
9853 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
9854 else
9855 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
9856 } else {
9857     llvm_unreachable("Unknown mul to lower!");
9858 }
9859}
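A scalar reference model of the v4i32 path above (a sketch, not LLVM code; mulViaHalves is a made-up name): per 32-bit lane the product decomposes into the low half-word product (vmulouh) plus the shifted sum of the cross products (vmsumuhm on the rotated operand, then vslw), all modulo 2^32:

#include <cstdint>
#include <cstdio>

static uint32_t mulViaHalves(uint32_t A, uint32_t B) {
  uint32_t ALo = A & 0xFFFF, AHi = A >> 16;
  uint32_t BLo = B & 0xFFFF, BHi = B >> 16;
  uint32_t LoProd = ALo * BLo;                     // vmulouh
  uint32_t HiProd = (ALo * BHi + AHi * BLo) << 16; // vmsumuhm, then vslw by 16
  return LoProd + HiProd;                          // final ISD::ADD
}

int main() {
  uint32_t A = 0x12345678u, B = 0x9ABCDEF0u;
  std::printf("%u %u\n", (unsigned)mulViaHalves(A, B), (unsigned)(A * B));
  return 0;                                        // both values are equal
}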
9860
9861SDValue PPCTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
9862
9863   assert(Op.getOpcode() == ISD::ABS && "Should only be called for ISD::ABS");
9864
9865 EVT VT = Op.getValueType();
9866 assert(VT.isVector() &&
9867 "Only set vector abs as custom, scalar abs shouldn't reach here!");
9868 assert((VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
9869 VT == MVT::v16i8) &&
9870 "Unexpected vector element type!");
9871 assert((VT != MVT::v2i64 || Subtarget.hasP8Altivec()) &&
9872 "Current subtarget doesn't support smax v2i64!");
9873
9874 // For vector abs, it can be lowered to:
9875 // abs x
9876 // ==>
9877 // y = -x
9878 // smax(x, y)
9879
9880 SDLoc dl(Op);
9881 SDValue X = Op.getOperand(0);
9882 SDValue Zero = DAG.getConstant(0, dl, VT);
9883 SDValue Y = DAG.getNode(ISD::SUB, dl, VT, Zero, X);
9884
9885 // The SMAX patch (https://reviews.llvm.org/D47332)
9886 // hasn't landed yet, so use the intrinsics for now.
9887 // TODO: Use SMAX directly once that patch has landed.
9888 Intrinsic::ID BifID = Intrinsic::ppc_altivec_vmaxsw;
9889 if (VT == MVT::v2i64)
9890 BifID = Intrinsic::ppc_altivec_vmaxsd;
9891 else if (VT == MVT::v8i16)
9892 BifID = Intrinsic::ppc_altivec_vmaxsh;
9893 else if (VT == MVT::v16i8)
9894 BifID = Intrinsic::ppc_altivec_vmaxsb;
9895
9896 return BuildIntrinsicOp(BifID, X, Y, DAG, dl, VT);
9897}
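// [Editorial sketch, not part of PPCISelLowering.cpp] Scalar model of the
// identity used above: |x| == smax(x, 0 - x) per signed element, where the
// negation wraps (so INT_MIN maps to INT_MIN, as the vector SUB does).
#include <algorithm>
#include <cstdint>
static int32_t absViaSmaxModel(int32_t X) {
  // y = -x with two's-complement wrap, avoiding signed-overflow UB.
  int32_t Y = static_cast<int32_t>(0u - static_cast<uint32_t>(X));
  return std::max(X, Y); // smax(x, y)
}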
9898
9899 // Custom lowering for fpext v2f32 to v2f64
9900SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
9901
9902 assert(Op.getOpcode() == ISD::FP_EXTEND &&
9903 "Should only be called for ISD::FP_EXTEND");
9904
9905 // We only want to custom lower an extend from v2f32 to v2f64.
9906 if (Op.getValueType() != MVT::v2f64 ||
9907 Op.getOperand(0).getValueType() != MVT::v2f32)
9908 return SDValue();
9909
9910 SDLoc dl(Op);
9911 SDValue Op0 = Op.getOperand(0);
9912
9913 switch (Op0.getOpcode()) {
9914 default:
9915 return SDValue();
9916 case ISD::EXTRACT_SUBVECTOR: {
9917 assert(Op0.getNumOperands() == 2 &&
9918 isa<ConstantSDNode>(Op0->getOperand(1)) &&
9919 "Node should have 2 operands with second one being a constant!");
9920
9921 if (Op0.getOperand(0).getValueType() != MVT::v4f32)
9922 return SDValue();
9923
9924 // Custom lowering is only done for the high or low doubleword.
9925 int Idx = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
9926 if (Idx % 2 != 0)
9927 return SDValue();
9928
9929 // Since input is v4f32, at this point Idx is either 0 or 2.
9930 // Shift to get the doubleword position we want.
9931 int DWord = Idx >> 1;
9932
9933 // High and low word positions are different on little endian.
9934 if (Subtarget.isLittleEndian())
9935 DWord ^= 0x1;
9936
9937 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
9938 Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
9939 }
9940 case ISD::FADD:
9941 case ISD::FMUL:
9942 case ISD::FSUB: {
9943 SDValue NewLoad[2];
9944 for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
9945 // Ensure both inputs are loads.
9946 SDValue LdOp = Op0.getOperand(i);
9947 if (LdOp.getOpcode() != ISD::LOAD)
9948 return SDValue();
9949 // Generate new load node.
9950 LoadSDNode *LD = cast<LoadSDNode>(LdOp);
9951 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
9952 NewLoad[i] = DAG.getMemIntrinsicNode(
9953 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
9954 LD->getMemoryVT(), LD->getMemOperand());
9955 }
9956 SDValue NewOp =
9957 DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
9958 NewLoad[1], Op0.getNode()->getFlags());
9959 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
9960 DAG.getConstant(0, dl, MVT::i32));
9961 }
9962 case ISD::LOAD: {
9963 LoadSDNode *LD = cast<LoadSDNode>(Op0);
9964 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
9965 SDValue NewLd = DAG.getMemIntrinsicNode(
9966 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
9967 LD->getMemoryVT(), LD->getMemOperand());
9968 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
9969 DAG.getConstant(0, dl, MVT::i32));
9970 }
9971 }
9972 llvm_unreachable("ERROR: Should return for all cases within switch.");
9973}
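// [Editorial sketch, not part of PPCISelLowering.cpp] Model of the doubleword
// selection in the EXTRACT_SUBVECTOR case above: an index of 0 or 2 picks
// doubleword 0 or 1 of the v4f32 source, and little-endian targets flip the
// choice to match register lane numbering. The function name is hypothetical.
static int fpExtendDWordModel(int Idx /* 0 or 2 */, bool IsLittleEndian) {
  int DWord = Idx >> 1; // 0 -> doubleword 0, 2 -> doubleword 1
  if (IsLittleEndian)
    DWord ^= 0x1;       // high/low doublewords swap on little endian
  return DWord;         // immediate operand of PPCISD::FP_EXTEND_HALF
}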
9974
9975/// LowerOperation - Provide custom lowering hooks for some operations.
9976///
9977SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
9978 switch (Op.getOpcode()) {
9979 default: llvm_unreachable("Wasn't expecting to be able to lower this!");
9980 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
9981 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
9982 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
9983 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
9984 case ISD::JumpTable: return LowerJumpTable(Op, DAG);
9985 case ISD::SETCC: return LowerSETCC(Op, DAG);
9986 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
9987 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
9988
9989 // Variable argument lowering.
9990 case ISD::VASTART: return LowerVASTART(Op, DAG);
9991 case ISD::VAARG: return LowerVAARG(Op, DAG);
9992 case ISD::VACOPY: return LowerVACOPY(Op, DAG);
9993
9994 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
9995 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
9996 case ISD::GET_DYNAMIC_AREA_OFFSET:
9997 return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
9998
9999 // Exception handling lowering.
10000 case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
10001 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
10002 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
10003
10004 case ISD::LOAD: return LowerLOAD(Op, DAG);
10005 case ISD::STORE: return LowerSTORE(Op, DAG);
10006 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
10007 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
10008 case ISD::FP_TO_UINT:
10009 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
10010 case ISD::UINT_TO_FP:
10011 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
10012 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
10013
10014 // Lower 64-bit shifts.
10015 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
10016 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
10017 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
10018
10019 // Vector-related lowering.
10020 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
10021 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
10022 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
10023 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
10024 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
10025 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
10026 case ISD::MUL: return LowerMUL(Op, DAG);
10027 case ISD::ABS: return LowerABS(Op, DAG);
10028 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
10029
10030 // For counter-based loop handling.
10031 case ISD::INTRINSIC_W_CHAIN: return SDValue();
10032
10033 case ISD::BITCAST: return LowerBITCAST(Op, DAG);
10034
10035 // Frame & Return address.
10036 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
10037 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
10038
10039 case ISD::INTRINSIC_VOID:
10040 return LowerINTRINSIC_VOID(Op, DAG);
10041 case ISD::SREM:
10042 case ISD::UREM:
10043 return LowerREM(Op, DAG);
10044 case ISD::BSWAP:
10045 return LowerBSWAP(Op, DAG);
10046 case ISD::ATOMIC_CMP_SWAP:
10047 return LowerATOMIC_CMP_SWAP(Op, DAG);
10048 }
10049}
10050
10051void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
10052 SmallVectorImpl<SDValue>&Results,
10053 SelectionDAG &DAG) const {
10054 SDLoc dl(N);
10055 switch (N->getOpcode()) {
10056 default:
10057 llvm_unreachable("Do not know how to custom type legalize this operation!");
10058 case ISD::READCYCLECOUNTER: {
10059 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
10060 SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
10061
10062 Results.push_back(RTB);
10063 Results.push_back(RTB.getValue(1));
10064 Results.push_back(RTB.getValue(2));
10065 break;
10066 }
10067 case ISD::INTRINSIC_W_CHAIN: {
10068 if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
10069 Intrinsic::loop_decrement)
10070 break;
10071
10072 assert(N->getValueType(0) == MVT::i1 &&
10073 "Unexpected result type for CTR decrement intrinsic");
10074 EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
10075 N->getValueType(0));
10076 SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
10077 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
10078 N->getOperand(1));
10079
10080 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
10081 Results.push_back(NewInt.getValue(1));
10082 break;
10083 }
10084 case ISD::VAARG: {
10085 if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
10086 return;
10087
10088 EVT VT = N->getValueType(0);
10089
10090 if (VT == MVT::i64) {
10091 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
10092
10093 Results.push_back(NewNode);
10094 Results.push_back(NewNode.getValue(1));
10095 }
10096 return;
10097 }
10098 case ISD::FP_TO_SINT:
10099 case ISD::FP_TO_UINT:
10100 // LowerFP_TO_INT() can only handle f32 and f64.
10101 if (N->getOperand(0).getValueType() == MVT::ppcf128)
10102 return;
10103 Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
10104 return;
10105 case ISD::TRUNCATE: {
10106 EVT TrgVT = N->getValueType(0);
10107 EVT OpVT = N->getOperand(0).getValueType();
10108 if (TrgVT.isVector() &&
10109 isOperationCustom(N->getOpcode(), TrgVT) &&
10110 OpVT.getSizeInBits() <= 128 &&
10111 isPowerOf2_32(OpVT.getVectorElementType().getSizeInBits()))
10112 Results.push_back(LowerTRUNCATEVector(SDValue(N, 0), DAG));
10113 return;
10114 }
10115 case ISD::BITCAST:
10116 // Don't handle bitcast here.
10117 return;
10118 }
10119}
10120
10121//===----------------------------------------------------------------------===//
10122// Other Lowering Code
10123//===----------------------------------------------------------------------===//
10124
10125static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
10126 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
10127 Function *Func = Intrinsic::getDeclaration(M, Id);
10128 return Builder.CreateCall(Func, {});
10129}
10130
10131 // The mappings for emitLeading/TrailingFence are taken from
10132// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
10133Instruction *PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
10134 Instruction *Inst,
10135 AtomicOrdering Ord) const {
10136 if (Ord == AtomicOrdering::SequentiallyConsistent)
10137 return callIntrinsic(Builder, Intrinsic::ppc_sync);
10138 if (isReleaseOrStronger(Ord))
10139 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
10140 return nullptr;
10141}
10142
10143Instruction *PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
10144 Instruction *Inst,
10145 AtomicOrdering Ord) const {
10146 if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
10147 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
10148 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
10149 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
10150 if (isa<LoadInst>(Inst) && Subtarget.isPPC64())
10151 return Builder.CreateCall(
10152 Intrinsic::getDeclaration(
10153 Builder.GetInsertBlock()->getParent()->getParent(),
10154 Intrinsic::ppc_cfence, {Inst->getType()}),
10155 {Inst});
10156 // FIXME: Can use isync for rmw operation.
10157 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
10158 }
10159 return nullptr;
10160}
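// [Editorial sketch, not part of PPCISelLowering.cpp] A compact model of the
// fence selection implemented by the two hooks above; it returns the name of
// the intrinsic that would be emitted ("" means no fence). The model function
// names and the OrdModel enum are hypothetical.
#include <string>
enum class OrdModel { Relaxed, Acquire, Release, AcqRel, SeqCst };
static std::string leadingFenceModel(OrdModel O) {
  if (O == OrdModel::SeqCst)
    return "ppc_sync";   // full barrier before the operation
  if (O == OrdModel::Release || O == OrdModel::AcqRel)
    return "ppc_lwsync"; // lightweight barrier for release semantics
  return "";
}
static std::string trailingFenceModel(OrdModel O, bool HasAtomicLoad,
                                      bool IsPlainLoad, bool IsPPC64) {
  bool AcquireOrStronger = O == OrdModel::Acquire || O == OrdModel::AcqRel ||
                           O == OrdModel::SeqCst;
  if (!HasAtomicLoad || !AcquireOrStronger)
    return "";
  if (IsPlainLoad && IsPPC64)
    return "ppc_cfence"; // dependency-based fence for plain 64-bit loads
  return "ppc_lwsync";
}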
10161
10162MachineBasicBlock *
10163PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
10164 unsigned AtomicSize,
10165 unsigned BinOpcode,
10166 unsigned CmpOpcode,
10167 unsigned CmpPred) const {
10168 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
10169 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
10170
10171 auto LoadMnemonic = PPC::LDARX;
10172 auto StoreMnemonic = PPC::STDCX;
10173 switch (AtomicSize) {
10174 default:
10175 llvm_unreachable("Unexpected size of atomic entity");
10176 case 1:
10177 LoadMnemonic = PPC::LBARX;
10178 StoreMnemonic = PPC::STBCX;
10179 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
10180 break;
10181 case 2:
10182 LoadMnemonic = PPC::LHARX;
10183 StoreMnemonic = PPC::STHCX;
10184 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
10185 break;
10186 case 4:
10187 LoadMnemonic = PPC::LWARX;
10188 StoreMnemonic = PPC::STWCX;
10189 break;
10190 case 8:
10191 LoadMnemonic = PPC::LDARX;
10192 StoreMnemonic = PPC::STDCX;
10193 break;
10194 }
10195
10196 const BasicBlock *LLVM_BB = BB->getBasicBlock();
10197 MachineFunction *F = BB->getParent();
10198 MachineFunction::iterator It = ++BB->getIterator();
10199
10200 Register dest = MI.getOperand(0).getReg();
10201 Register ptrA = MI.getOperand(1).getReg();
10202 Register ptrB = MI.getOperand(2).getReg();
10203 Register incr = MI.getOperand(3).getReg();
10204 DebugLoc dl = MI.getDebugLoc();
10205
10206 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
10207 MachineBasicBlock *loop2MBB =
10208 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
10209 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
10210 F->insert(It, loopMBB);
10211 if (CmpOpcode)
10212 F->insert(It, loop2MBB);
10213 F->insert(It, exitMBB);
10214 exitMBB->splice(exitMBB->begin(), BB,
10215 std::next(MachineBasicBlock::iterator(MI)), BB->end());
10216 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
10217
10218 MachineRegisterInfo &RegInfo = F->getRegInfo();
10219 Register TmpReg = (!BinOpcode) ? incr :
10220 RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
10221 : &PPC::GPRCRegClass);
10222
10223 // thisMBB:
10224 // ...
10225 // fallthrough --> loopMBB
10226 BB->addSuccessor(loopMBB);
10227
10228 // loopMBB:
10229 // l[wd]arx dest, ptr
10230 // add r0, dest, incr
10231 // st[wd]cx. r0, ptr
10232 // bne- loopMBB
10233 // fallthrough --> exitMBB
10234
10235 // For max/min...
10236 // loopMBB:
10237 // l[wd]arx dest, ptr
10238 // cmpl?[wd] incr, dest
10239 // bgt exitMBB
10240 // loop2MBB:
10241 // st[wd]cx. dest, ptr
10242 // bne- loopMBB
10243 // fallthrough --> exitMBB
10244
10245 BB = loopMBB;
10246 BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
10247 .addReg(ptrA).addReg(ptrB);
10248 if (BinOpcode)
10249 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
10250 if (CmpOpcode) {
10251 // Signed comparisons of byte or halfword values must be sign-extended.
10252 if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
10253 Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
10254 BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
10255 ExtReg).addReg(dest);
10256 BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
10257 .addReg(incr).addReg(ExtReg);
10258 } else
10259 BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
10260 .addReg(incr).addReg(dest);
10261
10262 BuildMI(BB, dl, TII->get(PPC::BCC))
10263 .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
10264 BB->addSuccessor(loop2MBB);
10265 BB->addSuccessor(exitMBB);
10266 BB = loop2MBB;
10267 }
10268 BuildMI(BB, dl, TII->get(StoreMnemonic))
10269 .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
10270 BuildMI(BB, dl, TII->get(PPC::BCC))
10271 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
10272 BB->addSuccessor(loopMBB);
10273 BB->addSuccessor(exitMBB);
10274
10275 // exitMBB:
10276 // ...
10277 BB = exitMBB;
10278 return BB;
10279}
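// [Editorial sketch, not part of PPCISelLowering.cpp] C-level model of the
// larx/stcx. retry loop built above, for a 32-bit atomic add (BinOpcode =
// ADD4, no CmpOpcode). The __atomic builtins stand in for the reservation
// pair; the real expansion branches back to loopMBB when stwcx. fails.
#include <cstdint>
static uint32_t atomicAddModel(uint32_t *Ptr, uint32_t Incr) {
  uint32_t Old, New;
  do {
    Old = __atomic_load_n(Ptr, __ATOMIC_RELAXED); // lwarx dest, ptr
    New = Old + Incr;                             // add   r0, dest, incr
  } while (!__atomic_compare_exchange_n(Ptr, &Old, New, /*weak=*/true,
                                        __ATOMIC_RELAXED, __ATOMIC_RELAXED));
  return Old; // 'dest', the value observed before the update
}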
10280
10281MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
10282 MachineInstr &MI, MachineBasicBlock *BB,
10283 bool is8bit, // operation
10284 unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
10285 // If we support part-word atomic mnemonics, just use them
10286 if (Subtarget.hasPartwordAtomics())
10287 return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
10288 CmpPred);
10289
10290 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
10291 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
10292 // In 64-bit mode we have to use 64 bits for addresses, even though the
10293 // lwarx/stwcx operate on 32-bit words. With the 32-bit atomics we can use
10294 // address registers without caring whether they're 32 or 64 bits, but here
10295 // we're doing actual arithmetic on the addresses.
10296 bool is64bit = Subtarget.isPPC64();
10297 bool isLittleEndian = Subtarget.isLittleEndian();
10298 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
10299
10300 const BasicBlock *LLVM_BB = BB->getBasicBlock();
10301 MachineFunction *F = BB->getParent();
10302 MachineFunction::iterator It = ++BB->getIterator();
10303
10304 Register dest = MI.getOperand(0).getReg();
10305 Register ptrA = MI.getOperand(1).getReg();
10306 Register ptrB = MI.getOperand(2).getReg();
10307 Register incr = MI.getOperand(3).getReg();
10308 DebugLoc dl = MI.getDebugLoc();
10309
10310 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
10311 MachineBasicBlock *loop2MBB =
10312 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
10313 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
10314 F->insert(It, loopMBB);
10315 if (CmpOpcode)
10316 F->insert(It, loop2MBB);
10317 F->insert(It, exitMBB);
10318 exitMBB->splice(exitMBB->begin(), BB,
10319 std::next(MachineBasicBlock::iterator(MI)), BB->end());
10320 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
10321
10322 MachineRegisterInfo &RegInfo = F->getRegInfo();
10323 const TargetRegisterClass *RC =
10324 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
10325 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
10326
10327 Register PtrReg = RegInfo.createVirtualRegister(RC);
10328 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
10329 Register ShiftReg =
10330 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
10331 Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
10332 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
10333 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
10334 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
10335 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
10336 Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
10337 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
10338 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
10339 Register Ptr1Reg;
10340 Register TmpReg =
10341 (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
10342
10343 // thisMBB:
10344 // ...
10345 // fallthrough --> loopMBB
10346 BB->addSuccessor(loopMBB);
10347
10348 // The 4-byte load must be aligned, while a char or short may be
10349 // anywhere in the word. Hence all this nasty bookkeeping code.
10350 // add ptr1, ptrA, ptrB [copy if ptrA==0]
10351 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
10352 // xori shift, shift1, 24 [16]
10353 // rlwinm ptr, ptr1, 0, 0, 29
10354 // slw incr2, incr, shift
10355 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
10356 // slw mask, mask2, shift
10357 // loopMBB:
10358 // lwarx tmpDest, ptr
10359 // add tmp, tmpDest, incr2
10360 // andc tmp2, tmpDest, mask
10361 // and tmp3, tmp, mask
10362 // or tmp4, tmp3, tmp2
10363 // stwcx. tmp4, ptr
10364 // bne- loopMBB
10365 // fallthrough --> exitMBB
10366 // srw dest, tmpDest, shift
10367 if (ptrA != ZeroReg) {
10368 Ptr1Reg = RegInfo.createVirtualRegister(RC);
10369 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
10370 .addReg(ptrA)
10371 .addReg(ptrB);
10372 } else {
10373 Ptr1Reg = ptrB;
10374 }
10375 // We need to use a 32-bit subregister here to avoid a register-class
10376 // mismatch in 64-bit mode.
10377 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
10378 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
10379 .addImm(3)
10380 .addImm(27)
10381 .addImm(is8bit ? 28 : 27);
10382 if (!isLittleEndian)
10383 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
10384 .addReg(Shift1Reg)
10385 .addImm(is8bit ? 24 : 16);
10386 if (is64bit)
10387 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
10388 .addReg(Ptr1Reg)
10389 .addImm(0)
10390 .addImm(61);
10391 else
10392 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
10393 .addReg(Ptr1Reg)
10394 .addImm(0)
10395 .addImm(0)
10396 .addImm(29);
10397 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
10398 if (is8bit)
10399 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
10400 else {
10401 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
10402 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
10403 .addReg(Mask3Reg)
10404 .addImm(65535);
10405 }
10406 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
10407 .addReg(Mask2Reg)
10408 .addReg(ShiftReg);
10409
10410 BB = loopMBB;
10411 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
10412 .addReg(ZeroReg)
10413 .addReg(PtrReg);
10414 if (BinOpcode)
10415 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
10416 .addReg(Incr2Reg)
10417 .addReg(TmpDestReg);
10418 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
10419 .addReg(TmpDestReg)
10420 .addReg(MaskReg);
10421 BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
10422 if (CmpOpcode) {
10423 // For unsigned comparisons, we can directly compare the shifted values.
10424 // For signed comparisons we shift and sign extend.
10425 Register SReg = RegInfo.createVirtualRegister(GPRC);
10426 BuildMI(BB, dl, TII->get(PPC::AND), SReg)
10427 .addReg(TmpDestReg)
10428 .addReg(MaskReg);
10429 unsigned ValueReg = SReg;
10430 unsigned CmpReg = Incr2Reg;
10431 if (CmpOpcode == PPC::CMPW) {
10432 ValueReg = RegInfo.createVirtualRegister(GPRC);
10433 BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
10434 .addReg(SReg)
10435 .addReg(ShiftReg);
10436 Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
10437 BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
10438 .addReg(ValueReg);
10439 ValueReg = ValueSReg;
10440 CmpReg = incr;
10441 }
10442 BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
10443 .addReg(CmpReg)
10444 .addReg(ValueReg);
10445 BuildMI(BB, dl, TII->get(PPC::BCC))
10446 .addImm(CmpPred)
10447 .addReg(PPC::CR0)
10448 .addMBB(exitMBB);
10449 BB->addSuccessor(loop2MBB);
10450 BB->addSuccessor(exitMBB);
10451 BB = loop2MBB;
10452 }
10453 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
10454 BuildMI(BB, dl, TII->get(PPC::STWCX))
10455 .addReg(Tmp4Reg)
10456 .addReg(ZeroReg)
10457 .addReg(PtrReg);
10458 BuildMI(BB, dl, TII->get(PPC::BCC))
10459 .addImm(PPC::PRED_NE)
10460 .addReg(PPC::CR0)
10461 .addMBB(loopMBB);
10462 BB->addSuccessor(loopMBB);
10463 BB->addSuccessor(exitMBB);
10464
10465 // exitMBB:
10466 // ...
10467 BB = exitMBB;
10468 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
10469 .addReg(TmpDestReg)
10470 .addReg(ShiftReg);
10471 return BB;
10472}
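// [Editorial sketch, not part of PPCISelLowering.cpp] Model of the big-endian
// byte bookkeeping above for an 8-bit swap without partword atomics: the
// containing aligned word is rewritten, and only the byte selected by the
// shifted mask changes. The function name is hypothetical.
#include <cstdint>
static uint32_t insertByteModel(uint32_t Word, uint8_t NewByte,
                                unsigned ByteInWord /* 0..3, big endian */) {
  unsigned Shift = (3 - ByteInWord) * 8;        // rlwinm shift1 + xori shift, 24
  uint32_t Mask  = 0xFFu << Shift;              // slw mask, mask2, shift
  uint32_t Incr2 = uint32_t(NewByte) << Shift;  // slw incr2, incr, shift
  return (Word & ~Mask) | (Incr2 & Mask);       // andc / and / or of the loop
}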
10473
10474llvm::MachineBasicBlock *
10475PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
10476 MachineBasicBlock *MBB) const {
10477 DebugLoc DL = MI.getDebugLoc();
10478 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
10479 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
10480
10481 MachineFunction *MF = MBB->getParent();
10482 MachineRegisterInfo &MRI = MF->getRegInfo();
10483
10484 const BasicBlock *BB = MBB->getBasicBlock();
10485 MachineFunction::iterator I = ++MBB->getIterator();
10486
10487 Register DstReg = MI.getOperand(0).getReg();
10488 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
10489 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
10490 Register mainDstReg = MRI.createVirtualRegister(RC);
10491 Register restoreDstReg = MRI.createVirtualRegister(RC);
10492
10493 MVT PVT = getPointerTy(MF->getDataLayout());
10494 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
10495 "Invalid Pointer Size!");
10496 // For v = setjmp(buf), we generate
10497 //
10498 // thisMBB:
10499 // SjLjSetup mainMBB
10500 // bl mainMBB
10501 // v_restore = 1
10502 // b sinkMBB
10503 //
10504 // mainMBB:
10505 // buf[LabelOffset] = LR
10506 // v_main = 0
10507 //
10508 // sinkMBB:
10509 // v = phi(main, restore)
10510 //
10511
10512 MachineBasicBlock *thisMBB = MBB;
10513 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
10514 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
10515 MF->insert(I, mainMBB);
10516 MF->insert(I, sinkMBB);
10517
10518 MachineInstrBuilder MIB;
10519
10520 // Transfer the remainder of BB and its successor edges to sinkMBB.
10521 sinkMBB->splice(sinkMBB->begin(), MBB,
10522 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
10523 sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
10524
10525 // Note that the structure of the jmp_buf used here is not compatible
10526 // with that used by libc, and is not designed to be. Specifically, it
10527 // stores only those 'reserved' registers that LLVM does not otherwise
10528 // understand how to spill. Also, by convention, by the time this
10529 // intrinsic is called, Clang has already stored the frame address in the
10530 // first slot of the buffer and stack address in the third. Following the
10531 // X86 target code, we'll store the jump address in the second slot. We also
10532 // need to save the TOC pointer (R2) to handle jumps between shared
10533 // libraries, and that will be stored in the fourth slot. The thread
10534 // identifier (R13) is not affected.
10535
10536 // thisMBB:
10537 const int64_t LabelOffset = 1 * PVT.getStoreSize();
10538 const int64_t TOCOffset = 3 * PVT.getStoreSize();
10539 const int64_t BPOffset = 4 * PVT.getStoreSize();
10540
10541 // Prepare the IP in a register.
10542 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
10543 Register LabelReg = MRI.createVirtualRegister(PtrRC);
10544 Register BufReg = MI.getOperand(1).getReg();
10545
10546 if (Subtarget.is64BitELFABI()) {
10547 setUsesTOCBasePtr(*MBB->getParent());
10548 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
10549 .addReg(PPC::X2)
10550 .addImm(TOCOffset)
10551 .addReg(BufReg)
10552 .cloneMemRefs(MI);
10553 }
10554
10555 // Naked functions never have a base pointer, and so we use r1. For all
10556 // other functions, this decision must be delayed until PEI.
10557 unsigned BaseReg;
10558 if (MF->getFunction().hasFnAttribute(Attribute::Naked))
10559 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
10560 else
10561 BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
10562
10563 MIB = BuildMI(*thisMBB, MI, DL,
10564 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
10565 .addReg(BaseReg)
10566 .addImm(BPOffset)
10567 .addReg(BufReg)
10568 .cloneMemRefs(MI);
10569
10570 // Setup
10571 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
10572 MIB.addRegMask(TRI->getNoPreservedMask());
10573
10574 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
10575
10576 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
10577 .addMBB(mainMBB);
10578 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
10579
10580 thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
10581 thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
10582
10583 // mainMBB:
10584 // mainDstReg = 0
10585 MIB =
10586 BuildMI(mainMBB, DL,
10587 TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
10588
10589 // Store IP
10590 if (Subtarget.isPPC64()) {
10591 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
10592 .addReg(LabelReg)
10593 .addImm(LabelOffset)
10594 .addReg(BufReg);
10595 } else {
10596 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
10597 .addReg(LabelReg)
10598 .addImm(LabelOffset)
10599 .addReg(BufReg);
10600 }
10601 MIB.cloneMemRefs(MI);
10602
10603 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
10604 mainMBB->addSuccessor(sinkMBB);
10605
10606 // sinkMBB:
10607 BuildMI(*sinkMBB, sinkMBB->begin(), DL,
10608 TII->get(PPC::PHI), DstReg)
10609 .addReg(mainDstReg).addMBB(mainMBB)
10610 .addReg(restoreDstReg).addMBB(thisMBB);
10611
10612 MI.eraseFromParent();
10613 return sinkMBB;
10614}
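// [Editorial sketch, not part of PPCISelLowering.cpp] Slot layout of the
// LLVM-internal (non-libc) jmp_buf described in the comments above; the slot
// size is PVT.getStoreSize() and the struct name is hypothetical.
struct SjLjBufModel {
  void *FrameAddress; // slot 0: stored by Clang before the intrinsic
  void *JumpAddress;  // slot 1: LR saved above (LabelOffset)
  void *StackAddress; // slot 2: stored by Clang (SPOffset in the longjmp path)
  void *TOCPointer;   // slot 3: R2, for jumps between shared libraries
  void *BasePointer;  // slot 4: r1 or the base pointer (BPOffset)
};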
10615
10616MachineBasicBlock *
10617PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
10618 MachineBasicBlock *MBB) const {
10619 DebugLoc DL = MI.getDebugLoc();
10620 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
10621
10622 MachineFunction *MF = MBB->getParent();
10623 MachineRegisterInfo &MRI = MF->getRegInfo();
10624
10625 MVT PVT = getPointerTy(MF->getDataLayout());
10626 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
10627 "Invalid Pointer Size!");
10628
10629 const TargetRegisterClass *RC =
10630 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
10631 Register Tmp = MRI.createVirtualRegister(RC);
10632 // Since FP is only updated here but NOT referenced, it's treated as GPR.
10633 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
10634 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
10635 unsigned BP =
10636 (PVT == MVT::i64)
10637 ? PPC::X30
10638 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
10639 : PPC::R30);
10640
10641 MachineInstrBuilder MIB;
10642
10643 const int64_t LabelOffset = 1 * PVT.getStoreSize();
10644 const int64_t SPOffset = 2 * PVT.getStoreSize();
10645 const int64_t TOCOffset = 3 * PVT.getStoreSize();
10646 const int64_t BPOffset = 4 * PVT.getStoreSize();
10647
10648 Register BufReg = MI.getOperand(0).getReg();
10649
10650 // Reload FP (the jumped-to function may not have had a
10651 // frame pointer, and if so, then its r31 will be restored
10652 // as necessary).
10653 if (PVT == MVT::i64) {
10654 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
10655 .addImm(0)
10656 .addReg(BufReg);
10657 } else {
10658 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
10659 .addImm(0)
10660 .addReg(BufReg);
10661 }
10662 MIB.cloneMemRefs(MI);
10663
10664 // Reload IP
10665 if (PVT == MVT::i64) {
10666 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
10667 .addImm(LabelOffset)
10668 .addReg(BufReg);
10669 } else {
10670 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
10671 .addImm(LabelOffset)
10672 .addReg(BufReg);
10673 }
10674 MIB.cloneMemRefs(MI);
10675
10676 // Reload SP
10677 if (PVT == MVT::i64) {
10678 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
10679 .addImm(SPOffset)
10680 .addReg(BufReg);
10681 } else {
10682 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
10683 .addImm(SPOffset)
10684 .addReg(BufReg);
10685 }
10686 MIB.cloneMemRefs(MI);
10687
10688 // Reload BP
10689 if (PVT == MVT::i64) {
10690 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
10691 .addImm(BPOffset)
10692 .addReg(BufReg);
10693 } else {
10694 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
10695 .addImm(BPOffset)
10696 .addReg(BufReg);
10697 }
10698 MIB.cloneMemRefs(MI);
10699
10700 // Reload TOC
10701 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
10702 setUsesTOCBasePtr(*MBB->getParent());
10703 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
10704 .addImm(TOCOffset)
10705 .addReg(BufReg)
10706 .cloneMemRefs(MI);
10707 }
10708
10709 // Jump
10710 BuildMI(*MBB, MI, DL,
10711 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
10712 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
10713
10714 MI.eraseFromParent();
10715 return MBB;
10716}
10717
10718MachineBasicBlock *
10719PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
10720 MachineBasicBlock *BB) const {
10721 if (MI.getOpcode() == TargetOpcode::STACKMAP ||
10722 MI.getOpcode() == TargetOpcode::PATCHPOINT) {
10723 if (Subtarget.is64BitELFABI() &&
10724 MI.getOpcode() == TargetOpcode::PATCHPOINT) {
10725 // Call lowering should have added an r2 operand to indicate a dependence
10726 // on the TOC base pointer value. It can't however, because there is no
10727 // way to mark the dependence as implicit there, and so the stackmap code
10728 // will confuse it with a regular operand. Instead, add the dependence
10729 // here.
10730 MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
10731 }
10732
10733 return emitPatchPoint(MI, BB);
10734 }
10735
10736 if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
10737 MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
10738 return emitEHSjLjSetJmp(MI, BB);
10739 } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
10740 MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
10741 return emitEHSjLjLongJmp(MI, BB);
10742 }
10743
10744 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
10745
10746 // To "insert" these instructions we actually have to insert their
10747 // control-flow patterns.
10748 const BasicBlock *LLVM_BB = BB->getBasicBlock();
10749 MachineFunction::iterator It = ++BB->getIterator();
10750
10751 MachineFunction *F = BB->getParent();
10752
10753 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
10754 MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 ||
10755 MI.getOpcode() == PPC::SELECT_I8) {
10756 SmallVector<MachineOperand, 2> Cond;
10757 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
10758 MI.getOpcode() == PPC::SELECT_CC_I8)
10759 Cond.push_back(MI.getOperand(4));
10760 else
10761 Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
10762 Cond.push_back(MI.getOperand(1));
10763
10764 DebugLoc dl = MI.getDebugLoc();
10765 TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
10766 MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
10767 } else if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
10768 MI.getOpcode() == PPC::SELECT_CC_I8 ||
10769 MI.getOpcode() == PPC::SELECT_CC_F4 ||
10770 MI.getOpcode() == PPC::SELECT_CC_F8 ||
10771 MI.getOpcode() == PPC::SELECT_CC_F16 ||
10772 MI.getOpcode() == PPC::SELECT_CC_QFRC ||
10773 MI.getOpcode() == PPC::SELECT_CC_QSRC ||
10774 MI.getOpcode() == PPC::SELECT_CC_QBRC ||
10775 MI.getOpcode() == PPC::SELECT_CC_VRRC ||
10776 MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
10777 MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
10778 MI.getOpcode() == PPC::SELECT_CC_VSRC ||
10779 MI.getOpcode() == PPC::SELECT_CC_SPE4 ||
10780 MI.getOpcode() == PPC::SELECT_CC_SPE ||
10781 MI.getOpcode() == PPC::SELECT_I4 ||
10782 MI.getOpcode() == PPC::SELECT_I8 ||
10783 MI.getOpcode() == PPC::SELECT_F4 ||
10784 MI.getOpcode() == PPC::SELECT_F8 ||
10785 MI.getOpcode() == PPC::SELECT_F16 ||
10786 MI.getOpcode() == PPC::SELECT_QFRC ||
10787 MI.getOpcode() == PPC::SELECT_QSRC ||
10788 MI.getOpcode() == PPC::SELECT_QBRC ||
10789 MI.getOpcode() == PPC::SELECT_SPE ||
10790 MI.getOpcode() == PPC::SELECT_SPE4 ||
10791 MI.getOpcode() == PPC::SELECT_VRRC ||
10792 MI.getOpcode() == PPC::SELECT_VSFRC ||
10793 MI.getOpcode() == PPC::SELECT_VSSRC ||
10794 MI.getOpcode() == PPC::SELECT_VSRC) {
10795 // The incoming instruction knows the destination vreg to set, the
10796 // condition code register to branch on, the true/false values to
10797 // select between, and a branch opcode to use.
10798
10799 // thisMBB:
10800 // ...
10801 // TrueVal = ...
10802 // cmpTY ccX, r1, r2
10803 // bCC copy1MBB
10804 // fallthrough --> copy0MBB
10805 MachineBasicBlock *thisMBB = BB;
10806 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
10807 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
10808 DebugLoc dl = MI.getDebugLoc();
10809 F->insert(It, copy0MBB);
10810 F->insert(It, sinkMBB);
10811
10812 // Transfer the remainder of BB and its successor edges to sinkMBB.
10813 sinkMBB->splice(sinkMBB->begin(), BB,
10814 std::next(MachineBasicBlock::iterator(MI)), BB->end());
10815 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
10816
10817 // Next, add the true and fallthrough blocks as its successors.
10818 BB->addSuccessor(copy0MBB);
10819 BB->addSuccessor(sinkMBB);
10820
10821 if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||
10822 MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||
10823 MI.getOpcode() == PPC::SELECT_F16 ||
10824 MI.getOpcode() == PPC::SELECT_SPE4 ||
10825 MI.getOpcode() == PPC::SELECT_SPE ||
10826 MI.getOpcode() == PPC::SELECT_QFRC ||
10827 MI.getOpcode() == PPC::SELECT_QSRC ||
10828 MI.getOpcode() == PPC::SELECT_QBRC ||
10829 MI.getOpcode() == PPC::SELECT_VRRC ||
10830 MI.getOpcode() == PPC::SELECT_VSFRC ||
10831 MI.getOpcode() == PPC::SELECT_VSSRC ||
10832 MI.getOpcode() == PPC::SELECT_VSRC) {
10833 BuildMI(BB, dl, TII->get(PPC::BC))
10834 .addReg(MI.getOperand(1).getReg())
10835 .addMBB(sinkMBB);
10836 } else {
10837 unsigned SelectPred = MI.getOperand(4).getImm();
10838 BuildMI(BB, dl, TII->get(PPC::BCC))
10839 .addImm(SelectPred)
10840 .addReg(MI.getOperand(1).getReg())
10841 .addMBB(sinkMBB);
10842 }
10843
10844 // copy0MBB:
10845 // %FalseValue = ...
10846 // # fallthrough to sinkMBB
10847 BB = copy0MBB;
10848
10849 // Update machine-CFG edges
10850 BB->addSuccessor(sinkMBB);
10851
10852 // sinkMBB:
10853 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
10854 // ...
10855 BB = sinkMBB;
10856 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
10857 .addReg(MI.getOperand(3).getReg())
10858 .addMBB(copy0MBB)
10859 .addReg(MI.getOperand(2).getReg())
10860 .addMBB(thisMBB);
10861 } else if (MI.getOpcode() == PPC::ReadTB) {
10862 // To read the 64-bit time-base register on a 32-bit target, we read the
10863 // two halves. Should the counter have wrapped while it was being read, we
10864 // need to try again.
10865 // ...
10866 // readLoop:
10867 // mfspr Rx,TBU # load from TBU
10868 // mfspr Ry,TB # load from TB
10869 // mfspr Rz,TBU # load from TBU
10870 // cmpw crX,Rx,Rz # check if 'old'='new'
10871 // bne readLoop # branch if they're not equal
10872 // ...
10873
10874 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
10875 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
10876 DebugLoc dl = MI.getDebugLoc();
10877 F->insert(It, readMBB);
10878 F->insert(It, sinkMBB);
10879
10880 // Transfer the remainder of BB and its successor edges to sinkMBB.
10881 sinkMBB->splice(sinkMBB->begin(), BB,
10882 std::next(MachineBasicBlock::iterator(MI)), BB->end());
10883 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
10884
10885 BB->addSuccessor(readMBB);
10886 BB = readMBB;
10887
10888 MachineRegisterInfo &RegInfo = F->getRegInfo();
10889 Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
10890 Register LoReg = MI.getOperand(0).getReg();
10891 Register HiReg = MI.getOperand(1).getReg();
10892
10893 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
10894 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
10895 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
10896
10897 Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
10898
10899 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
10900 .addReg(HiReg)
10901 .addReg(ReadAgainReg);
10902 BuildMI(BB, dl, TII->get(PPC::BCC))
10903 .addImm(PPC::PRED_NE)
10904 .addReg(CmpReg)
10905 .addMBB(readMBB);
10906
10907 BB->addSuccessor(readMBB);
10908 BB->addSuccessor(sinkMBB);
10909 } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
10910 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
10911 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
10912 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
10913 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
10914 BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
10915 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
10916 BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
10917
10918 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
10919 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
10920 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
10921 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
10922 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
10923 BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
10924 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
10925 BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
10926
10927 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
10928 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
10929 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
10930 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
10931 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
10932 BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
10933 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
10934 BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
10935
10936 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
10937 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
10938 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
10939 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
10940 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
10941 BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
10942 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
10943 BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
10944
10945 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
10946 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
10947 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
10948 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
10949 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
10950 BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
10951 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
10952 BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
10953
10954 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
10955 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
10956 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
10957 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
10958 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
10959 BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
10960 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
10961 BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
10962
10963 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
10964 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE);
10965 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
10966 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE);
10967 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
10968 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE);
10969 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
10970 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE);
10971
10972 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
10973 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE);
10974 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
10975 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE);
10976 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
10977 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE);
10978 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
10979 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE);
10980
10981 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
10982 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE);
10983 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
10984 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE);
10985 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
10986 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE);
10987 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
10988 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE);
10989
10990 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
10991 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE);
10992 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
10993 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE);
10994 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
10995 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE);
10996 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
10997 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE);
10998
10999 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
11000 BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
11001 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
11002 BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
11003 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
11004 BB = EmitAtomicBinary(MI, BB, 4, 0);
11005 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
11006 BB = EmitAtomicBinary(MI, BB, 8, 0);
11007 else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
11008 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
11009 (Subtarget.hasPartwordAtomics() &&
11010 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
11011 (Subtarget.hasPartwordAtomics() &&
11012 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
11013 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
11014
11015 auto LoadMnemonic = PPC::LDARX;
11016 auto StoreMnemonic = PPC::STDCX;
11017 switch (MI.getOpcode()) {
11018 default:
11019 llvm_unreachable("Compare and swap of unknown size");
11020 case PPC::ATOMIC_CMP_SWAP_I8:
11021 LoadMnemonic = PPC::LBARX;
11022 StoreMnemonic = PPC::STBCX;
11023 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
11024 break;
11025 case PPC::ATOMIC_CMP_SWAP_I16:
11026 LoadMnemonic = PPC::LHARX;
11027 StoreMnemonic = PPC::STHCX;
11028 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
11029 break;
11030 case PPC::ATOMIC_CMP_SWAP_I32:
11031 LoadMnemonic = PPC::LWARX;
11032 StoreMnemonic = PPC::STWCX;
11033 break;
11034 case PPC::ATOMIC_CMP_SWAP_I64:
11035 LoadMnemonic = PPC::LDARX;
11036 StoreMnemonic = PPC::STDCX;
11037 break;
11038 }
11039 Register dest = MI.getOperand(0).getReg();
11040 Register ptrA = MI.getOperand(1).getReg();
11041 Register ptrB = MI.getOperand(2).getReg();
11042 Register oldval = MI.getOperand(3).getReg();
11043 Register newval = MI.getOperand(4).getReg();
11044 DebugLoc dl = MI.getDebugLoc();
11045
11046 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
11047 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
11048 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
11049 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11050 F->insert(It, loop1MBB);
11051 F->insert(It, loop2MBB);
11052 F->insert(It, midMBB);
11053 F->insert(It, exitMBB);
11054 exitMBB->splice(exitMBB->begin(), BB,
11055 std::next(MachineBasicBlock::iterator(MI)), BB->end());
11056 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11057
11058 // thisMBB:
11059 // ...
11060 // fallthrough --> loopMBB
11061 BB->addSuccessor(loop1MBB);
11062
11063 // loop1MBB:
11064 // l[bhwd]arx dest, ptr
11065 // cmp[wd] dest, oldval
11066 // bne- midMBB
11067 // loop2MBB:
11068 // st[bhwd]cx. newval, ptr
11069 // bne- loopMBB
11070 // b exitBB
11071 // midMBB:
11072 // st[bhwd]cx. dest, ptr
11073 // exitBB:
11074 BB = loop1MBB;
11075 BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
11076 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
11077 .addReg(oldval)
11078 .addReg(dest);
11079 BuildMI(BB, dl, TII->get(PPC::BCC))
11080 .addImm(PPC::PRED_NE)
11081 .addReg(PPC::CR0)
11082 .addMBB(midMBB);
11083 BB->addSuccessor(loop2MBB);
11084 BB->addSuccessor(midMBB);
11085
11086 BB = loop2MBB;
11087 BuildMI(BB, dl, TII->get(StoreMnemonic))
11088 .addReg(newval)
11089 .addReg(ptrA)
11090 .addReg(ptrB);
11091 BuildMI(BB, dl, TII->get(PPC::BCC))
11092 .addImm(PPC::PRED_NE)
11093 .addReg(PPC::CR0)
11094 .addMBB(loop1MBB);
11095 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
11096 BB->addSuccessor(loop1MBB);
11097 BB->addSuccessor(exitMBB);
11098
11099 BB = midMBB;
11100 BuildMI(BB, dl, TII->get(StoreMnemonic))
11101 .addReg(dest)
11102 .addReg(ptrA)
11103 .addReg(ptrB);
11104 BB->addSuccessor(exitMBB);
11105
11106 // exitMBB:
11107 // ...
11108 BB = exitMBB;
11109 } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
11110 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
11111 // We must use 64-bit registers for addresses when targeting 64-bit,
11112 // since we're actually doing arithmetic on them. Other registers
11113 // can be 32-bit.
11114 bool is64bit = Subtarget.isPPC64();
11115 bool isLittleEndian = Subtarget.isLittleEndian();
11116 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
11117
11118 Register dest = MI.getOperand(0).getReg();
11119 Register ptrA = MI.getOperand(1).getReg();
11120 Register ptrB = MI.getOperand(2).getReg();
11121 Register oldval = MI.getOperand(3).getReg();
11122 Register newval = MI.getOperand(4).getReg();
11123 DebugLoc dl = MI.getDebugLoc();
11124
11125 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
11126 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
11127 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
11128 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11129 F->insert(It, loop1MBB);
11130 F->insert(It, loop2MBB);
11131 F->insert(It, midMBB);
11132 F->insert(It, exitMBB);
11133 exitMBB->splice(exitMBB->begin(), BB,
11134 std::next(MachineBasicBlock::iterator(MI)), BB->end());
11135 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11136
11137 MachineRegisterInfo &RegInfo = F->getRegInfo();
11138 const TargetRegisterClass *RC =
11139 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
11140 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
11141
11142 Register PtrReg = RegInfo.createVirtualRegister(RC);
11143 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
11144 Register ShiftReg =
11145 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
11146 Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
11147 Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
11148 Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
11149 Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
11150 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
11151 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
11152 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
11153 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
11154 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
11155 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
11156 Register Ptr1Reg;
11157 Register TmpReg = RegInfo.createVirtualRegister(GPRC);
11158 Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
11159 // thisMBB:
11160 // ...
11161 // fallthrough --> loopMBB
11162 BB->addSuccessor(loop1MBB);
11163
11164 // The 4-byte load must be aligned, while a char or short may be
11165 // anywhere in the word. Hence all this nasty bookkeeping code.
11166 // add ptr1, ptrA, ptrB [copy if ptrA==0]
11167 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
11168 // xori shift, shift1, 24 [16]
11169 // rlwinm ptr, ptr1, 0, 0, 29
11170 // slw newval2, newval, shift
11171 // slw oldval2, oldval,shift
11172 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
11173 // slw mask, mask2, shift
11174 // and newval3, newval2, mask
11175 // and oldval3, oldval2, mask
11176 // loop1MBB:
11177 // lwarx tmpDest, ptr
11178 // and tmp, tmpDest, mask
11179 // cmpw tmp, oldval3
11180 // bne- midMBB
11181 // loop2MBB:
11182 // andc tmp2, tmpDest, mask
11183 // or tmp4, tmp2, newval3
11184 // stwcx. tmp4, ptr
11185 // bne- loop1MBB
11186 // b exitBB
11187 // midMBB:
11188 // stwcx. tmpDest, ptr
11189 // exitBB:
11190 // srw dest, tmpDest, shift
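// A worked example of the shift bookkeeping above (illustrative sketch, not
// part of the analyzed file): for an 8-bit cmpxchg of the byte at word
// offset 2 on a big-endian target, rlwinm extracts 8 * 2 = 16 into shift1,
// and xori with 24 yields 8, so the byte occupies bits [15:8] of the aligned
// word; on a little-endian target the shift stays 8 * offset = 16 directly.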
11191 if (ptrA != ZeroReg) {
11192 Ptr1Reg = RegInfo.createVirtualRegister(RC);
11193 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
11194 .addReg(ptrA)
11195 .addReg(ptrB);
11196 } else {
11197 Ptr1Reg = ptrB;
11198 }
11199
11200 // We need to use a 32-bit subregister to avoid a register class mismatch in
11201 // 64-bit mode.
11202 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
11203 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
11204 .addImm(3)
11205 .addImm(27)
11206 .addImm(is8bit ? 28 : 27);
11207 if (!isLittleEndian)
11208 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
11209 .addReg(Shift1Reg)
11210 .addImm(is8bit ? 24 : 16);
11211 if (is64bit)
11212 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
11213 .addReg(Ptr1Reg)
11214 .addImm(0)
11215 .addImm(61);
11216 else
11217 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
11218 .addReg(Ptr1Reg)
11219 .addImm(0)
11220 .addImm(0)
11221 .addImm(29);
11222 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
11223 .addReg(newval)
11224 .addReg(ShiftReg);
11225 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
11226 .addReg(oldval)
11227 .addReg(ShiftReg);
11228 if (is8bit)
11229 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
11230 else {
11231 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
11232 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
11233 .addReg(Mask3Reg)
11234 .addImm(65535);
11235 }
11236 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
11237 .addReg(Mask2Reg)
11238 .addReg(ShiftReg);
11239 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
11240 .addReg(NewVal2Reg)
11241 .addReg(MaskReg);
11242 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
11243 .addReg(OldVal2Reg)
11244 .addReg(MaskReg);
11245
11246 BB = loop1MBB;
11247 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
11248 .addReg(ZeroReg)
11249 .addReg(PtrReg);
11250 BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
11251 .addReg(TmpDestReg)
11252 .addReg(MaskReg);
11253 BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
11254 .addReg(TmpReg)
11255 .addReg(OldVal3Reg);
11256 BuildMI(BB, dl, TII->get(PPC::BCC))
11257 .addImm(PPC::PRED_NE)
11258 .addReg(PPC::CR0)
11259 .addMBB(midMBB);
11260 BB->addSuccessor(loop2MBB);
11261 BB->addSuccessor(midMBB);
11262
11263 BB = loop2MBB;
11264 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
11265 .addReg(TmpDestReg)
11266 .addReg(MaskReg);
11267 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
11268 .addReg(Tmp2Reg)
11269 .addReg(NewVal3Reg);
11270 BuildMI(BB, dl, TII->get(PPC::STWCX))
11271 .addReg(Tmp4Reg)
11272 .addReg(ZeroReg)
11273 .addReg(PtrReg);
11274 BuildMI(BB, dl, TII->get(PPC::BCC))
11275 .addImm(PPC::PRED_NE)
11276 .addReg(PPC::CR0)
11277 .addMBB(loop1MBB);
11278 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
11279 BB->addSuccessor(loop1MBB);
11280 BB->addSuccessor(exitMBB);
11281
11282 BB = midMBB;
11283 BuildMI(BB, dl, TII->get(PPC::STWCX))
11284 .addReg(TmpDestReg)
11285 .addReg(ZeroReg)
11286 .addReg(PtrReg);
11287 BB->addSuccessor(exitMBB);
11288
11289 // exitMBB:
11290 // ...
11291 BB = exitMBB;
11292 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
11293 .addReg(TmpReg)
11294 .addReg(ShiftReg);
11295 } else if (MI.getOpcode() == PPC::FADDrtz) {
11296 // This pseudo performs an FADD with rounding mode temporarily forced
11297 // to round-to-zero. We emit this via custom inserter since the FPSCR
11298 // is not modeled at the SelectionDAG level.
11299 Register Dest = MI.getOperand(0).getReg();
11300 Register Src1 = MI.getOperand(1).getReg();
11301 Register Src2 = MI.getOperand(2).getReg();
11302 DebugLoc dl = MI.getDebugLoc();
11303
11304 MachineRegisterInfo &RegInfo = F->getRegInfo();
11305 Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
11306
11307 // Save FPSCR value.
11308 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
11309
11310 // Set rounding mode to round-to-zero.
11311 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31);
11312 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30);
11313
11314 // Perform addition.
11315 BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);
11316
11317 // Restore FPSCR value.
11318 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
11319 } else if (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT ||
11320 MI.getOpcode() == PPC::ANDIo_1_GT_BIT ||
11321 MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
11322 MI.getOpcode() == PPC::ANDIo_1_GT_BIT8) {
11323 unsigned Opcode = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
11324 MI.getOpcode() == PPC::ANDIo_1_GT_BIT8)
11325 ? PPC::ANDIo8
11326 : PPC::ANDIo;
11327 bool isEQ = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT ||
11328 MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8);
11329
11330 MachineRegisterInfo &RegInfo = F->getRegInfo();
11331 Register Dest = RegInfo.createVirtualRegister(
11332 Opcode == PPC::ANDIo ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
11333
11334 DebugLoc dl = MI.getDebugLoc();
11335 BuildMI(*BB, MI, dl, TII->get(Opcode), Dest)
11336 .addReg(MI.getOperand(1).getReg())
11337 .addImm(1);
11338 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY),
11339 MI.getOperand(0).getReg())
11340 .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT);
11341 } else if (MI.getOpcode() == PPC::TCHECK_RET) {
11342 DebugLoc Dl = MI.getDebugLoc();
11343 MachineRegisterInfo &RegInfo = F->getRegInfo();
11344 Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
11345 BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
11346 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
11347 MI.getOperand(0).getReg())
11348 .addReg(CRReg);
11349 } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
11350 DebugLoc Dl = MI.getDebugLoc();
11351 unsigned Imm = MI.getOperand(1).getImm();
11352 BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
11353 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
11354 MI.getOperand(0).getReg())
11355 .addReg(PPC::CR0EQ);
11356 } else if (MI.getOpcode() == PPC::SETRNDi) {
11357 DebugLoc dl = MI.getDebugLoc();
11358 Register OldFPSCRReg = MI.getOperand(0).getReg();
11359
11360 // Save FPSCR value.
11361 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
11362
11363 // The floating point rounding mode is in bits 62:63 of the FPSCR, and has
11364 // the following settings:
11365 // 00 Round to nearest
11366 // 01 Round to 0
11367 // 10 Round to +inf
11368 // 11 Round to -inf
11369
11370 // When the operand is an immediate, use the two least significant bits of
11371 // the immediate to set bits 62:63 of the FPSCR.
11372 unsigned Mode = MI.getOperand(1).getImm();
11373 BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
11374 .addImm(31);
11375
11376 BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
11377 .addImm(30);
11378 } else if (MI.getOpcode() == PPC::SETRND) {
11379 DebugLoc dl = MI.getDebugLoc();
11380
11381 // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
11382 // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
11383 // If the target doesn't have DirectMove, we use the stack to do the
11384 // conversion, because the target lacks instructions like mtvsrd or mfvsrd
11385 // that could do the conversion directly.
11386 auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
11387 if (Subtarget.hasDirectMove()) {
11388 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
11389 .addReg(SrcReg);
11390 } else {
11391 // Use stack to do the register copy.
11392 unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
11393 MachineRegisterInfo &RegInfo = F->getRegInfo();
11394 const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
11395 if (RC == &PPC::F8RCRegClass) {
11396 // Copy register from F8RCRegClass to G8RCRegclass.
11397 assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
11398 "Unsupported RegClass.");
11399
11400 StoreOp = PPC::STFD;
11401 LoadOp = PPC::LD;
11402 } else {
11403 // Copy register from G8RCRegClass to F8RCRegclass.
11404 assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
11405 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
11406 "Unsupported RegClass.");
11407 }
11408
11409 MachineFrameInfo &MFI = F->getFrameInfo();
11410 int FrameIdx = MFI.CreateStackObject(8, 8, false);
11411
11412 MachineMemOperand *MMOStore = F->getMachineMemOperand(
11413 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
11414 MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
11415 MFI.getObjectAlignment(FrameIdx));
11416
11417 // Store the SrcReg into the stack.
11418 BuildMI(*BB, MI, dl, TII->get(StoreOp))
11419 .addReg(SrcReg)
11420 .addImm(0)
11421 .addFrameIndex(FrameIdx)
11422 .addMemOperand(MMOStore);
11423
11424 MachineMemOperand *MMOLoad = F->getMachineMemOperand(
11425 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
11426 MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
11427 MFI.getObjectAlignment(FrameIdx));
11428
11429 // Load from the stack where SrcReg is stored, and save to DestReg,
11430 // so we have done the RegClass conversion from RegClass::SrcReg to
11431 // RegClass::DestReg.
11432 BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
11433 .addImm(0)
11434 .addFrameIndex(FrameIdx)
11435 .addMemOperand(MMOLoad);
11436 }
11437 };
11438
11439 Register OldFPSCRReg = MI.getOperand(0).getReg();
11440
11441 // Save FPSCR value.
11442 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
11443
11444 // When the operand is a gprc register, use its two least significant bits
11445 // and the mtfsf instruction to set bits 62:63 of the FPSCR.
11446 //
11447 // copy OldFPSCRTmpReg, OldFPSCRReg
11448 // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
11449 // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
11450 // copy NewFPSCRReg, NewFPSCRTmpReg
11451 // mtfsf 255, NewFPSCRReg
11452 MachineOperand SrcOp = MI.getOperand(1);
11453 MachineRegisterInfo &RegInfo = F->getRegInfo();
11454 Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
11455
11456 copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
11457
11458 Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
11459 Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
11460
11461 // The first operand of INSERT_SUBREG should be a register that has
11462 // subregisters; since we only care about its RegClass, we use an
11463 // IMPLICIT_DEF register.
11464 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
11465 BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
11466 .addReg(ImDefReg)
11467 .add(SrcOp)
11468 .addImm(1);
11469
11470 Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
11471 BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
11472 .addReg(OldFPSCRTmpReg)
11473 .addReg(ExtSrcReg)
11474 .addImm(0)
11475 .addImm(62);
11476
11477 Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
11478 copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
11479
11480 // The mask 255 means that bits 32:63 of NewFPSCRReg are written to bits
11481 // 32:63 of the FPSCR.
11482 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
11483 .addImm(255)
11484 .addReg(NewFPSCRReg)
11485 .addImm(0)
11486 .addImm(0);
11487 } else {
11488 llvm_unreachable("Unexpected instr type to insert");
11489 }
11490
11491 MI.eraseFromParent(); // The pseudo instruction is gone now.
11492 return BB;
11493}
11494
11495//===----------------------------------------------------------------------===//
11496// Target Optimization Hooks
11497//===----------------------------------------------------------------------===//
11498
11499static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
11500 // For the estimates, convergence is quadratic, so we essentially double the
11501 // number of digits correct after every iteration. For both FRE and FRSQRTE,
11502 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
11503 // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
11504 int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
11505 if (VT.getScalarType() == MVT::f64)
11506 RefinementSteps++;
11507 return RefinementSteps;
11508}
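// Illustrative arithmetic for the step counts above (a sketch, not part of
// the original function): each Newton-Raphson iteration roughly squares the
// relative error. From 2^-14 (hasRecipPrec), one step reaches ~2^-28, which
// covers f32's 23 fraction bits, and a second step reaches ~2^-56 for f64's
// 52 bits. From 2^-5, three steps reach ~2^-40 for f32 and four reach
// ~2^-80 for f64, matching the 1/2 and 3/4 step counts returned above.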
11509
11510SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
11511 int Enabled, int &RefinementSteps,
11512 bool &UseOneConstNR,
11513 bool Reciprocal) const {
11514 EVT VT = Operand.getValueType();
11515 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
11516 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
11517 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
11518 (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
11519 (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
11520 (VT == MVT::v4f64 && Subtarget.hasQPX())) {
11521 if (RefinementSteps == ReciprocalEstimate::Unspecified)
11522 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
11523
11524 // The Newton-Raphson computation with a single constant does not provide
11525 // enough accuracy on some CPUs.
11526 UseOneConstNR = !Subtarget.needsTwoConstNR();
11527 return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
11528 }
11529 return SDValue();
11530}
11531
11532SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
11533 int Enabled,
11534 int &RefinementSteps) const {
11535 EVT VT = Operand.getValueType();
11536 if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
11537 (VT == MVT::f64 && Subtarget.hasFRE()) ||
11538 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
11539 (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
11540 (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
11541 (VT == MVT::v4f64 && Subtarget.hasQPX())) {
11542 if (RefinementSteps == ReciprocalEstimate::Unspecified)
11543 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
11544 return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
11545 }
11546 return SDValue();
11547}
11548
11549unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
11550 // Note: This functionality is used only when unsafe-fp-math is enabled, and
11551 // on cores with reciprocal estimates (which are used when unsafe-fp-math is
11552 // enabled for division), this functionality is redundant with the default
11553 // combiner logic (once the division -> reciprocal/multiply transformation
11554 // has taken place). As a result, this matters more for older cores than for
11555 // newer ones.
11556
11557 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
11558 // reciprocal if there are two or more FDIVs (for embedded cores with only
11559 // one FP pipeline) or three or more FDIVs (for generic OOO cores).
11560 switch (Subtarget.getDarwinDirective()) {
11561 default:
11562 return 3;
11563 case PPC::DIR_440:
11564 case PPC::DIR_A2:
11565 case PPC::DIR_E500:
11566 case PPC::DIR_E500mc:
11567 case PPC::DIR_E5500:
11568 return 2;
11569 }
11570}
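// Sketch of the combine this threshold gates (illustrative): under
// unsafe-fp-math, a/x; b/x; c/x can be rewritten as r = 1.0/x; a*r; b*r; c*r,
// trading several divisions for one reciprocal plus multiplies. The value
// returned here is the minimum number of FDIVs sharing a divisor before the
// rewrite is considered profitable for the current core.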
11571
11572// isConsecutiveLSLoc needs to work even if all adds have not yet been
11573// collapsed, and so we need to look through chains of them.
11574static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
11575 int64_t& Offset, SelectionDAG &DAG) {
11576 if (DAG.isBaseWithConstantOffset(Loc)) {
11577 Base = Loc.getOperand(0);
11578 Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
11579
11580 // The base might itself be a base plus an offset, and if so, accumulate
11581 // that as well.
11582 getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
11583 }
11584}
11585
11586static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
11587 unsigned Bytes, int Dist,
11588 SelectionDAG &DAG) {
11589 if (VT.getSizeInBits() / 8 != Bytes)
11590 return false;
11591
11592 SDValue BaseLoc = Base->getBasePtr();
11593 if (Loc.getOpcode() == ISD::FrameIndex) {
11594 if (BaseLoc.getOpcode() != ISD::FrameIndex)
11595 return false;
11596 const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
11597 int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
11598 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
11599 int FS = MFI.getObjectSize(FI);
11600 int BFS = MFI.getObjectSize(BFI);
11601 if (FS != BFS || FS != (int)Bytes) return false;
11602 return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
11603 }
11604
11605 SDValue Base1 = Loc, Base2 = BaseLoc;
11606 int64_t Offset1 = 0, Offset2 = 0;
11607 getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
11608 getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
11609 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
11610 return true;
11611
11612 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11613 const GlobalValue *GV1 = nullptr;
11614 const GlobalValue *GV2 = nullptr;
11615 Offset1 = 0;
11616 Offset2 = 0;
11617 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
11618 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
11619 if (isGA1 && isGA2 && GV1 == GV2)
11620 return Offset1 == (Offset2 + Dist*Bytes);
11621 return false;
11622}
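// Example of the offset check above (illustrative): with Bytes = 16 and
// Dist = 1, a location at Base + 16 is consecutive to a base access at
// Base + 0, because Offset1 == Offset2 + Dist * Bytes holds after both
// addresses are decomposed by getBaseWithConstantOffset.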
11623
11624// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
11625// not enforce equality of the chain operands.
11626static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
11627 unsigned Bytes, int Dist,
11628 SelectionDAG &DAG) {
11629 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
11630 EVT VT = LS->getMemoryVT();
11631 SDValue Loc = LS->getBasePtr();
11632 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
11633 }
11634
11635 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
11636 EVT VT;
11637 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
11638 default: return false;
11639 case Intrinsic::ppc_qpx_qvlfd:
11640 case Intrinsic::ppc_qpx_qvlfda:
11641 VT = MVT::v4f64;
11642 break;
11643 case Intrinsic::ppc_qpx_qvlfs:
11644 case Intrinsic::ppc_qpx_qvlfsa:
11645 VT = MVT::v4f32;
11646 break;
11647 case Intrinsic::ppc_qpx_qvlfcd:
11648 case Intrinsic::ppc_qpx_qvlfcda:
11649 VT = MVT::v2f64;
11650 break;
11651 case Intrinsic::ppc_qpx_qvlfcs:
11652 case Intrinsic::ppc_qpx_qvlfcsa:
11653 VT = MVT::v2f32;
11654 break;
11655 case Intrinsic::ppc_qpx_qvlfiwa:
11656 case Intrinsic::ppc_qpx_qvlfiwz:
11657 case Intrinsic::ppc_altivec_lvx:
11658 case Intrinsic::ppc_altivec_lvxl:
11659 case Intrinsic::ppc_vsx_lxvw4x:
11660 case Intrinsic::ppc_vsx_lxvw4x_be:
11661 VT = MVT::v4i32;
11662 break;
11663 case Intrinsic::ppc_vsx_lxvd2x:
11664 case Intrinsic::ppc_vsx_lxvd2x_be:
11665 VT = MVT::v2f64;
11666 break;
11667 case Intrinsic::ppc_altivec_lvebx:
11668 VT = MVT::i8;
11669 break;
11670 case Intrinsic::ppc_altivec_lvehx:
11671 VT = MVT::i16;
11672 break;
11673 case Intrinsic::ppc_altivec_lvewx:
11674 VT = MVT::i32;
11675 break;
11676 }
11677
11678 return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
11679 }
11680
11681 if (N->getOpcode() == ISD::INTRINSIC_VOID) {
11682 EVT VT;
11683 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
11684 default: return false;
11685 case Intrinsic::ppc_qpx_qvstfd:
11686 case Intrinsic::ppc_qpx_qvstfda:
11687 VT = MVT::v4f64;
11688 break;
11689 case Intrinsic::ppc_qpx_qvstfs:
11690 case Intrinsic::ppc_qpx_qvstfsa:
11691 VT = MVT::v4f32;
11692 break;
11693 case Intrinsic::ppc_qpx_qvstfcd:
11694 case Intrinsic::ppc_qpx_qvstfcda:
11695 VT = MVT::v2f64;
11696 break;
11697 case Intrinsic::ppc_qpx_qvstfcs:
11698 case Intrinsic::ppc_qpx_qvstfcsa:
11699 VT = MVT::v2f32;
11700 break;
11701 case Intrinsic::ppc_qpx_qvstfiw:
11702 case Intrinsic::ppc_qpx_qvstfiwa:
11703 case Intrinsic::ppc_altivec_stvx:
11704 case Intrinsic::ppc_altivec_stvxl:
11705 case Intrinsic::ppc_vsx_stxvw4x:
11706 VT = MVT::v4i32;
11707 break;
11708 case Intrinsic::ppc_vsx_stxvd2x:
11709 VT = MVT::v2f64;
11710 break;
11711 case Intrinsic::ppc_vsx_stxvw4x_be:
11712 VT = MVT::v4i32;
11713 break;
11714 case Intrinsic::ppc_vsx_stxvd2x_be:
11715 VT = MVT::v2f64;
11716 break;
11717 case Intrinsic::ppc_altivec_stvebx:
11718 VT = MVT::i8;
11719 break;
11720 case Intrinsic::ppc_altivec_stvehx:
11721 VT = MVT::i16;
11722 break;
11723 case Intrinsic::ppc_altivec_stvewx:
11724 VT = MVT::i32;
11725 break;
11726 }
11727
11728 return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
11729 }
11730
11731 return false;
11732}
11733
11734 // Return true if there is a nearby consecutive load to the one provided
11735 // (regardless of alignment). We search up and down the chain, looking through
11736// token factors and other loads (but nothing else). As a result, a true result
11737// indicates that it is safe to create a new consecutive load adjacent to the
11738// load provided.
11739static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
11740 SDValue Chain = LD->getChain();
11741 EVT VT = LD->getMemoryVT();
11742
11743 SmallSet<SDNode *, 16> LoadRoots;
11744 SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
11745 SmallSet<SDNode *, 16> Visited;
11746
11747 // First, search up the chain, branching to follow all token-factor operands.
11748 // If we find a consecutive load, then we're done, otherwise, record all
11749 // nodes just above the top-level loads and token factors.
11750 while (!Queue.empty()) {
11751 SDNode *ChainNext = Queue.pop_back_val();
11752 if (!Visited.insert(ChainNext).second)
11753 continue;
11754
11755 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
11756 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
11757 return true;
11758
11759 if (!Visited.count(ChainLD->getChain().getNode()))
11760 Queue.push_back(ChainLD->getChain().getNode());
11761 } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
11762 for (const SDUse &O : ChainNext->ops())
11763 if (!Visited.count(O.getNode()))
11764 Queue.push_back(O.getNode());
11765 } else
11766 LoadRoots.insert(ChainNext);
11767 }
11768
11769 // Second, search down the chain, starting from the top-level nodes recorded
11770 // in the first phase. These top-level nodes are the nodes just above all
11771 // loads and token factors. Starting with their uses, recursively look through
11772 // all loads (just the chain uses) and token factors to find a consecutive
11773 // load.
11774 Visited.clear();
11775 Queue.clear();
11776
11777 for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
11778 IE = LoadRoots.end(); I != IE; ++I) {
11779 Queue.push_back(*I);
11780
11781 while (!Queue.empty()) {
11782 SDNode *LoadRoot = Queue.pop_back_val();
11783 if (!Visited.insert(LoadRoot).second)
11784 continue;
11785
11786 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
11787 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
11788 return true;
11789
11790 for (SDNode::use_iterator UI = LoadRoot->use_begin(),
11791 UE = LoadRoot->use_end(); UI != UE; ++UI)
11792 if (((isa<MemSDNode>(*UI) &&
11793 cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
11794 UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
11795 Queue.push_back(*UI);
11796 }
11797 }
11798
11799 return false;
11800}
11801
11802/// This function is called when we have proved that a SETCC node can be replaced
11803 /// by subtraction (and other supporting instructions) so that the result of
11804 /// the comparison is kept in a GPR instead of a CR. This function is purely for
11805/// codegen purposes and has some flags to guide the codegen process.
11806static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
11807 bool Swap, SDLoc &DL, SelectionDAG &DAG) {
11808 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
11809
11810 // Zero extend the operands to the largest legal integer. The original
11811 // operands must be of a strictly smaller size.
11812 auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
11813 DAG.getConstant(Size, DL, MVT::i32));
11814 auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
11815 DAG.getConstant(Size, DL, MVT::i32));
11816
11817 // Swap if needed. Depends on the condition code.
11818 if (Swap)
11819 std::swap(Op0, Op1);
11820
11821 // Subtract extended integers.
11822 auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
11823
11824 // Move the sign bit to the least significant position and zero out the rest.
11825 // Now the least significant bit carries the result of the original comparison.
11826 auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
11827 DAG.getConstant(Size - 1, DL, MVT::i32));
11828 auto Final = Shifted;
11829
11830 // Complement the result if needed. Based on the condition code.
11831 if (Complement)
11832 Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
11833 DAG.getConstant(1, DL, MVT::i64));
11834
11835 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
11836}
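// Illustrative trace (a sketch, not from the source): for SETULT on two
// 8-bit values x = 3 and y = 5, both are zero-extended to i64, 3 - 5 = -2
// has its sign bit set, and shifting right by Size - 1 = 63 leaves 1, the
// correct "x < y" result. SETUGE adds the Complement flag, SETUGT swaps the
// operands instead, and SETULE both swaps and complements.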
11837
11838SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
11839 DAGCombinerInfo &DCI) const {
11840 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
11841
11842 SelectionDAG &DAG = DCI.DAG;
11843 SDLoc DL(N);
11844
11845 // Size of integers being compared has a critical role in the following
11846 // analysis, so we prefer to do this when all types are legal.
11847 if (!DCI.isAfterLegalizeDAG())
11848 return SDValue();
11849
11850 // If all users of SETCC extend its value to a legal integer type,
11851 // then we replace SETCC with a subtraction.
11852 for (SDNode::use_iterator UI = N->use_begin(),
11853 UE = N->use_end(); UI != UE; ++UI) {
11854 if (UI->getOpcode() != ISD::ZERO_EXTEND)
11855 return SDValue();
11856 }
11857
11858 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
11859 auto OpSize = N->getOperand(0).getValueSizeInBits();
11860
11861 unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
11862
11863 if (OpSize < Size) {
11864 switch (CC) {
11865 default: break;
11866 case ISD::SETULT:
11867 return generateEquivalentSub(N, Size, false, false, DL, DAG);
11868 case ISD::SETULE:
11869 return generateEquivalentSub(N, Size, true, true, DL, DAG);
11870 case ISD::SETUGT:
11871 return generateEquivalentSub(N, Size, false, true, DL, DAG);
11872 case ISD::SETUGE:
11873 return generateEquivalentSub(N, Size, true, false, DL, DAG);
11874 }
11875 }
11876
11877 return SDValue();
11878}
11879
11880SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
11881 DAGCombinerInfo &DCI) const {
11882 SelectionDAG &DAG = DCI.DAG;
11883 SDLoc dl(N);
11884
11885 assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
11886 // If we're tracking CR bits, we need to be careful that we don't have:
11887 // trunc(binary-ops(zext(x), zext(y)))
11888 // or
11889 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
11890 // such that we're unnecessarily moving things into GPRs when it would be
11891 // better to keep them in CR bits.
11892
11893 // Note that trunc here can be an actual i1 trunc, or can be the effective
11894 // truncation that comes from a setcc or select_cc.
11895 if (N->getOpcode() == ISD::TRUNCATE &&
11896 N->getValueType(0) != MVT::i1)
11897 return SDValue();
11898
11899 if (N->getOperand(0).getValueType() != MVT::i32 &&
11900 N->getOperand(0).getValueType() != MVT::i64)
11901 return SDValue();
11902
11903 if (N->getOpcode() == ISD::SETCC ||
11904 N->getOpcode() == ISD::SELECT_CC) {
11905 // If we're looking at a comparison, then we need to make sure that the
11906 // high bits (all except for the first) don't affect the result.
11907 ISD::CondCode CC =
11908 cast<CondCodeSDNode>(N->getOperand(
11909 N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
11910 unsigned OpBits = N->getOperand(0).getValueSizeInBits();
11911
11912 if (ISD::isSignedIntSetCC(CC)) {
11913 if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
11914 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
11915 return SDValue();
11916 } else if (ISD::isUnsignedIntSetCC(CC)) {
11917 if (!DAG.MaskedValueIsZero(N->getOperand(0),
11918 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
11919 !DAG.MaskedValueIsZero(N->getOperand(1),
11920 APInt::getHighBitsSet(OpBits, OpBits-1)))
11921 return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
11922 : SDValue());
11923 } else {
11924 // This is neither a signed nor an unsigned comparison; just make sure
11925 // that the high bits are equal.
11926 KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
11927 KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
11928
11929 // We don't really care about what is known about the first bit (if
11930 // anything), so clear it in all masks prior to comparing them.
11931 Op1Known.Zero.clearBit(0); Op1Known.One.clearBit(0);
11932 Op2Known.Zero.clearBit(0); Op2Known.One.clearBit(0);
11933
11934 if (Op1Known.Zero != Op2Known.Zero || Op1Known.One != Op2Known.One)
11935 return SDValue();
11936 }
11937 }
11938
11939 // We now know that the higher-order bits are irrelevant, we just need to
11940 // make sure that all of the intermediate operations are bit operations, and
11941 // all inputs are extensions.
11942 if (N->getOperand(0).getOpcode() != ISD::AND &&
11943 N->getOperand(0).getOpcode() != ISD::OR &&
11944 N->getOperand(0).getOpcode() != ISD::XOR &&
11945 N->getOperand(0).getOpcode() != ISD::SELECT &&
11946 N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
11947 N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
11948 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
11949 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
11950 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
11951 return SDValue();
11952
11953 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
11954 N->getOperand(1).getOpcode() != ISD::AND &&
11955 N->getOperand(1).getOpcode() != ISD::OR &&
11956 N->getOperand(1).getOpcode() != ISD::XOR &&
11957 N->getOperand(1).getOpcode() != ISD::SELECT &&
11958 N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
11959 N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
11960 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
11961 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
11962 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
11963 return SDValue();
11964
11965 SmallVector<SDValue, 4> Inputs;
11966 SmallVector<SDValue, 8> BinOps, PromOps;
11967 SmallPtrSet<SDNode *, 16> Visited;
11968
11969 for (unsigned i = 0; i < 2; ++i) {
11970 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
11971 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
11972 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
11973 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
11974 isa<ConstantSDNode>(N->getOperand(i)))
11975 Inputs.push_back(N->getOperand(i));
11976 else
11977 BinOps.push_back(N->getOperand(i));
11978
11979 if (N->getOpcode() == ISD::TRUNCATE)
11980 break;
11981 }
11982
11983 // Visit all inputs, collect all binary operations (and, or, xor and
11984 // select) that are all fed by extensions.
11985 while (!BinOps.empty()) {
11986 SDValue BinOp = BinOps.back();
11987 BinOps.pop_back();
11988
11989 if (!Visited.insert(BinOp.getNode()).second)
11990 continue;
11991
11992 PromOps.push_back(BinOp);
11993
11994 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
11995 // The condition of the select is not promoted.
11996 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
11997 continue;
11998 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
11999 continue;
12000
12001 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
12002 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
12003 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
12004 BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
12005 isa<ConstantSDNode>(BinOp.getOperand(i))) {
12006 Inputs.push_back(BinOp.getOperand(i));
12007 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
12008 BinOp.getOperand(i).getOpcode() == ISD::OR ||
12009 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
12010 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
12011 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
12012 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
12013 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
12014 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
12015 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
12016 BinOps.push_back(BinOp.getOperand(i));
12017 } else {
12018 // We have an input that is not an extension or another binary
12019 // operation; we'll abort this transformation.
12020 return SDValue();
12021 }
12022 }
12023 }
12024
12025 // Make sure that this is a self-contained cluster of operations (which
12026 // is not quite the same thing as saying that everything has only one
12027 // use).
12028 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
12029 if (isa<ConstantSDNode>(Inputs[i]))
12030 continue;
12031
12032 for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
12033 UE = Inputs[i].getNode()->use_end();
12034 UI != UE; ++UI) {
12035 SDNode *User = *UI;
12036 if (User != N && !Visited.count(User))
12037 return SDValue();
12038
12039 // Make sure that we're not going to promote the non-output-value
12040 // operand(s) or SELECT or SELECT_CC.
12041 // FIXME: Although we could sometimes handle this, and it does occur in
12042 // practice that one of the condition inputs to the select is also one of
12043 // the outputs, we currently can't deal with this.
12044 if (User->getOpcode() == ISD::SELECT) {
12045 if (User->getOperand(0) == Inputs[i])
12046 return SDValue();
12047 } else if (User->getOpcode() == ISD::SELECT_CC) {
12048 if (User->getOperand(0) == Inputs[i] ||
12049 User->getOperand(1) == Inputs[i])
12050 return SDValue();
12051 }
12052 }
12053 }
12054
12055 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
12056 for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
12057 UE = PromOps[i].getNode()->use_end();
12058 UI != UE; ++UI) {
12059 SDNode *User = *UI;
12060 if (User != N && !Visited.count(User))
12061 return SDValue();
12062
12063 // Make sure that we're not going to promote the non-output-value
12064 // operand(s) or SELECT or SELECT_CC.
12065 // FIXME: Although we could sometimes handle this, and it does occur in
12066 // practice that one of the condition inputs to the select is also one of
12067 // the outputs, we currently can't deal with this.
12068 if (User->getOpcode() == ISD::SELECT) {
12069 if (User->getOperand(0) == PromOps[i])
12070 return SDValue();
12071 } else if (User->getOpcode() == ISD::SELECT_CC) {
12072 if (User->getOperand(0) == PromOps[i] ||
12073 User->getOperand(1) == PromOps[i])
12074 return SDValue();
12075 }
12076 }
12077 }
12078
12079 // Replace all inputs with the extension operand.
12080 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
12081 // Constants may have users outside the cluster of to-be-promoted nodes,
12082 // and so we need to replace those as we do the promotions.
12083 if (isa<ConstantSDNode>(Inputs[i]))
12084 continue;
12085 else
12086 DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
12087 }
12088
12089 std::list<HandleSDNode> PromOpHandles;
12090 for (auto &PromOp : PromOps)
12091 PromOpHandles.emplace_back(PromOp);
12092
12093 // Replace all operations (these are all the same, but have a different
12094 // (i1) return type). DAG.getNode will validate that the types of
12095 // a binary operator match, so go through the list in reverse so that
12096 // we've likely promoted both operands first. Any intermediate truncations or
12097 // extensions disappear.
12098 while (!PromOpHandles.empty()) {
12099 SDValue PromOp = PromOpHandles.back().getValue();
12100 PromOpHandles.pop_back();
12101
12102 if (PromOp.getOpcode() == ISD::TRUNCATE ||
12103 PromOp.getOpcode() == ISD::SIGN_EXTEND ||
12104 PromOp.getOpcode() == ISD::ZERO_EXTEND ||
12105 PromOp.getOpcode() == ISD::ANY_EXTEND) {
12106 if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
12107 PromOp.getOperand(0).getValueType() != MVT::i1) {
12108 // The operand is not yet ready (see comment below).
12109 PromOpHandles.emplace_front(PromOp);
12110 continue;
12111 }
12112
12113 SDValue RepValue = PromOp.getOperand(0);
12114 if (isa<ConstantSDNode>(RepValue))
12115 RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
12116
12117 DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
12118 continue;
12119 }
12120
12121 unsigned C;
12122 switch (PromOp.getOpcode()) {
12123 default: C = 0; break;
12124 case ISD::SELECT: C = 1; break;
12125 case ISD::SELECT_CC: C = 2; break;
12126 }
12127
12128 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
12129 PromOp.getOperand(C).getValueType() != MVT::i1) ||
12130 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
12131 PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
12132 // The to-be-promoted operands of this node have not yet been
12133 // promoted (this should be rare because we're going through the
12134 // list backward, but if one of the operands has several users in
12135 // this cluster of to-be-promoted nodes, it is possible).
12136 PromOpHandles.emplace_front(PromOp);
12137 continue;
12138 }
12139
12140 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
12141 PromOp.getNode()->op_end());
12142
12143 // If there are any constant inputs, make sure they're replaced now.
12144 for (unsigned i = 0; i < 2; ++i)
12145 if (isa<ConstantSDNode>(Ops[C+i]))
12146 Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
12147
12148 DAG.ReplaceAllUsesOfValueWith(PromOp,
12149 DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
12150 }
12151
12152 // Now we're left with the initial truncation itself.
12153 if (N->getOpcode() == ISD::TRUNCATE)
12154 return N->getOperand(0);
12155
12156 // Otherwise, this is a comparison. The operands to be compared have just
12157 // changed type (to i1), but everything else is the same.
12158 return SDValue(N, 0);
12159}
12160
12161SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
12162 DAGCombinerInfo &DCI) const {
12163 SelectionDAG &DAG = DCI.DAG;
12164 SDLoc dl(N);
12165
12166 // If we're tracking CR bits, we need to be careful that we don't have:
12167 // zext(binary-ops(trunc(x), trunc(y)))
12168 // or
12169 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
12170 // such that we're unnecessarily moving things into CR bits that can more
12171 // efficiently stay in GPRs. Note that if we're not certain that the high
12172 // bits are set as required by the final extension, we still may need to do
12173 // some masking to get the proper behavior.
12174
12175 // This same functionality is important on PPC64 when dealing with
12176 // 32-to-64-bit extensions; these occur often when 32-bit values are used as
12177 // the return values of functions. Because it is so similar, it is handled
12178 // here as well.
12179
12180 if (N->getValueType(0) != MVT::i32 &&
12181 N->getValueType(0) != MVT::i64)
12182 return SDValue();
12183
12184 if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
12185 (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
12186 return SDValue();
12187
12188 if (N->getOperand(0).getOpcode() != ISD::AND &&
12189 N->getOperand(0).getOpcode() != ISD::OR &&
12190 N->getOperand(0).getOpcode() != ISD::XOR &&
12191 N->getOperand(0).getOpcode() != ISD::SELECT &&
12192 N->getOperand(0).getOpcode() != ISD::SELECT_CC)
12193 return SDValue();
12194
12195 SmallVector<SDValue, 4> Inputs;
12196 SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
12197 SmallPtrSet<SDNode *, 16> Visited;
12198
12199 // Visit all inputs, collect all binary operations (and, or, xor and
12200 // select) that are all fed by truncations.
12201 while (!BinOps.empty()) {
12202 SDValue BinOp = BinOps.back();
12203 BinOps.pop_back();
12204
12205 if (!Visited.insert(BinOp.getNode()).second)
12206 continue;
12207
12208 PromOps.push_back(BinOp);
12209
12210 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
12211 // The condition of the select is not promoted.
12212 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
12213 continue;
12214 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
12215 continue;
12216
12217 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
12218 isa<ConstantSDNode>(BinOp.getOperand(i))) {
12219 Inputs.push_back(BinOp.getOperand(i));
12220 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
12221 BinOp.getOperand(i).getOpcode() == ISD::OR ||
12222 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
12223 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
12224 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
12225 BinOps.push_back(BinOp.getOperand(i));
12226 } else {
12227 // We have an input that is not a truncation or another binary
12228 // operation; we'll abort this transformation.
12229 return SDValue();
12230 }
12231 }
12232 }
12233
12234 // The operands of a select that must be truncated when the select is
12235 // promoted because the operand is actually part of the to-be-promoted set.
12236 DenseMap<SDNode *, EVT> SelectTruncOp[2];
12237
12238 // Make sure that this is a self-contained cluster of operations (which
12239 // is not quite the same thing as saying that everything has only one
12240 // use).
12241 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
12242 if (isa<ConstantSDNode>(Inputs[i]))
12243 continue;
12244
12245 for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
12246 UE = Inputs[i].getNode()->use_end();
12247 UI != UE; ++UI) {
12248 SDNode *User = *UI;
12249 if (User != N && !Visited.count(User))
12250 return SDValue();
12251
12252 // If we're going to promote the non-output-value operand(s) or SELECT or
12253 // SELECT_CC, record them for truncation.
12254 if (User->getOpcode() == ISD::SELECT) {
12255 if (User->getOperand(0) == Inputs[i])
12256 SelectTruncOp[0].insert(std::make_pair(User,
12257 User->getOperand(0).getValueType()));
12258 } else if (User->getOpcode() == ISD::SELECT_CC) {
12259 if (User->getOperand(0) == Inputs[i])
12260 SelectTruncOp[0].insert(std::make_pair(User,
12261 User->getOperand(0).getValueType()));
12262 if (User->getOperand(1) == Inputs[i])
12263 SelectTruncOp[1].insert(std::make_pair(User,
12264 User->getOperand(1).getValueType()));
12265 }
12266 }
12267 }
12268
12269 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
12270 for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
12271 UE = PromOps[i].getNode()->use_end();
12272 UI != UE; ++UI) {
12273 SDNode *User = *UI;
12274 if (User != N && !Visited.count(User))
12275 return SDValue();
12276
12277 // If we're going to promote the non-output-value operand(s) or SELECT or
12278 // SELECT_CC, record them for truncation.
12279 if (User->getOpcode() == ISD::SELECT) {
12280 if (User->getOperand(0) == PromOps[i])
12281 SelectTruncOp[0].insert(std::make_pair(User,
12282 User->getOperand(0).getValueType()));
12283 } else if (User->getOpcode() == ISD::SELECT_CC) {
12284 if (User->getOperand(0) == PromOps[i])
12285 SelectTruncOp[0].insert(std::make_pair(User,
12286 User->getOperand(0).getValueType()));
12287 if (User->getOperand(1) == PromOps[i])
12288 SelectTruncOp[1].insert(std::make_pair(User,
12289 User->getOperand(1).getValueType()));
12290 }
12291 }
12292 }
12293
12294 unsigned PromBits = N->getOperand(0).getValueSizeInBits();
12295 bool ReallyNeedsExt = false;
12296 if (N->getOpcode() != ISD::ANY_EXTEND) {
12297 // If not all of the inputs are already sign/zero extended, then
12298 // we'll still need to do that at the end.
12299 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
12300 if (isa<ConstantSDNode>(Inputs[i]))
12301 continue;
12302
12303 unsigned OpBits =
12304 Inputs[i].getOperand(0).getValueSizeInBits();
12305 assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
12306
12307 if ((N->getOpcode() == ISD::ZERO_EXTEND &&
12308 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
12309 APInt::getHighBitsSet(OpBits,
12310 OpBits-PromBits))) ||
12311 (N->getOpcode() == ISD::SIGN_EXTEND &&
12312 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
12313 (OpBits-(PromBits-1)))) {
12314 ReallyNeedsExt = true;
12315 break;
12316 }
12317 }
12318 }
12319
12320 // Replace all inputs, either with the truncation operand, or a
12321 // truncation or extension to the final output type.
12322 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
12323 // Constant inputs need to be replaced with the to-be-promoted nodes that
12324 // use them because they might have users outside of the cluster of
12325 // promoted nodes.
12326 if (isa<ConstantSDNode>(Inputs[i]))
12327 continue;
12328
12329 SDValue InSrc = Inputs[i].getOperand(0);
12330 if (Inputs[i].getValueType() == N->getValueType(0))
12331 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
12332 else if (N->getOpcode() == ISD::SIGN_EXTEND)
12333 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
12334 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
12335 else if (N->getOpcode() == ISD::ZERO_EXTEND)
12336 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
12337 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
12338 else
12339 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
12340 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
12341 }
12342
12343 std::list<HandleSDNode> PromOpHandles;
12344 for (auto &PromOp : PromOps)
12345 PromOpHandles.emplace_back(PromOp);
12346
12347 // Replace all operations (these are all the same, but have a different
12348 // (promoted) return type). DAG.getNode will validate that the types of
12349 // a binary operator match, so go through the list in reverse so that
12350 // we've likely promoted both operands first.
12351 while (!PromOpHandles.empty()) {
12352 SDValue PromOp = PromOpHandles.back().getValue();
12353 PromOpHandles.pop_back();
12354
12355 unsigned C;
12356 switch (PromOp.getOpcode()) {
12357 default: C = 0; break;
12358 case ISD::SELECT: C = 1; break;
12359 case ISD::SELECT_CC: C = 2; break;
12360 }
12361
12362 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
12363 PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
12364 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
12365 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
12366 // The to-be-promoted operands of this node have not yet been
12367 // promoted (this should be rare because we're going through the
12368 // list backward, but if one of the operands has several users in
12369 // this cluster of to-be-promoted nodes, it is possible).
12370 PromOpHandles.emplace_front(PromOp);
12371 continue;
12372 }
12373
12374 // For SELECT and SELECT_CC nodes, we do a similar check for any
12375 // to-be-promoted comparison inputs.
12376 if (PromOp.getOpcode() == ISD::SELECT ||
12377 PromOp.getOpcode() == ISD::SELECT_CC) {
12378 if ((SelectTruncOp[0].count(PromOp.getNode()) &&
12379 PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
12380 (SelectTruncOp[1].count(PromOp.getNode()) &&
12381 PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
12382 PromOpHandles.emplace_front(PromOp);
12383 continue;
12384 }
12385 }
12386
12387 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
12388 PromOp.getNode()->op_end());
12389
12390 // If this node has constant inputs, then they'll need to be promoted here.
12391 for (unsigned i = 0; i < 2; ++i) {
12392 if (!isa<ConstantSDNode>(Ops[C+i]))
12393 continue;
12394 if (Ops[C+i].getValueType() == N->getValueType(0))
12395 continue;
12396
12397 if (N->getOpcode() == ISD::SIGN_EXTEND)
12398 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
12399 else if (N->getOpcode() == ISD::ZERO_EXTEND)
12400 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
12401 else
12402 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
12403 }
12404
12405 // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
12406 // truncate them again to the original value type.
12407 if (PromOp.getOpcode() == ISD::SELECT ||
12408 PromOp.getOpcode() == ISD::SELECT_CC) {
12409 auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
12410 if (SI0 != SelectTruncOp[0].end())
12411 Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
12412 auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
12413 if (SI1 != SelectTruncOp[1].end())
12414 Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
12415 }
12416
12417 DAG.ReplaceAllUsesOfValueWith(PromOp,
12418 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
12419 }
12420
12421 // Now we're left with the initial extension itself.
12422 if (!ReallyNeedsExt)
12423 return N->getOperand(0);
12424
12425 // To zero extend, just mask off everything except for the first bit (in the
12426 // i1 case).
12427 if (N->getOpcode() == ISD::ZERO_EXTEND)
12428 return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
12429 DAG.getConstant(APInt::getLowBitsSet(
12430 N->getValueSizeInBits(0), PromBits),
12431 dl, N->getValueType(0)));
12432
12433 assert(N->getOpcode() == ISD::SIGN_EXTEND &&
12434 "Invalid extension type");
12435 EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
12436 SDValue ShiftCst =
12437 DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
12438 return DAG.getNode(
12439 ISD::SRA, dl, N->getValueType(0),
12440 DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
12441 ShiftCst);
12442}
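// The final shl/sra pair above is the standard in-register sign extension
// (illustrative): for an i1 value promoted into an i32, ShiftCst is 31, so
// shifting left by 31 and then arithmetically right by 31 replicates the low
// bit across the register, producing 0 or -1 as SIGN_EXTEND requires.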
12443
12444SDValue PPCTargetLowering::combineSetCC(SDNode *N,
12445 DAGCombinerInfo &DCI) const {
12446 assert(N->getOpcode() == ISD::SETCC &&
12447 "Should be called with a SETCC node");
12448
12449 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
12450 if (CC == ISD::SETNE || CC == ISD::SETEQ) {
12451 SDValue LHS = N->getOperand(0);
12452 SDValue RHS = N->getOperand(1);
12453
12454 // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
12455 if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
12456 LHS.hasOneUse())
12457 std::swap(LHS, RHS);
12458
12459 // x == 0-y --> x+y == 0
12460 // x != 0-y --> x+y != 0
12461 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
12462 RHS.hasOneUse()) {
12463 SDLoc DL(N);
12464 SelectionDAG &DAG = DCI.DAG;
12465 EVT VT = N->getValueType(0);
12466 EVT OpVT = LHS.getValueType();
12467 SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
12468 return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
12469 }
12470 }
12471
12472 return DAGCombineTruncBoolExt(N, DCI);
12473}
12474
12475// Is this an extending load from an f32 to an f64?
12476static bool isFPExtLoad(SDValue Op) {
12477 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
12478 return LD->getExtensionType() == ISD::EXTLOAD &&
12479 Op.getValueType() == MVT::f64;
12480 return false;
12481}
12482
12483/// Reduces the number of fp-to-int conversion when building a vector.
12484///
12485/// If this vector is built out of floating to integer conversions,
12486/// transform it to a vector built out of floating point values followed by a
12487/// single floating to integer conversion of the vector.
12488/// Namely (build_vector (fptosi $A), (fptosi $B), ...)
12489/// becomes (fptosi (build_vector ($A, $B, ...)))
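//
// A minimal sketch of the rewrite, assuming a v4i32 result whose elements are
// all f64 -> i32 conversions fed by extending f32 loads:
//   (build_vector (fptosi $A), (fptosi $B), (fptosi $C), (fptosi $D))
//     -> (fptosi (build_vector (fp_round $A), ..., (fp_round $D)))
// so a single vector conversion (e.g. xvcvspsxws for v4f32 -> v4i32) replaces
// four scalar conversions plus the integer build_vector.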
12490SDValue PPCTargetLowering::
12491combineElementTruncationToVectorTruncation(SDNode *N,
12492 DAGCombinerInfo &DCI) const {
12493 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
12494 "Should be called with a BUILD_VECTOR node");
12495
12496 SelectionDAG &DAG = DCI.DAG;
12497 SDLoc dl(N);
12498
12499 SDValue FirstInput = N->getOperand(0);
12500 assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
12501 "The input operand must be an fp-to-int conversion.");
12502
12503 // This combine happens after legalization so the fp_to_[su]i nodes are
12504 // already converted to PPCISD nodes.
12505 unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
12506 if (FirstConversion == PPCISD::FCTIDZ ||
12507 FirstConversion == PPCISD::FCTIDUZ ||
12508 FirstConversion == PPCISD::FCTIWZ ||
12509 FirstConversion == PPCISD::FCTIWUZ) {
12510 bool IsSplat = true;
12511 bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
12512 FirstConversion == PPCISD::FCTIWUZ;
12513 EVT SrcVT = FirstInput.getOperand(0).getValueType();
12514 SmallVector<SDValue, 4> Ops;
12515 EVT TargetVT = N->getValueType(0);
12516 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
12517 SDValue NextOp = N->getOperand(i);
12518 if (NextOp.getOpcode() != PPCISD::MFVSR)
12519 return SDValue();
12520 unsigned NextConversion = NextOp.getOperand(0).getOpcode();
12521 if (NextConversion != FirstConversion)
12522 return SDValue();
12523 // If we are converting to 32-bit integers, we need to add an FP_ROUND.
12524 // This is not valid if the input was originally double precision. It is
12525 // also not profitable to do unless this is an extending load in which
12526 // case doing this combine will allow us to combine consecutive loads.
12527 if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
12528 return SDValue();
12529 if (N->getOperand(i) != FirstInput)
12530 IsSplat = false;
12531 }
12532
12533 // If this is a splat, we leave it as-is since there will be only a single
12534 // fp-to-int conversion followed by a splat of the integer. This is better
12535 // for 32-bit and smaller ints and neutral for 64-bit ints.
12536 if (IsSplat)
12537 return SDValue();
12538
12539 // Now that we know we have the right type of node, get its operands
12540 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
12541 SDValue In = N->getOperand(i).getOperand(0);
12542 if (Is32Bit) {
12543 // For 32-bit values, we need to add an FP_ROUND node (if we made it
12544 // here, we know that all inputs are extending loads so this is safe).
12545 if (In.isUndef())
12546 Ops.push_back(DAG.getUNDEF(SrcVT));
12547 else {
12548 SDValue Trunc = DAG.getNode(ISD::FP_ROUND, dl,
12549 MVT::f32, In.getOperand(0),
12550 DAG.getIntPtrConstant(1, dl));
12551 Ops.push_back(Trunc);
12552 }
12553 } else
12554 Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
12555 }
12556
12557 unsigned Opcode;
12558 if (FirstConversion == PPCISD::FCTIDZ ||
12559 FirstConversion == PPCISD::FCTIWZ)
12560 Opcode = ISD::FP_TO_SINT;
12561 else
12562 Opcode = ISD::FP_TO_UINT;
12563
12564 EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
12565 SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
12566 return DAG.getNode(Opcode, dl, TargetVT, BV);
12567 }
12568 return SDValue();
12569}
12570
12571/// Reduce the number of loads when building a vector.
12572///
12573/// Building a vector out of multiple loads can be converted to a load
12574/// of the vector type if the loads are consecutive. If the loads are
12575/// consecutive but in descending order, a shuffle is added at the end
12576/// to reorder the vector.
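//
// A rough sketch, assuming four consecutive f32 loads from A, A+4, A+8, A+12:
//   (build_vector (load A), (load A+4), (load A+8), (load A+12))
//     -> (load v4f32, A)
// and, when the same loads appear in descending address order,
//   (build_vector (load A+12), (load A+8), (load A+4), (load A))
//     -> (vector_shuffle (load v4f32, A), undef, <3,2,1,0>)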
12577static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
12578 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
12579 "Should be called with a BUILD_VECTOR node");
12580
12581 SDLoc dl(N);
12582
12583 // Return early for non byte-sized types, as they can't be consecutive.
12584 if (!N->getValueType(0).getVectorElementType().isByteSized())
12585 return SDValue();
12586
12587 bool InputsAreConsecutiveLoads = true;
12588 bool InputsAreReverseConsecutive = true;
12589 unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
12590 SDValue FirstInput = N->getOperand(0);
12591 bool IsRoundOfExtLoad = false;
12592
12593 if (FirstInput.getOpcode() == ISD::FP_ROUND &&
12594 FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
12595 LoadSDNode *LD = dyn_cast<LoadSDNode>(FirstInput.getOperand(0));
12596 IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD;
12597 }
12598 // Not a build vector of (possibly fp_rounded) loads.
12599 if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
12600 N->getNumOperands() == 1)
12601 return SDValue();
12602
12603 for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
12604 // If any inputs are fp_round(extload), they all must be.
12605 if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
12606 return SDValue();
12607
12608 SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
12609 N->getOperand(i);
12610 if (NextInput.getOpcode() != ISD::LOAD)
12611 return SDValue();
12612
12613 SDValue PreviousInput =
12614 IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
12615 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput);
12616 LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput);
12617
12618 // If any inputs are fp_round(extload), they all must be.
12619 if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
12620 return SDValue();
12621
12622 if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG))
12623 InputsAreConsecutiveLoads = false;
12624 if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG))
12625 InputsAreReverseConsecutive = false;
12626
12627 // Exit early if the loads are neither consecutive nor reverse consecutive.
12628 if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
12629 return SDValue();
12630 }
12631
12632 assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
12633 "The loads cannot be both consecutive and reverse consecutive.");
12634
12635 SDValue FirstLoadOp =
12636 IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput;
12637 SDValue LastLoadOp =
12638 IsRoundOfExtLoad ? N->getOperand(N->getNumOperands()-1).getOperand(0) :
12639 N->getOperand(N->getNumOperands()-1);
12640
12641 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp);
12642 LoadSDNode *LDL = dyn_cast<LoadSDNode>(LastLoadOp);
12643 if (InputsAreConsecutiveLoads) {
12644 assert(LD1 && "Input needs to be a LoadSDNode.");
12645 return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(),
12646 LD1->getBasePtr(), LD1->getPointerInfo(),
12647 LD1->getAlignment());
12648 }
12649 if (InputsAreReverseConsecutive) {
12650 assert(LDL && "Input needs to be a LoadSDNode.");
12651 SDValue Load = DAG.getLoad(N->getValueType(0), dl, LDL->getChain(),
12652 LDL->getBasePtr(), LDL->getPointerInfo(),
12653 LDL->getAlignment());
12654 SmallVector<int, 16> Ops;
12655 for (int i = N->getNumOperands() - 1; i >= 0; i--)
12656 Ops.push_back(i);
12657
12658 return DAG.getVectorShuffle(N->getValueType(0), dl, Load,
12659 DAG.getUNDEF(N->getValueType(0)), Ops);
12660 }
12661 return SDValue();
12662}
12663
12664 // This function adds the vector_shuffle needed to get
12665 // the elements of the vector extract into the correct positions,
12666 // as specified by the CorrectElems encoding.
12667static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
12668 SDValue Input, uint64_t Elems,
12669 uint64_t CorrectElems) {
12670 SDLoc dl(N);
12671
12672 unsigned NumElems = Input.getValueType().getVectorNumElements();
12673 SmallVector<int, 16> ShuffleMask(NumElems, -1);
12674
12675 // Knowing the element indices being extracted from the original
12676 // vector and the order in which they're being inserted, just put
12677 // them at element indices required for the instruction.
12678 for (unsigned i = 0; i < N->getNumOperands(); i++) {
12679 if (DAG.getDataLayout().isLittleEndian())
12680 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
12681 else
12682 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
12683 CorrectElems = CorrectElems >> 8;
12684 Elems = Elems >> 8;
12685 }
12686
12687 SDValue Shuffle =
12688 DAG.getVectorShuffle(Input.getValueType(), dl, Input,
12689 DAG.getUNDEF(Input.getValueType()), ShuffleMask);
12690
12691 EVT Ty = N->getValueType(0);
12692 SDValue BV = DAG.getNode(PPCISD::SExtVElems, dl, Ty, Shuffle);
12693 return BV;
12694}
12695
12696// Look for build vector patterns where input operands come from sign
12697// extended vector_extract elements of specific indices. If the correct indices
12698// aren't used, add a vector shuffle to fix up the indices and create a new
12699 // PPCISD::SExtVElems node which selects the vector sign extend instructions
12700// during instruction selection.
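//
// A minimal sketch, assuming a v2i64 build vector fed by sign-extended byte
// extracts from one v16i8 source on a little-endian target:
//   (build_vector (sext (extractelt v16i8:$v, 0)),
//                 (sext (extractelt v16i8:$v, 8)))
// already uses the element indices the extend instruction expects, so it
// becomes (PPCISD::SExtVElems $v) directly; with other indices a
// vector_shuffle is inserted first to move the elements into place.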
12701static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
12702 // This array encodes the indices that the vector sign extend instructions
12703 // extract from when extending from one type to another for both BE and LE.
12704 // The right nibble of each byte corresponds to the LE indices,
12705 // and the left nibble of each byte corresponds to the BE indices.
12706 // For example: 0x3074B8FC byte->word
12707 // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
12708 // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
12709 // For example: 0x000070F8 byte->double word
12710 // For LE: the allowed indices are: 0x0,0x8
12711 // For BE: the allowed indices are: 0x7,0xF
12712 uint64_t TargetElems[] = {
12713 0x3074B8FC, // b->w
12714 0x000070F8, // b->d
12715 0x10325476, // h->w
12716 0x00003074, // h->d
12717 0x00001032, // w->d
12718 };
12719
12720 uint64_t Elems = 0;
12721 int Index;
12722 SDValue Input;
12723
12724 auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
12725 if (!Op)
12726 return false;
12727 if (Op.getOpcode() != ISD::SIGN_EXTEND &&
12728 Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
12729 return false;
12730
12731 // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
12732 // of the right width.
12733 SDValue Extract = Op.getOperand(0);
12734 if (Extract.getOpcode() == ISD::ANY_EXTEND)
12735 Extract = Extract.getOperand(0);
12736 if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
12737 return false;
12738
12739 ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
12740 if (!ExtOp)
12741 return false;
12742
12743 Index = ExtOp->getZExtValue();
12744 if (Input && Input != Extract.getOperand(0))
12745 return false;
12746
12747 if (!Input)
12748 Input = Extract.getOperand(0);
12749
12750 Elems = Elems << 8;
12751 Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
12752 Elems |= Index;
12753
12754 return true;
12755 };
12756
12757 // If the build vector operands aren't sign extended vector extracts
12758 // of the same input vector, then return.
12759 for (unsigned i = 0; i < N->getNumOperands(); i++) {
12760 if (!isSExtOfVecExtract(N->getOperand(i))) {
12761 return SDValue();
12762 }
12763 }
12764
12765 // If the vector extract indices are not correct, add the appropriate
12766 // vector_shuffle.
12767 int TgtElemArrayIdx;
12768 int InputSize = Input.getValueType().getScalarSizeInBits();
12769 int OutputSize = N->getValueType(0).getScalarSizeInBits();
12770 if (InputSize + OutputSize == 40)
12771 TgtElemArrayIdx = 0;
12772 else if (InputSize + OutputSize == 72)
12773 TgtElemArrayIdx = 1;
12774 else if (InputSize + OutputSize == 48)
12775 TgtElemArrayIdx = 2;
12776 else if (InputSize + OutputSize == 80)
12777 TgtElemArrayIdx = 3;
12778 else if (InputSize + OutputSize == 96)
12779 TgtElemArrayIdx = 4;
12780 else
12781 return SDValue();
12782
12783 uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
12784 CorrectElems = DAG.getDataLayout().isLittleEndian()
12785 ? CorrectElems & 0x0F0F0F0F0F0F0F0F
12786 : CorrectElems & 0xF0F0F0F0F0F0F0F0;
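// A worked example, assuming the byte-to-doubleword entry on little endian:
// 0x000070F8 & 0x0F0F0F0F0F0F0F0F == 0x00000008, so extracts of elements 0
// and 8 (accumulated above as Elems == 0x0008) need no fix-up shuffle.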
12787 if (Elems != CorrectElems) {
12788 return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
12789 }
12790
12791 // Regular lowering will catch cases where a shuffle is not needed.
12792 return SDValue();
12793}
12794
12795SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
12796 DAGCombinerInfo &DCI) const {
12797 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
12798 "Should be called with a BUILD_VECTOR node");
12799
12800 SelectionDAG &DAG = DCI.DAG;
12801 SDLoc dl(N);
12802
12803 if (!Subtarget.hasVSX())
12804 return SDValue();
12805
12806 // The target independent DAG combiner will leave a build_vector of
12807 // float-to-int conversions intact. We can generate MUCH better code for
12808 // a float-to-int conversion of a vector of floats.
12809 SDValue FirstInput = N->getOperand(0);
12810 if (FirstInput.getOpcode() == PPCISD::MFVSR) {
12811 SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
12812 if (Reduced)
12813 return Reduced;
12814 }
12815
12816 // If we're building a vector out of consecutive loads, just load that
12817 // vector type.
12818 SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
12819 if (Reduced)
12820 return Reduced;
12821
12822 // If we're building a vector out of extended elements from another vector
12823 // we have P9 vector integer extend instructions. The code assumes legal
12824 // input types (i.e. it can't handle things like v4i16) so do not run before
12825 // legalization.
12826 if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
12827 Reduced = combineBVOfVecSExt(N, DAG);
12828 if (Reduced)
12829 return Reduced;
12830 }
12831
12832
12833 if (N->getValueType(0) != MVT::v2f64)
12834 return SDValue();
12835
12836 // Looking for:
12837 // (build_vector ([su]int_to_fp (extractelt 0)), ([su]int_to_fp (extractelt 1)))
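// A minimal sketch of the resulting node, assuming both extracts read the
// same v4i32 vector and pull elements 0 and 1 on a big-endian target:
//   (build_vector (sint_to_fp (extractelt v4i32:$v, 0)),
//                 (sint_to_fp (extractelt v4i32:$v, 1)))
//     -> (PPCISD::SINT_VEC_TO_FP $v, 0)
// converting the selected half of $v in one vector operation.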
12838 if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
12839 FirstInput.getOpcode() != ISD::UINT_TO_FP)
12840 return SDValue();
12841 if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
12842 N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
12843 return SDValue();
12844 if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
12845 return SDValue();
12846
12847 SDValue Ext1 = FirstInput.getOperand(0);
12848 SDValue Ext2 = N->getOperand(1).getOperand(0);
12849 if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
12850 Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
12851 return SDValue();
12852
12853 ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
12854 ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
12855 if (!Ext1Op || !Ext2Op)
12856 return SDValue();
12857 if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
12858 Ext1.getOperand(0) != Ext2.getOperand(0))
12859 return SDValue();
12860
12861 int FirstElem = Ext1Op->getZExtValue();
12862 int SecondElem = Ext2Op->getZExtValue();
12863 int SubvecIdx;
12864 if (FirstElem == 0 && SecondElem == 1)
12865 SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
12866 else if (FirstElem == 2 && SecondElem == 3)
12867 SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
12868 else
12869 return SDValue();
12870
12871 SDValue SrcVec = Ext1.getOperand(0);
12872 auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
12873 PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
12874 return DAG.getNode(NodeType, dl, MVT::v2f64,
12875 SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
12876}
12877
12878SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
12879 DAGCombinerInfo &DCI) const {
12880 assert((N->getOpcode() == ISD::SINT_TO_FP ||
12881 N->getOpcode() == ISD::UINT_TO_FP) &&
12882 "Need an int -> FP conversion node here");
12883
12884 if (useSoftFloat() || !Subtarget.has64BitSupport())
12885 return SDValue();
12886
12887 SelectionDAG &DAG = DCI.DAG;
12888 SDLoc dl(N);
12889 SDValue Op(N, 0);
12890
12891 // Don't handle ppc_fp128 here, or conversions whose source type is outside
12892 // what the hardware can convert directly.
12893 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
12894 return SDValue();
12895 if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
12896 Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
12897 return SDValue();
12898
12899 SDValue FirstOperand(Op.getOperand(0));
12900 bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
12901 (FirstOperand.getValueType() == MVT::i8 ||
12902 FirstOperand.getValueType() == MVT::i16);
12903 if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
12904 bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
12905 bool DstDouble = Op.getValueType() == MVT::f64;
12906 unsigned ConvOp = Signed ?
12907 (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
12908 (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
12909 SDValue WidthConst =
12910 DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
12911 dl, false);
12912 LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
12913 SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
12914 SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
12915 DAG.getVTList(MVT::f64, MVT::Other),
12916 Ops, MVT::i8, LDN->getMemOperand());
12917
12918 // For signed conversion, we need to sign-extend the value in the VSR
12919 if (Signed) {
12920 SDValue ExtOps[] = { Ld, WidthConst };
12921 SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
12922 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
12923 } else
12924 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
12925 }
12926
12927
12928 // For i32 intermediate values, unfortunately, the conversion functions
12929 // leave the upper 32 bits of the value undefined. Within the set of
12930 // scalar instructions, we have no method for zero- or sign-extending the
12931 // value. Thus, we cannot handle i32 intermediate values here.
12932 if (Op.getOperand(0).getValueType() == MVT::i32)
12933 return SDValue();
12934
12935 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
12936 "UINT_TO_FP is supported only with FPCVT");
12937
12938 // If we have FCFIDS, then use it when converting to single-precision.
12939 // Otherwise, convert to double-precision and then round.
12940 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
12941 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
12942 : PPCISD::FCFIDS)
12943 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
12944 : PPCISD::FCFID);
12945 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
12946 ? MVT::f32
12947 : MVT::f64;
12948
12949 // If we're converting from a float to an int and back to a float again,
12950 // then we don't need the store/load pair at all.
12951 if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
12952 Subtarget.hasFPCVT()) ||
12953 (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
12954 SDValue Src = Op.getOperand(0).getOperand(0);
12955 if (Src.getValueType() == MVT::f32) {
12956 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
12957 DCI.AddToWorklist(Src.getNode());
12958 } else if (Src.getValueType() != MVT::f64) {
12959 // Make sure that we don't pick up a ppc_fp128 source value.
12960 return SDValue();
12961 }
12962
12963 unsigned FCTOp =
12964 Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
12965 PPCISD::FCTIDUZ;
12966
12967 SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
12968 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
12969
12970 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
12971 FP = DAG.getNode(ISD::FP_ROUND, dl,
12972 MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
12973 DCI.AddToWorklist(FP.getNode());
12974 }
12975
12976 return FP;
12977 }
12978
12979 return SDValue();
12980}
12981
12982// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
12983// builtins) into loads with swaps.
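// A rough sketch of the expansion, assuming a normal v4i32 load on a
// little-endian VSX target that still needs swaps:
//   (v4i32 (load $ptr))
//     -> (bitcast v4i32 (PPCISD::XXSWAPD (PPCISD::LXVD2X $ptr)))
// i.e. a doubleword-permuting load followed by a swap that restores the
// expected element order, plus a bitcast back from v2f64 when needed.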
12984SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
12985 DAGCombinerInfo &DCI) const {
12986 SelectionDAG &DAG = DCI.DAG;
12987 SDLoc dl(N);
12988 SDValue Chain;
12989 SDValue Base;
12990 MachineMemOperand *MMO;
12991
12992 switch (N->getOpcode()) {
12993 default:
12994 llvm_unreachable("Unexpected opcode for little endian VSX load");
12995 case ISD::LOAD: {
12996 LoadSDNode *LD = cast<LoadSDNode>(N);
12997 Chain = LD->getChain();
12998 Base = LD->getBasePtr();
12999 MMO = LD->getMemOperand();
13000 // If the MMO suggests this isn't a load of a full vector, leave
13001 // things alone. For a built-in, we have to make the change for
13002 // correctness, so if there is a size problem that will be a bug.
13003 if (MMO->getSize() < 16)
13004 return SDValue();
13005 break;
13006 }
13007 case ISD::INTRINSIC_W_CHAIN: {
13008 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
13009 Chain = Intrin->getChain();
13010 // Similarly to the store case below, Intrin->getBasePtr() doesn't get
13011 // us what we want. Get operand 2 instead.
13012 Base = Intrin->getOperand(2);
13013 MMO = Intrin->getMemOperand();
13014 break;
13015 }
13016 }
13017
13018 MVT VecTy = N->getValueType(0).getSimpleVT();
13019
13020 // Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is
13021 // aligned and the type is a vector with elements up to 4 bytes
13022 if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment()%16)
13023 && VecTy.getScalarSizeInBits() <= 32 ) {
13024 return SDValue();
13025 }
13026
13027 SDValue LoadOps[] = { Chain, Base };
13028 SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
13029 DAG.getVTList(MVT::v2f64, MVT::Other),
13030 LoadOps, MVT::v2f64, MMO);
13031
13032 DCI.AddToWorklist(Load.getNode());
13033 Chain = Load.getValue(1);
13034 SDValue Swap = DAG.getNode(
13035 PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
13036 DCI.AddToWorklist(Swap.getNode());
13037
13038 // Add a bitcast if the resulting load type doesn't match v2f64.
13039 if (VecTy != MVT::v2f64) {
13040 SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
13041 DCI.AddToWorklist(N.getNode());
13042 // Package {bitcast value, swap's chain} to match Load's shape.
13043 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
13044 N, Swap.getValue(1));
13045 }
13046
13047 return Swap;
13048}
13049
13050// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
13051// builtins) into stores with swaps.
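// The store-side mirror of the load expansion above, again only a sketch:
//   (store v4i32:$val, $ptr)
//     -> (PPCISD::STXVD2X (PPCISD::XXSWAPD (bitcast v2f64 $val)), $ptr)
// the value is bitcast to v2f64, swapped, and then written with the
// doubleword-permuting store.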
13052SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
13053 DAGCombinerInfo &DCI) const {
13054 SelectionDAG &DAG = DCI.DAG;
13055 SDLoc dl(N);
13056 SDValue Chain;
13057 SDValue Base;
13058 unsigned SrcOpnd;
13059 MachineMemOperand *MMO;
13060
13061 switch (N->getOpcode()) {
13062 default:
13063 llvm_unreachable("Unexpected opcode for little endian VSX store");
13064 case ISD::STORE: {
13065 StoreSDNode *ST = cast<StoreSDNode>(N);
13066 Chain = ST->getChain();
13067 Base = ST->getBasePtr();
13068 MMO = ST->getMemOperand();
13069 SrcOpnd = 1;
13070 // If the MMO suggests this isn't a store of a full vector, leave
13071 // things alone. For a built-in, we have to make the change for
13072 // correctness, so if there is a size problem that will be a bug.
13073 if (MMO->getSize() < 16)
13074 return SDValue();
13075 break;
13076 }
13077 case ISD::INTRINSIC_VOID: {
13078 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
13079 Chain = Intrin->getChain();
13080 // Intrin->getBasePtr() oddly does not get what we want.
13081 Base = Intrin->getOperand(3);
13082 MMO = Intrin->getMemOperand();
13083 SrcOpnd = 2;
13084 break;
13085 }
13086 }
13087
13088 SDValue Src = N->getOperand(SrcOpnd);
13089 MVT VecTy = Src.getValueType().getSimpleVT();
13090
13091 // Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the store is
13092 // aligned and the type is a vector with elements up to 4 bytes
13093 if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment()%16)
13094 && VecTy.getScalarSizeInBits() <= 32 ) {
13095 return SDValue();
13096 }
13097
13098 // All stores are done as v2f64 and possible bit cast.
13099 if (VecTy != MVT::v2f64) {
13100 Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
13101 DCI.AddToWorklist(Src.getNode());
13102 }
13103
13104 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
13105 DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
13106 DCI.AddToWorklist(Swap.getNode());
13107 Chain = Swap.getValue(1);
13108 SDValue StoreOps[] = { Chain, Swap, Base };
13109 SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
13110 DAG.getVTList(MVT::Other),
13111 StoreOps, VecTy, MMO);
13112 DCI.AddToWorklist(Store.getNode());
13113 return Store;
13114}
13115
13116// Handle DAG combine for STORE (FP_TO_INT F).
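// A small sketch of the handled pattern, assuming an f64 value stored as i32:
//   (store (fp_to_sint f64:$x), $ptr)
//     -> (PPCISD::ST_VSR_SCAL_INT (PPCISD::FP_TO_SINT_IN_VSR $x), $ptr, 4)
// keeping the conversion result in a VSR and storing it directly; the final
// operand is the number of bytes written.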
13117SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
13118 DAGCombinerInfo &DCI) const {
13119
13120 SelectionDAG &DAG = DCI.DAG;
13121 SDLoc dl(N);
13122 unsigned Opcode = N->getOperand(1).getOpcode();
13123
13124 assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT)
13125 && "Not a FP_TO_INT Instruction!");
13126
13127 SDValue Val = N->getOperand(1).getOperand(0);
13128 EVT Op1VT = N->getOperand(1).getValueType();
13129 EVT ResVT = Val.getValueType();
13130
13131 // Floating point types smaller than 32 bits are not legal on Power.
13132 if (ResVT.getScalarSizeInBits() < 32)
13133 return SDValue();
13134
13135 // Only perform combine for conversion to i64/i32 or power9 i16/i8.
13136 bool ValidTypeForStoreFltAsInt =
13137 (Op1VT == MVT::i32 || Op1VT == MVT::i64 ||
13138 (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
13139
13140 if (ResVT == MVT::ppcf128 || !Subtarget.hasP8Altivec() ||
13141 cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
13142 return SDValue();
13143
13144 // Extend f32 values to f64
13145 if (ResVT.getScalarSizeInBits() == 32) {
13146 Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
13147 DCI.AddToWorklist(Val.getNode());
13148 }
13149
13150 // Set signed or unsigned conversion opcode.
13151 unsigned ConvOpcode = (Opcode == ISD::FP_TO_SINT) ?
13152 PPCISD::FP_TO_SINT_IN_VSR :
13153 PPCISD::FP_TO_UINT_IN_VSR;
13154
13155 Val = DAG.getNode(ConvOpcode,
13156 dl, ResVT == MVT::f128 ? MVT::f128 : MVT::f64, Val);
13157 DCI.AddToWorklist(Val.getNode());
13158
13159 // Set number of bytes being converted.
13160 unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
13161 SDValue Ops[] = { N->getOperand(0), Val, N->getOperand(2),
13162 DAG.getIntPtrConstant(ByteSize, dl, false),
13163 DAG.getValueType(Op1VT) };
13164
13165 Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
13166 DAG.getVTList(MVT::Other), Ops,
13167 cast<StoreSDNode>(N)->getMemoryVT(),
13168 cast<StoreSDNode>(N)->getMemOperand());
13169
13170 DCI.AddToWorklist(Val.getNode());
13171 return Val;
13172}
13173
13174SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
13175 LSBaseSDNode *LSBase,
13176 DAGCombinerInfo &DCI) const {
13177 assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
13178 "Not a reverse memop pattern!");
13179
13180 auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
13181 auto Mask = SVN->getMask();
13182 int i = 0;
13183 auto I = Mask.rbegin();
13184 auto E = Mask.rend();
13185
13186 for (; I != E; ++I) {
13187 if (*I != i)
13188 return false;
13189 i++;
13190 }
13191 return true;
13192 };
13193
13194 SelectionDAG &DAG = DCI.DAG;
13195 EVT VT = SVN->getValueType(0);
13196
13197 if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
13198 return SDValue();
13199
13200 // Before P9, we have PPCVSXSwapRemoval pass to hack the element order.
13201 // See comment in PPCVSXSwapRemoval.cpp.
13202 // It conflicts with the PPCVSXSwapRemoval optimization, so we don't do it.
13203 if (!Subtarget.hasP9Vector())
13204 return SDValue();
13205
13206 if(!IsElementReverse(SVN))
13207 return SDValue();
13208
13209 if (LSBase->getOpcode() == ISD::LOAD) {
13210 SDLoc dl(SVN);
13211 SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
13212 return DAG.getMemIntrinsicNode(
13213 PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
13214 LSBase->getMemoryVT(), LSBase->getMemOperand());
13215 }
13216
13217 if (LSBase->getOpcode() == ISD::STORE) {
13218 SDLoc dl(LSBase);
13219 SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
13220 LSBase->getBasePtr()};
13221 return DAG.getMemIntrinsicNode(
13222 PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
13223 LSBase->getMemoryVT(), LSBase->getMemOperand());
13224 }
13225
13226 llvm_unreachable("Expected a load or store node here");
13227}
13228
13229SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
13230 DAGCombinerInfo &DCI) const {
13231 SelectionDAG &DAG = DCI.DAG;
13232 SDLoc dl(N);
13233 switch (N->getOpcode()) {
13234 default: break;
13235 case ISD::ADD:
13236 return combineADD(N, DCI);
13237 case ISD::SHL:
13238 return combineSHL(N, DCI);
13239 case ISD::SRA:
13240 return combineSRA(N, DCI);
13241 case ISD::SRL:
13242 return combineSRL(N, DCI);
13243 case ISD::MUL:
13244 return combineMUL(N, DCI);
13245 case PPCISD::SHL:
13246 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
13247 return N->getOperand(0);
13248 break;
13249 case PPCISD::SRL:
13250 if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
13251 return N->getOperand(0);
13252 break;
13253 case PPCISD::SRA:
13254 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
13255 if (C->isNullValue() || // 0 >>s V -> 0.
13256 C->isAllOnesValue()) // -1 >>s V -> -1.
13257 return N->getOperand(0);
13258 }
13259 break;
13260 case ISD::SIGN_EXTEND:
13261 case ISD::ZERO_EXTEND:
13262 case ISD::ANY_EXTEND:
13263 return DAGCombineExtBoolTrunc(N, DCI);
13264 case ISD::TRUNCATE:
13265 return combineTRUNCATE(N, DCI);
13266 case ISD::SETCC:
13267 if (SDValue CSCC = combineSetCC(N, DCI))
13268 return CSCC;
13269 LLVM_FALLTHROUGH;
13270 case ISD::SELECT_CC:
13271 return DAGCombineTruncBoolExt(N, DCI);
13272 case ISD::SINT_TO_FP:
13273 case ISD::UINT_TO_FP:
13274 return combineFPToIntToFP(N, DCI);
13275 case ISD::VECTOR_SHUFFLE:
13276 if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
13277 LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
13278 return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
13279 }
13280 break;
13281 case ISD::STORE: {
13282
13283 EVT Op1VT = N->getOperand(1).getValueType();
13284 unsigned Opcode = N->getOperand(1).getOpcode();
13285
13286 if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT) {
13287 SDValue Val= combineStoreFPToInt(N, DCI);
13288 if (Val)
13289 return Val;
13290 }
13291
13292 if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
13293 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
13294 SDValue Val= combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
13295 if (Val)
13296 return Val;
13297 }
13298
13299 // Turn STORE (BSWAP) -> sthbrx/stwbrx.
13300 if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
13301 N->getOperand(1).getNode()->hasOneUse() &&
13302 (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
13303 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
13304
13305 // STBRX can only handle simple types and it makes no sense to store less
13306 // than two bytes in byte-reversed order.
13307 EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
13308 if (mVT.isExtended() || mVT.getSizeInBits() < 16)
13309 break;
13310
13311 SDValue BSwapOp = N->getOperand(1).getOperand(0);
13312 // Do an any-extend to 32-bits if this is a half-word input.
13313 if (BSwapOp.getValueType() == MVT::i16)
13314 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
13315
13316 // If the type of the BSWAP operand is wider than the stored memory width,
13317 // it needs to be shifted right before STBRX.
13318 if (Op1VT.bitsGT(mVT)) {
13319 int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
13320 BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
13321 DAG.getConstant(Shift, dl, MVT::i32));
13322 // Need to truncate if this is a bswap of i64 stored as i32/i16.
13323 if (Op1VT == MVT::i64)
13324 BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
13325 }
13326
13327 SDValue Ops[] = {
13328 N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
13329 };
13330 return
13331 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
13332 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
13333 cast<StoreSDNode>(N)->getMemOperand());
13334 }
13335
13336 // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
13337 // This increases the chance of CSE for constant construction.
13338 if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
13339 isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
13340 // Need to sign-extend to 64 bits to handle negative values.
13341 EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
13342 uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
13343 MemVT.getSizeInBits());
13344 SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
13345
13346 // DAG.getTruncStore() can't be used here because it doesn't accept
13347 // the general (base + offset) addressing mode.
13348 // So we use UpdateNodeOperands and setTruncatingStore instead.
13349 DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
13350 N->getOperand(3));
13351 cast<StoreSDNode>(N)->setTruncatingStore(true);
13352 return SDValue(N, 0);
13353 }
13354
13355 // For little endian, VSX stores require generating xxswapd/lxvd2x.
13356 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
13357 if (Op1VT.isSimple()) {
13358 MVT StoreVT = Op1VT.getSimpleVT();
13359 if (Subtarget.needsSwapsForVSXMemOps() &&
13360 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
13361 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
13362 return expandVSXStoreForLE(N, DCI);
13363 }
13364 break;
13365 }
13366 case ISD::LOAD: {
13367 LoadSDNode *LD = cast<LoadSDNode>(N);
13368 EVT VT = LD->getValueType(0);
13369
13370 // For little endian, VSX loads require generating lxvd2x/xxswapd.
13371 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
13372 if (VT.isSimple()) {
13373 MVT LoadVT = VT.getSimpleVT();
13374 if (Subtarget.needsSwapsForVSXMemOps() &&
13375 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
13376 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
13377 return expandVSXLoadForLE(N, DCI);
13378 }
13379
13380 // We sometimes end up with a 64-bit integer load, from which we extract
13381 // two single-precision floating-point numbers. This happens with
13382 // std::complex<float>, and other similar structures, because of the way we
13383 // canonicalize structure copies. However, if we lack direct moves,
13384 // then the final bitcasts from the extracted integer values to the
13385 // floating-point numbers turn into store/load pairs. Even with direct moves,
13386 // just loading the two floating-point numbers is likely better.
13387 auto ReplaceTwoFloatLoad = [&]() {
13388 if (VT != MVT::i64)
13389 return false;
13390
13391 if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
13392 LD->isVolatile())
13393 return false;
13394
13395 // We're looking for a sequence like this:
13396 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
13397 // t16: i64 = srl t13, Constant:i32<32>
13398 // t17: i32 = truncate t16
13399 // t18: f32 = bitcast t17
13400 // t19: i32 = truncate t13
13401 // t20: f32 = bitcast t19
13402
13403 if (!LD->hasNUsesOfValue(2, 0))
13404 return false;
13405
13406 auto UI = LD->use_begin();
13407 while (UI.getUse().getResNo() != 0) ++UI;
13408 SDNode *Trunc = *UI++;
13409 while (UI.getUse().getResNo() != 0) ++UI;
13410 SDNode *RightShift = *UI;
13411 if (Trunc->getOpcode() != ISD::TRUNCATE)
13412 std::swap(Trunc, RightShift);
13413
13414 if (Trunc->getOpcode() != ISD::TRUNCATE ||
13415 Trunc->getValueType(0) != MVT::i32 ||
13416 !Trunc->hasOneUse())
13417 return false;
13418 if (RightShift->getOpcode() != ISD::SRL ||
13419 !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
13420 RightShift->getConstantOperandVal(1) != 32 ||
13421 !RightShift->hasOneUse())
13422 return false;
13423
13424 SDNode *Trunc2 = *RightShift->use_begin();
13425 if (Trunc2->getOpcode() != ISD::TRUNCATE ||
13426 Trunc2->getValueType(0) != MVT::i32 ||
13427 !Trunc2->hasOneUse())
13428 return false;
13429
13430 SDNode *Bitcast = *Trunc->use_begin();
13431 SDNode *Bitcast2 = *Trunc2->use_begin();
13432
13433 if (Bitcast->getOpcode() != ISD::BITCAST ||
13434 Bitcast->getValueType(0) != MVT::f32)
13435 return false;
13436 if (Bitcast2->getOpcode() != ISD::BITCAST ||
13437 Bitcast2->getValueType(0) != MVT::f32)
13438 return false;
13439
13440 if (Subtarget.isLittleEndian())
13441 std::swap(Bitcast, Bitcast2);
13442
13443 // Bitcast has the second float (in memory-layout order) and Bitcast2
13444 // has the first one.
13445
13446 SDValue BasePtr = LD->getBasePtr();
13447 if (LD->isIndexed()) {
13448 assert(LD->getAddressingMode() == ISD::PRE_INC &&
13449 "Non-pre-inc AM on PPC?");
13450 BasePtr =
13451 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
13452 LD->getOffset());
13453 }
13454
13455 auto MMOFlags =
13456 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
13457 SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
13458 LD->getPointerInfo(), LD->getAlignment(),
13459 MMOFlags, LD->getAAInfo());
13460 SDValue AddPtr =
13461 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
13462 BasePtr, DAG.getIntPtrConstant(4, dl));
13463 SDValue FloatLoad2 = DAG.getLoad(
13464 MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
13465 LD->getPointerInfo().getWithOffset(4),
13466 MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo());
13467
13468 if (LD->isIndexed()) {
13469 // Note that DAGCombine should re-form any pre-increment load(s) from
13470 // what is produced here if that makes sense.
13471 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
13472 }
13473
13474 DCI.CombineTo(Bitcast2, FloatLoad);
13475 DCI.CombineTo(Bitcast, FloatLoad2);
13476
13477 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
13478 SDValue(FloatLoad2.getNode(), 1));
13479 return true;
13480 };
13481
13482 if (ReplaceTwoFloatLoad())
13483 return SDValue(N, 0);
13484
13485 EVT MemVT = LD->getMemoryVT();
13486 Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
13487 unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty);
13488 Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext());
13489 unsigned ScalarABIAlignment = DAG.getDataLayout().getABITypeAlignment(STy);
13490 if (LD->isUnindexed() && VT.isVector() &&
13491 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
13492 // P8 and later hardware should just use LOAD.
13493 !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 ||
13494 VT == MVT::v4i32 || VT == MVT::v4f32)) ||
13495 (Subtarget.hasQPX() && (VT == MVT::v4f64 || VT == MVT::v4f32) &&
13496 LD->getAlignment() >= ScalarABIAlignment)) &&
13497 LD->getAlignment() < ABIAlignment) {
13498 // This is a type-legal unaligned Altivec or QPX load.
13499 SDValue Chain = LD->getChain();
13500 SDValue Ptr = LD->getBasePtr();
13501 bool isLittleEndian = Subtarget.isLittleEndian();
13502
13503 // This implements the loading of unaligned vectors as described in
13504 // the venerable Apple Velocity Engine overview. Specifically:
13505 // https://developer.apple.com/hardwaredrivers/ve/alignment.html
13506 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
13507 //
13508 // The general idea is to expand a sequence of one or more unaligned
13509 // loads into an alignment-based permutation-control instruction (lvsl
13510 // or lvsr), a series of regular vector loads (which always truncate
13511 // their input address to an aligned address), and a series of
13512 // permutations. The results of these permutations are the requested
13513 // loaded values. The trick is that the last "extra" load is not taken
13514 // from the address you might suspect (sizeof(vector) bytes after the
13515 // last requested load), but rather sizeof(vector) - 1 bytes after the
13516 // last requested vector. The point of this is to avoid a page fault if
13517 // the base address happened to be aligned. This works because if the
13518 // base address is aligned, then adding less than a full vector length
13519 // will cause the last vector in the sequence to be (re)loaded.
13520 // Otherwise, the next vector will be fetched as you might suspect was
13521 // necessary.
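// As a concrete sketch, for a 16-byte Altivec load from an address addr with
// some misalignment, the expansion below is roughly:
//   mask   = lvsl(addr)      ; permute control derived from the low addr bits
//   lo     = lvx(addr)       ; aligned 16 bytes containing addr
//   hi     = lvx(addr + 15)  ; next aligned vector (same one if addr was
//                            ; already aligned)
//   result = vperm(lo, hi, mask)
// (on little endian, lvsr is used instead and the vperm inputs are swapped).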
13522
13523 // We might be able to reuse the permutation generation from
13524 // a different base address offset from this one by an aligned amount.
13525 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
13526 // optimization later.
13527 Intrinsic::ID Intr, IntrLD, IntrPerm;
13528 MVT PermCntlTy, PermTy, LDTy;
13529 if (Subtarget.hasAltivec()) {
13530 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr :
13531 Intrinsic::ppc_altivec_lvsl;
13532 IntrLD = Intrinsic::ppc_altivec_lvx;
13533 IntrPerm = Intrinsic::ppc_altivec_vperm;
13534 PermCntlTy = MVT::v16i8;
13535 PermTy = MVT::v4i32;
13536 LDTy = MVT::v4i32;
13537 } else {
13538 Intr = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlpcld :
13539 Intrinsic::ppc_qpx_qvlpcls;
13540 IntrLD = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlfd :
13541 Intrinsic::ppc_qpx_qvlfs;
13542 IntrPerm = Intrinsic::ppc_qpx_qvfperm;
13543 PermCntlTy = MVT::v4f64;
13544 PermTy = MVT::v4f64;
13545 LDTy = MemVT.getSimpleVT();
13546 }
13547
13548 SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
13549
13550 // Create the new MMO for the new base load. It is like the original MMO,
13551 // but represents an area in memory almost twice the vector size centered
13552 // on the original address. If the address is unaligned, we might start
13553 // reading up to (sizeof(vector)-1) bytes below the address of the
13554 // original unaligned load.
13555 MachineFunction &MF = DAG.getMachineFunction();
13556 MachineMemOperand *BaseMMO =
13557 MF.getMachineMemOperand(LD->getMemOperand(),
13558 -(long)MemVT.getStoreSize()+1,
13559 2*MemVT.getStoreSize()-1);
13560
13561 // Create the new base load.
13562 SDValue LDXIntID =
13563 DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
13564 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
13565 SDValue BaseLoad =
13566 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
13567 DAG.getVTList(PermTy, MVT::Other),
13568 BaseLoadOps, LDTy, BaseMMO);
13569
13570 // Note that the value of IncOffset (which is provided to the next
13571 // load's pointer info offset value, and thus used to calculate the
13572 // alignment), and the value of IncValue (which is actually used to
13573 // increment the pointer value) are different! This is because we
13574 // require the next load to appear to be aligned, even though it
13575 // is actually offset from the base pointer by a lesser amount.
13576 int IncOffset = VT.getSizeInBits() / 8;
13577 int IncValue = IncOffset;
13578
13579 // Walk (both up and down) the chain looking for another load at the real
13580 // (aligned) offset (the alignment of the other load does not matter in
13581 // this case). If found, then do not use the offset reduction trick, as
13582 // that will prevent the loads from being later combined (as they would
13583 // otherwise be duplicates).
13584 if (!findConsecutiveLoad(LD, DAG))
13585 --IncValue;
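// E.g. for a 16-byte vector this leaves IncOffset at 16 while IncValue drops
// to 15, the "sizeof(vector) - 1" trick described in the block comment above.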
13586
13587 SDValue Increment =
13588 DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
13589 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
13590
13591 MachineMemOperand *ExtraMMO =
13592 MF.getMachineMemOperand(LD->getMemOperand(),
13593 1, 2*MemVT.getStoreSize()-1);
13594 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
13595 SDValue ExtraLoad =
13596 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
13597 DAG.getVTList(PermTy, MVT::Other),
13598 ExtraLoadOps, LDTy, ExtraMMO);
13599
13600 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
13601 BaseLoad.getValue(1), ExtraLoad.getValue(1));
13602
13603 // Because vperm has a big-endian bias, we must reverse the order
13604 // of the input vectors and complement the permute control vector
13605 // when generating little endian code. We have already handled the
13606 // latter by using lvsr instead of lvsl, so just reverse BaseLoad
13607 // and ExtraLoad here.
13608 SDValue Perm;
13609 if (isLittleEndian)
13610 Perm = BuildIntrinsicOp(IntrPerm,
13611 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
13612 else
13613 Perm = BuildIntrinsicOp(IntrPerm,
13614 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
13615
13616 if (VT != PermTy)
13617 Perm = Subtarget.hasAltivec() ?
13618 DAG.getNode(ISD::BITCAST, dl, VT, Perm) :
13619 DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, // QPX
13620 DAG.getTargetConstant(1, dl, MVT::i64));
13621 // second argument is 1 because this rounding
13622 // is always exact.
13623
13624 // The output of the permutation is our loaded result, the TokenFactor is
13625 // our new chain.
13626 DCI.CombineTo(N, Perm, TF);
13627 return SDValue(N, 0);
13628 }
13629 }
13630 break;
13631 case ISD::INTRINSIC_WO_CHAIN: {
13632 bool isLittleEndian = Subtarget.isLittleEndian();
13633 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
13634 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
13635 : Intrinsic::ppc_altivec_lvsl);
13636 if ((IID == Intr ||
13637 IID == Intrinsic::ppc_qpx_qvlpcld ||
13638 IID == Intrinsic::ppc_qpx_qvlpcls) &&
13639 N->getOperand(1)->getOpcode() == ISD::ADD) {
13640 SDValue Add = N->getOperand(1);
13641
13642 int Bits = IID == Intrinsic::ppc_qpx_qvlpcld ?
13643 5 /* 32 byte alignment */ : 4 /* 16 byte alignment */;
13644
13645 if (DAG.MaskedValueIsZero(Add->getOperand(1),
13646 APInt::getAllOnesValue(Bits /* alignment */)
13647 .zext(Add.getScalarValueSizeInBits()))) {
13648 SDNode *BasePtr = Add->getOperand(0).getNode();
13649 for (SDNode::use_iterator UI = BasePtr->use_begin(),
13650 UE = BasePtr->use_end();
13651 UI != UE; ++UI) {
13652 if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
13653 cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() == IID) {
13654 // We've found another LVSL/LVSR, and this address is an aligned
13655 // multiple of that one. The results will be the same, so use the
13656 // one we've just found instead.
13657
13658 return SDValue(*UI, 0);
13659 }
13660 }
13661 }
13662
13663 if (isa<ConstantSDNode>(Add->getOperand(1))) {
13664 SDNode *BasePtr = Add->getOperand(0).getNode();
13665 for (SDNode::use_iterator UI = BasePtr->use_begin(),
13666 UE = BasePtr->use_end(); UI != UE; ++UI) {
13667 if (UI->getOpcode() == ISD::ADD &&
13668 isa<ConstantSDNode>(UI->getOperand(1)) &&
13669 (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
13670 cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) %
13671 (1ULL << Bits) == 0) {
13672 SDNode *OtherAdd = *UI;
13673 for (SDNode::use_iterator VI = OtherAdd->use_begin(),
13674 VE = OtherAdd->use_end(); VI != VE; ++VI) {
13675 if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
13676 cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) {
13677 return SDValue(*VI, 0);
13678 }
13679 }
13680 }
13681 }
13682 }
13683 }
13684
13685 // Combine vmaxsw/h/b(a, a's negation) to abs(a)
13686 // Expose the vabsduw/h/b opportunity for downstream combines.
13687 if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
13688 (IID == Intrinsic::ppc_altivec_vmaxsw ||
13689 IID == Intrinsic::ppc_altivec_vmaxsh ||
13690 IID == Intrinsic::ppc_altivec_vmaxsb)) {
13691 SDValue V1 = N->getOperand(1);
13692 SDValue V2 = N->getOperand(2);
13693 if ((V1.getSimpleValueType() == MVT::v4i32 ||
13694 V1.getSimpleValueType() == MVT::v8i16 ||
13695 V1.getSimpleValueType() == MVT::v16i8) &&
13696 V1.getSimpleValueType() == V2.getSimpleValueType()) {
13697 // (0-a, a)
13698 if (V1.getOpcode() == ISD::SUB &&
13699 ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
13700 V1.getOperand(1) == V2) {
13701 return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
13702 }
13703 // (a, 0-a)
13704 if (V2.getOpcode() == ISD::SUB &&
13705 ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
13706 V2.getOperand(1) == V1) {
13707 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
13708 }
13709 // (x-y, y-x)
13710 if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
13711 V1.getOperand(0) == V2.getOperand(1) &&
13712 V1.getOperand(1) == V2.getOperand(0)) {
13713 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
13714 }
13715 }
13716 }
13717 }
13718
13719 break;
13720 case ISD::INTRINSIC_W_CHAIN:
13721 // For little endian, VSX loads require generating lxvd2x/xxswapd.
13722 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
13723 if (Subtarget.needsSwapsForVSXMemOps()) {
13724 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
13725 default:
13726 break;
13727 case Intrinsic::ppc_vsx_lxvw4x:
13728 case Intrinsic::ppc_vsx_lxvd2x:
13729 return expandVSXLoadForLE(N, DCI);
13730 }
13731 }
13732 break;
13733 case ISD::INTRINSIC_VOID:
13734 // For little endian, VSX stores require generating xxswapd/stxvd2x.
13735 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
13736 if (Subtarget.needsSwapsForVSXMemOps()) {
13737 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
13738 default:
13739 break;
13740 case Intrinsic::ppc_vsx_stxvw4x:
13741 case Intrinsic::ppc_vsx_stxvd2x:
13742 return expandVSXStoreForLE(N, DCI);
13743 }
13744 }
13745 break;
13746 case ISD::BSWAP:
13747 // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
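// Editorial note (illustrative): e.g. (i32 (bswap (load p))) is replaced by a
// single byte-reversed load, PPCISD::LBRX, which selects to lwbrx (lhbrx for
// i16, and ldbrx for i64 on subtargets that have it), so no separate swap is
// emitted.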
13748 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
13749 N->getOperand(0).hasOneUse() &&
13750 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
13751 (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
13752 N->getValueType(0) == MVT::i64))) {
13753 SDValue Load = N->getOperand(0);
13754 LoadSDNode *LD = cast<LoadSDNode>(Load);
13755 // Create the byte-swapping load.
13756 SDValue Ops[] = {
13757 LD->getChain(), // Chain
13758 LD->getBasePtr(), // Ptr
13759 DAG.getValueType(N->getValueType(0)) // VT
13760 };
13761 SDValue BSLoad =
13762 DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
13763 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
13764 MVT::i64 : MVT::i32, MVT::Other),
13765 Ops, LD->getMemoryVT(), LD->getMemOperand());
13766
13767 // If this is an i16 load, insert the truncate.
13768 SDValue ResVal = BSLoad;
13769 if (N->getValueType(0) == MVT::i16)
13770 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
13771
13772 // First, combine the bswap away. This makes the value produced by the
13773 // load dead.
13774 DCI.CombineTo(N, ResVal);
13775
13776 // Next, combine the load away; we give it a bogus result value but a real
13777 // chain result. The result value is dead because the bswap is dead.
13778 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
13779
13780 // Return N so it doesn't get rechecked!
13781 return SDValue(N, 0);
13782 }
13783 break;
13784 case PPCISD::VCMP:
13785 // If a VCMPo node already exists with exactly the same operands as this
13786 // node, use its result instead of this node (VCMPo computes both a CR6 and
13787 // a normal output).
13788 //
13789 if (!N->getOperand(0).hasOneUse() &&
13790 !N->getOperand(1).hasOneUse() &&
13791 !N->getOperand(2).hasOneUse()) {
13792
13793 // Scan all of the users of the LHS, looking for VCMPo's that match.
13794 SDNode *VCMPoNode = nullptr;
13795
13796 SDNode *LHSN = N->getOperand(0).getNode();
13797 for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
13798 UI != E; ++UI)
13799 if (UI->getOpcode() == PPCISD::VCMPo &&
13800 UI->getOperand(1) == N->getOperand(1) &&
13801 UI->getOperand(2) == N->getOperand(2) &&
13802 UI->getOperand(0) == N->getOperand(0)) {
13803 VCMPoNode = *UI;
13804 break;
13805 }
13806
13807 // If there is no VCMPo node, or if its flag result has no uses, don't
13808 // transform this.
13809 if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
13810 break;
13811
13812 // Look at the (necessarily single) use of the flag value. If it has a
13813 // chain, this transformation is more complex. Note that multiple things
13814 // could use the value result, which we should ignore.
13815 SDNode *FlagUser = nullptr;
13816 for (SDNode::use_iterator UI = VCMPoNode->use_begin();
13817 FlagUser == nullptr; ++UI) {
13818 assert(UI != VCMPoNode->use_end() && "Didn't find user!");
13819 SDNode *User = *UI;
13820 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
13821 if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
13822 FlagUser = User;
13823 break;
13824 }
13825 }
13826 }
13827
13828 // If the user is a MFOCRF instruction, we know this is safe.
13829 // Otherwise we give up for right now.
13830 if (FlagUser->getOpcode() == PPCISD::MFOCRF)
13831 return SDValue(VCMPoNode, 0);
13832 }
13833 break;
13834 case ISD::BRCOND: {
13835 SDValue Cond = N->getOperand(1);
13836 SDValue Target = N->getOperand(2);
13837
13838 if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
13839 cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
13840 Intrinsic::loop_decrement) {
13841
13842 // We now need to make the intrinsic dead (it cannot be instruction
13843 // selected).
13844 DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
13845 assert(Cond.getNode()->hasOneUse() &&
13846 "Counter decrement has more than one use");
13847
13848 return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
13849 N->getOperand(0), Target);
13850 }
13851 }
13852 break;
13853 case ISD::BR_CC: {
13854 // If this is a branch on an altivec predicate comparison, lower this so
13855 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
13856 // lowering is done pre-legalize, because the legalizer lowers the predicate
13857 // compare down to code that is difficult to reassemble.
13858 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
13859 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
13860
13861 // Sometimes the promoted value of the intrinsic is ANDed with some non-zero
13862 // value. If so, look through the AND to get to the intrinsic.
13863 if (LHS.getOpcode() == ISD::AND &&
13864 LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
13865 cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
13866 Intrinsic::loop_decrement &&
13867 isa<ConstantSDNode>(LHS.getOperand(1)) &&
13868 !isNullConstant(LHS.getOperand(1)))
13869 LHS = LHS.getOperand(0);
13870
13871 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
13872 cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
13873 Intrinsic::loop_decrement &&
13874 isa<ConstantSDNode>(RHS)) {
13875 assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
13876 "Counter decrement comparison is not EQ or NE");
13877
13878 unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
13879 bool isBDNZ = (CC == ISD::SETEQ && Val) ||
13880 (CC == ISD::SETNE && !Val);
13881
13882 // We now need to make the intrinsic dead (it cannot be instruction
13883 // selected).
13884 DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
13885 assert(LHS.getNode()->hasOneUse() &&
13886 "Counter decrement has more than one use");
13887
13888 return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
13889 N->getOperand(0), N->getOperand(4));
13890 }
13891
13892 int CompareOpc;
13893 bool isDot;
13894
13895 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
13896 isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
13897 getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
13898 assert(isDot && "Can't compare against a vector result!");
13899
13900 // If this is a comparison against something other than 0/1, then we know
13901 // that the condition is never/always true.
13902 unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
13903 if (Val != 0 && Val != 1) {
13904 if (CC == ISD::SETEQ) // Cond never true, remove branch.
13905 return N->getOperand(0);
13906 // Always !=, turn it into an unconditional branch.
13907 return DAG.getNode(ISD::BR, dl, MVT::Other,
13908 N->getOperand(0), N->getOperand(4));
13909 }
13910
13911 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
13912
13913 // Create the PPCISD altivec 'dot' comparison node.
13914 SDValue Ops[] = {
13915 LHS.getOperand(2), // LHS of compare
13916 LHS.getOperand(3), // RHS of compare
13917 DAG.getConstant(CompareOpc, dl, MVT::i32)
13918 };
13919 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
13920 SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
13921
13922 // Unpack the result based on how the target uses it.
13923 PPC::Predicate CompOpc;
13924 switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
13925 default: // Can't happen, don't crash on invalid number though.
13926 case 0: // Branch on the value of the EQ bit of CR6.
13927 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
13928 break;
13929 case 1: // Branch on the inverted value of the EQ bit of CR6.
13930 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
13931 break;
13932 case 2: // Branch on the value of the LT bit of CR6.
13933 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
13934 break;
13935 case 3: // Branch on the inverted value of the LT bit of CR6.
13936 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
13937 break;
13938 }
13939
13940 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
13941 DAG.getConstant(CompOpc, dl, MVT::i32),
13942 DAG.getRegister(PPC::CR6, MVT::i32),
13943 N->getOperand(4), CompNode.getValue(1));
13944 }
13945 break;
13946 }
13947 case ISD::BUILD_VECTOR:
13948 return DAGCombineBuildVector(N, DCI);
13949 case ISD::ABS:
13950 return combineABS(N, DCI);
13951 case ISD::VSELECT:
13952 return combineVSelect(N, DCI);
13953 }
13954
13955 return SDValue();
13956}
13957
13958SDValue
13959PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
13960 SelectionDAG &DAG,
13961 SmallVectorImpl<SDNode *> &Created) const {
13962 // fold (sdiv X, pow2)
13963 EVT VT = N->getValueType(0);
13964 if (VT == MVT::i64 && !Subtarget.isPPC64())
13965 return SDValue();
13966 if ((VT != MVT::i32 && VT != MVT::i64) ||
13967 !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
13968 return SDValue();
13969
13970 SDLoc DL(N);
13971 SDValue N0 = N->getOperand(0);
13972
13973 bool IsNegPow2 = (-Divisor).isPowerOf2();
13974 unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
13975 SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
13976
13977 SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
13978 Created.push_back(Op.getNode());
13979
13980 if (IsNegPow2) {
13981 Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
13982 Created.push_back(Op.getNode());
13983 }
13984
13985 return Op;
13986}
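// Editorial sketch (not part of the original source): the SRA_ADDZE node built
// above corresponds to sra[wd]i followed by addze, which rounds toward zero as
// required for signed division. A scalar C equivalent for a positive
// power-of-two divisor, assuming 0 < Lg2 < 31:
//   int32_t sdiv_pow2(int32_t X, unsigned Lg2) {
//     int32_t Q = X >> Lg2;                        // rounds toward -infinity
//     if (X < 0 && (X & ((1 << Lg2) - 1)) != 0)
//       Q += 1;                                    // correct toward zero
//     return Q;                                    // negated above if Divisor < 0
//   }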
13987
13988//===----------------------------------------------------------------------===//
13989// Inline Assembly Support
13990//===----------------------------------------------------------------------===//
13991
13992void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
13993 KnownBits &Known,
13994 const APInt &DemandedElts,
13995 const SelectionDAG &DAG,
13996 unsigned Depth) const {
13997 Known.resetAll();
13998 switch (Op.getOpcode()) {
13999 default: break;
14000 case PPCISD::LBRX: {
14001 // lhbrx is known to have the top bits cleared out.
14002 if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
14003 Known.Zero = 0xFFFF0000;
14004 break;
14005 }
14006 case ISD::INTRINSIC_WO_CHAIN: {
14007 switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
14008 default: break;
14009 case Intrinsic::ppc_altivec_vcmpbfp_p:
14010 case Intrinsic::ppc_altivec_vcmpeqfp_p:
14011 case Intrinsic::ppc_altivec_vcmpequb_p:
14012 case Intrinsic::ppc_altivec_vcmpequh_p:
14013 case Intrinsic::ppc_altivec_vcmpequw_p:
14014 case Intrinsic::ppc_altivec_vcmpequd_p:
14015 case Intrinsic::ppc_altivec_vcmpgefp_p:
14016 case Intrinsic::ppc_altivec_vcmpgtfp_p:
14017 case Intrinsic::ppc_altivec_vcmpgtsb_p:
14018 case Intrinsic::ppc_altivec_vcmpgtsh_p:
14019 case Intrinsic::ppc_altivec_vcmpgtsw_p:
14020 case Intrinsic::ppc_altivec_vcmpgtsd_p:
14021 case Intrinsic::ppc_altivec_vcmpgtub_p:
14022 case Intrinsic::ppc_altivec_vcmpgtuh_p:
14023 case Intrinsic::ppc_altivec_vcmpgtuw_p:
14024 case Intrinsic::ppc_altivec_vcmpgtud_p:
14025 Known.Zero = ~1U; // All bits but the low one are known to be zero.
14026 break;
14027 }
14028 }
14029 }
14030}
14031
14032llvm::Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
14033 switch (Subtarget.getDarwinDirective()) {
14034 default: break;
14035 case PPC::DIR_970:
14036 case PPC::DIR_PWR4:
14037 case PPC::DIR_PWR5:
14038 case PPC::DIR_PWR5X:
14039 case PPC::DIR_PWR6:
14040 case PPC::DIR_PWR6X:
14041 case PPC::DIR_PWR7:
14042 case PPC::DIR_PWR8:
14043 case PPC::DIR_PWR9: {
14044 if (!ML)
14045 break;
14046
14047 if (!DisableInnermostLoopAlign32) {
14048 // If the nested loop is an innermost loop, prefer a 32-byte alignment,
14049 // so that we can decrease cache misses and branch-prediction misses.
14050 // Actual alignment of the loop will depend on the hotness check and other
14051 // logic in alignBlocks.
14052 if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
14053 return llvm::Align(32);
14054 }
14055
14056 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
14057
14058 // For small loops (between 5 and 8 instructions), align to a 32-byte
14059 // boundary so that the entire loop fits in one instruction-cache line.
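// For example (illustrative): a 7-instruction body is 28 bytes, so with
// 32-byte alignment it never straddles a cache-line boundary; bodies of 16
// bytes or fewer, or of more than 32 bytes, keep the default preference.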
14060 uint64_t LoopSize = 0;
14061 for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
14062 for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) {
14063 LoopSize += TII->getInstSizeInBytes(*J);
14064 if (LoopSize > 32)
14065 break;
14066 }
14067
14068 if (LoopSize > 16 && LoopSize <= 32)
14069 return llvm::Align(32);
14070
14071 break;
14072 }
14073 }
14074
14075 return TargetLowering::getPrefLoopAlignment(ML);
14076}
14077
14078/// getConstraintType - Given a constraint, return the type of
14079/// constraint it is for this target.
14080PPCTargetLowering::ConstraintType
14081PPCTargetLowering::getConstraintType(StringRef Constraint) const {
14082 if (Constraint.size() == 1) {
14083 switch (Constraint[0]) {
14084 default: break;
14085 case 'b':
14086 case 'r':
14087 case 'f':
14088 case 'd':
14089 case 'v':
14090 case 'y':
14091 return C_RegisterClass;
14092 case 'Z':
14093 // FIXME: While Z does indicate a memory constraint, it specifically
14094 // indicates an r+r address (used in conjunction with the 'y' modifier
14095 // in the replacement string). Currently, we're forcing the base
14096 // register to be r0 in the asm printer (which is interpreted as zero)
14097 // and forming the complete address in the second register. This is
14098 // suboptimal.
14099 return C_Memory;
14100 }
14101 } else if (Constraint == "wc") { // individual CR bits.
14102 return C_RegisterClass;
14103 } else if (Constraint == "wa" || Constraint == "wd" ||
14104 Constraint == "wf" || Constraint == "ws" ||
14105 Constraint == "wi" || Constraint == "ww") {
14106 return C_RegisterClass; // VSX registers.
14107 }
14108 return TargetLowering::getConstraintType(Constraint);
14109}
14110
14111/// Examine constraint type and operand type and determine a weight value.
14112/// This object must already have been set up with the operand type
14113/// and the current alternative constraint selected.
14114TargetLowering::ConstraintWeight
14115PPCTargetLowering::getSingleConstraintMatchWeight(
14116 AsmOperandInfo &info, const char *constraint) const {
14117 ConstraintWeight weight = CW_Invalid;
14118 Value *CallOperandVal = info.CallOperandVal;
14119 // If we don't have a value, we can't do a match,
14120 // but allow it at the lowest weight.
14121 if (!CallOperandVal)
14122 return CW_Default;
14123 Type *type = CallOperandVal->getType();
14124
14125 // Look at the constraint type.
14126 if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
14127 return CW_Register; // an individual CR bit.
14128 else if ((StringRef(constraint) == "wa" ||
14129 StringRef(constraint) == "wd" ||
14130 StringRef(constraint) == "wf") &&
14131 type->isVectorTy())
14132 return CW_Register;
14133 else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
14134 return CW_Register; // registers that just hold 64-bit integer data.
14135 else if (StringRef(constraint) == "ws" && type->isDoubleTy())
14136 return CW_Register;
14137 else if (StringRef(constraint) == "ww" && type->isFloatTy())
14138 return CW_Register;
14139
14140 switch (*constraint) {
14141 default:
14142 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
14143 break;
14144 case 'b':
14145 if (type->isIntegerTy())
14146 weight = CW_Register;
14147 break;
14148 case 'f':
14149 if (type->isFloatTy())
14150 weight = CW_Register;
14151 break;
14152 case 'd':
14153 if (type->isDoubleTy())
14154 weight = CW_Register;
14155 break;
14156 case 'v':
14157 if (type->isVectorTy())
14158 weight = CW_Register;
14159 break;
14160 case 'y':
14161 weight = CW_Register;
14162 break;
14163 case 'Z':
14164 weight = CW_Memory;
14165 break;
14166 }
14167 return weight;
14168}
14169
14170std::pair<unsigned, const TargetRegisterClass *>
14171PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
14172 StringRef Constraint,
14173 MVT VT) const {
14174 if (Constraint.size() == 1) {
14175 // GCC RS6000 Constraint Letters
14176 switch (Constraint[0]) {
14177 case 'b': // R1-R31
14178 if (VT == MVT::i64 && Subtarget.isPPC64())
14179 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
14180 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
14181 case 'r': // R0-R31
14182 if (VT == MVT::i64 && Subtarget.isPPC64())
14183 return std::make_pair(0U, &PPC::G8RCRegClass);
14184 return std::make_pair(0U, &PPC::GPRCRegClass);
14185 // 'd' and 'f' constraints are both defined to be "the floating point
14186 // registers", where one is for 32-bit and the other for 64-bit. We don't
14187 // care much about the distinction here, so just give them the same reg classes.
14188 case 'd':
14189 case 'f':
14190 if (Subtarget.hasSPE()) {
14191 if (VT == MVT::f32 || VT == MVT::i32)
14192 return std::make_pair(0U, &PPC::GPRCRegClass);
14193 if (VT == MVT::f64 || VT == MVT::i64)
14194 return std::make_pair(0U, &PPC::SPERCRegClass);
14195 } else {
14196 if (VT == MVT::f32 || VT == MVT::i32)
14197 return std::make_pair(0U, &PPC::F4RCRegClass);
14198 if (VT == MVT::f64 || VT == MVT::i64)
14199 return std::make_pair(0U, &PPC::F8RCRegClass);
14200 if (VT == MVT::v4f64 && Subtarget.hasQPX())
14201 return std::make_pair(0U, &PPC::QFRCRegClass);
14202 if (VT == MVT::v4f32 && Subtarget.hasQPX())
14203 return std::make_pair(0U, &PPC::QSRCRegClass);
14204 }
14205 break;
14206 case 'v':
14207 if (VT == MVT::v4f64 && Subtarget.hasQPX())
14208 return std::make_pair(0U, &PPC::QFRCRegClass);
14209 if (VT == MVT::v4f32 && Subtarget.hasQPX())
14210 return std::make_pair(0U, &PPC::QSRCRegClass);
14211 if (Subtarget.hasAltivec())
14212 return std::make_pair(0U, &PPC::VRRCRegClass);
14213 break;
14214 case 'y': // crrc
14215 return std::make_pair(0U, &PPC::CRRCRegClass);
14216 }
14217 } else if (Constraint == "wc" && Subtarget.useCRBits()) {
14218 // An individual CR bit.
14219 return std::make_pair(0U, &PPC::CRBITRCRegClass);
14220 } else if ((Constraint == "wa" || Constraint == "wd" ||
14221 Constraint == "wf" || Constraint == "wi") &&
14222 Subtarget.hasVSX()) {
14223 return std::make_pair(0U, &PPC::VSRCRegClass);
14224 } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
14225 if (VT == MVT::f32 && Subtarget.hasP8Vector())
14226 return std::make_pair(0U, &PPC::VSSRCRegClass);
14227 else
14228 return std::make_pair(0U, &PPC::VSFRCRegClass);
14229 }
14230
14231 std::pair<unsigned, const TargetRegisterClass *> R =
14232 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
14233
14234 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
14235 // (which we call X[0-9]+). If a 64-bit value has been requested, and a
14236 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
14237 // register.
14238 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
14239 // the AsmName field from *RegisterInfo.td, then this would not be necessary.
14240 if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
14241 PPC::GPRCRegClass.contains(R.first))
14242 return std::make_pair(TRI->getMatchingSuperReg(R.first,
14243 PPC::sub_32, &PPC::G8RCRegClass),
14244 &PPC::G8RCRegClass);
14245
14246 // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
14247 if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {
14248 R.first = PPC::CR0;
14249 R.second = &PPC::CRRCRegClass;
14250 }
14251
14252 return R;
14253}
14254
14255/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
14256/// vector. If it is invalid, don't add anything to Ops.
14257void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
14258 std::string &Constraint,
14259 std::vector<SDValue>&Ops,
14260 SelectionDAG &DAG) const {
14261 SDValue Result;
14262
14263 // Only support length 1 constraints.
14264 if (Constraint.length() > 1) return;
14265
14266 char Letter = Constraint[0];
14267 switch (Letter) {
14268 default: break;
14269 case 'I':
14270 case 'J':
14271 case 'K':
14272 case 'L':
14273 case 'M':
14274 case 'N':
14275 case 'O':
14276 case 'P': {
14277 ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
14278 if (!CST) return; // Must be an immediate to match.
14279 SDLoc dl(Op);
14280 int64_t Value = CST->getSExtValue();
14281 EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
14282 // numbers are printed as such.
14283 switch (Letter) {
14284 default: llvm_unreachable("Unknown constraint letter!");
14285 case 'I': // "I" is a signed 16-bit constant.
14286 if (isInt<16>(Value))
14287 Result = DAG.getTargetConstant(Value, dl, TCVT);
14288 break;
14289 case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
14290 if (isShiftedUInt<16, 16>(Value))
14291 Result = DAG.getTargetConstant(Value, dl, TCVT);
14292 break;
14293 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
14294 if (isShiftedInt<16, 16>(Value))
14295 Result = DAG.getTargetConstant(Value, dl, TCVT);
14296 break;
14297 case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
14298 if (isUInt<16>(Value))
14299 Result = DAG.getTargetConstant(Value, dl, TCVT);
14300 break;
14301 case 'M': // "M" is a constant that is greater than 31.
14302 if (Value > 31)
14303 Result = DAG.getTargetConstant(Value, dl, TCVT);
14304 break;
14305 case 'N': // "N" is a positive constant that is an exact power of two.
14306 if (Value > 0 && isPowerOf2_64(Value))
14307 Result = DAG.getTargetConstant(Value, dl, TCVT);
14308 break;
14309 case 'O': // "O" is the constant zero.
14310 if (Value == 0)
14311 Result = DAG.getTargetConstant(Value, dl, TCVT);
14312 break;
14313 case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
14314 if (isInt<16>(-Value))
14315 Result = DAG.getTargetConstant(Value, dl, TCVT);
14316 break;
14317 }
14318 break;
14319 }
14320 }
14321
14322 if (Result.getNode()) {
14323 Ops.push_back(Result);
14324 return;
14325 }
14326
14327 // Handle standard constraint letters.
14328 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
14329}
14330
14331// isLegalAddressingMode - Return true if the addressing mode represented
14332// by AM is legal for this target, for a load/store of the specified type.
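// Illustrative summary (editorial, derived from the checks below): "r+i" with
// a signed 16-bit offset and plain "r+r" are accepted; "r+r+i", any
// global-based address, and scaled forms are rejected, except a bare "2*r"
// (treated as r+r); vector types may not use an "r+i" form at all.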
14333bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
14334 const AddrMode &AM, Type *Ty,
14335 unsigned AS, Instruction *I) const {
14336 // PPC does not allow r+i addressing modes for vectors!
14337 if (Ty->isVectorTy() && AM.BaseOffs != 0)
14338 return false;
14339
14340 // PPC allows a sign-extended 16-bit immediate field.
14341 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
14342 return false;
14343
14344 // No global is ever allowed as a base.
14345 if (AM.BaseGV)
14346 return false;
14347
14348 // PPC only supports r+r addressing.
14349 switch (AM.Scale) {
14350 case 0: // "r+i" or just "i", depending on HasBaseReg.
14351 break;
14352 case 1:
14353 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
14354 return false;
14355 // Otherwise we have r+r or r+i.
14356 break;
14357 case 2:
14358 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
14359 return false;
14360 // Allow 2*r as r+r.
14361 break;
14362 default:
14363 // No other scales are supported.
14364 return false;
14365 }
14366
14367 return true;
14368}
14369
14370SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
14371 SelectionDAG &DAG) const {
14372 MachineFunction &MF = DAG.getMachineFunction();
14373 MachineFrameInfo &MFI = MF.getFrameInfo();
14374 MFI.setReturnAddressIsTaken(true);
14375
14376 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
14377 return SDValue();
14378
14379 SDLoc dl(Op);
14380 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
14381
14382 // Make sure the function does not optimize away the store of the RA to
14383 // the stack.
14384 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
14385 FuncInfo->setLRStoreRequired();
14386 bool isPPC64 = Subtarget.isPPC64();
14387 auto PtrVT = getPointerTy(MF.getDataLayout());
14388
14389 if (Depth > 0) {
14390 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
14391 SDValue Offset =
14392 DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
14393 isPPC64 ? MVT::i64 : MVT::i32);
14394 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
14395 DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
14396 MachinePointerInfo());
14397 }
14398
14399 // Just load the return address off the stack.
14400 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
14401 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
14402 MachinePointerInfo());
14403}
14404
14405SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
14406 SelectionDAG &DAG) const {
14407 SDLoc dl(Op);
14408 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
14409
14410 MachineFunction &MF = DAG.getMachineFunction();
14411 MachineFrameInfo &MFI = MF.getFrameInfo();
14412 MFI.setFrameAddressIsTaken(true);
14413
14414 EVT PtrVT = getPointerTy(MF.getDataLayout());
14415 bool isPPC64 = PtrVT == MVT::i64;
14416
14417 // Naked functions never have a frame pointer, and so we use r1. For all
14418 // other functions, this decision must be delayed until PEI.
14419 unsigned FrameReg;
14420 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
14421 FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
14422 else
14423 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
14424
14425 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
14426 PtrVT);
14427 while (Depth--)
14428 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
14429 FrameAddr, MachinePointerInfo());
14430 return FrameAddr;
14431}
14432
14433// FIXME? Maybe this could be a TableGen attribute on some registers and
14434// this table could be generated automatically from RegInfo.
14435unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT,
14436 SelectionDAG &DAG) const {
14437 bool isPPC64 = Subtarget.isPPC64();
14438 bool isDarwinABI = Subtarget.isDarwinABI();
14439
14440 if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) ||
14441 (!isPPC64 && VT != MVT::i32))
14442 report_fatal_error("Invalid register global variable type");
14443
14444 bool is64Bit = isPPC64 && VT == MVT::i64;
14445 unsigned Reg = StringSwitch<unsigned>(RegName)
14446 .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
14447 .Case("r2", (isDarwinABI || isPPC64) ? 0 : PPC::R2)
14448 .Case("r13", (!isPPC64 && isDarwinABI) ? 0 :
14449 (is64Bit ? PPC::X13 : PPC::R13))
14450 .Default(0);
14451
14452 if (Reg)
14453 return Reg;
14454 report_fatal_error("Invalid register name global variable");
14455}
14456
14457bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
14458 // The 32-bit SVR4 ABI accesses everything as got-indirect.
14459 if (Subtarget.is32BitELFABI())
14460 return true;
14461
14462 // AIX accesses everything indirectly through the TOC, which is similar to
14463 // the GOT.
14464 if (Subtarget.isAIXABI())
14465 return true;
14466
14467 CodeModel::Model CModel = getTargetMachine().getCodeModel();
14468 // If it is small or large code model, module locals are accessed
14469 // indirectly by loading their address from .toc/.got.
14470 if (CModel == CodeModel::Small || CModel == CodeModel::Large)
14471 return true;
14472
14473 // JumpTable and BlockAddress are accessed as got-indirect.
14474 if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
14475 return true;
14476
14477 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
14478 const GlobalValue *GV = G->getGlobal();
14479 unsigned char GVFlags = Subtarget.classifyGlobalReference(GV);
14480 // The NLP flag indicates that a global access has to use an
14481 // extra indirection.
14482 if (GVFlags & PPCII::MO_NLP_FLAG)
14483 return true;
14484 }
14485
14486 return false;
14487}
14488
14489bool
14490PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
14491 // The PowerPC target isn't yet aware of offsets.
14492 return false;
14493}
14494
14495bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
14496 const CallInst &I,
14497 MachineFunction &MF,
14498 unsigned Intrinsic) const {
14499 switch (Intrinsic) {
14500 case Intrinsic::ppc_qpx_qvlfd:
14501 case Intrinsic::ppc_qpx_qvlfs:
14502 case Intrinsic::ppc_qpx_qvlfcd:
14503 case Intrinsic::ppc_qpx_qvlfcs:
14504 case Intrinsic::ppc_qpx_qvlfiwa:
14505 case Intrinsic::ppc_qpx_qvlfiwz:
14506 case Intrinsic::ppc_altivec_lvx:
14507 case Intrinsic::ppc_altivec_lvxl:
14508 case Intrinsic::ppc_altivec_lvebx:
14509 case Intrinsic::ppc_altivec_lvehx:
14510 case Intrinsic::ppc_altivec_lvewx:
14511 case Intrinsic::ppc_vsx_lxvd2x:
14512 case Intrinsic::ppc_vsx_lxvw4x: {
14513 EVT VT;
14514 switch (Intrinsic) {
14515 case Intrinsic::ppc_altivec_lvebx:
14516 VT = MVT::i8;
14517 break;
14518 case Intrinsic::ppc_altivec_lvehx:
14519 VT = MVT::i16;
14520 break;
14521 case Intrinsic::ppc_altivec_lvewx:
14522 VT = MVT::i32;
14523 break;
14524 case Intrinsic::ppc_vsx_lxvd2x:
14525 VT = MVT::v2f64;
14526 break;
14527 case Intrinsic::ppc_qpx_qvlfd:
14528 VT = MVT::v4f64;
14529 break;
14530 case Intrinsic::ppc_qpx_qvlfs:
14531 VT = MVT::v4f32;
14532 break;
14533 case Intrinsic::ppc_qpx_qvlfcd:
14534 VT = MVT::v2f64;
14535 break;
14536 case Intrinsic::ppc_qpx_qvlfcs:
14537 VT = MVT::v2f32;
14538 break;
14539 default:
14540 VT = MVT::v4i32;
14541 break;
14542 }
14543
14544 Info.opc = ISD::INTRINSIC_W_CHAIN;
14545 Info.memVT = VT;
14546 Info.ptrVal = I.getArgOperand(0);
14547 Info.offset = -VT.getStoreSize()+1;
14548 Info.size = 2*VT.getStoreSize()-1;
14549 Info.align = Align(1);
14550 Info.flags = MachineMemOperand::MOLoad;
14551 return true;
14552 }
14553 case Intrinsic::ppc_qpx_qvlfda:
14554 case Intrinsic::ppc_qpx_qvlfsa:
14555 case Intrinsic::ppc_qpx_qvlfcda:
14556 case Intrinsic::ppc_qpx_qvlfcsa:
14557 case Intrinsic::ppc_qpx_qvlfiwaa:
14558 case Intrinsic::ppc_qpx_qvlfiwza: {
14559 EVT VT;
14560 switch (Intrinsic) {
14561 case Intrinsic::ppc_qpx_qvlfda:
14562 VT = MVT::v4f64;
14563 break;
14564 case Intrinsic::ppc_qpx_qvlfsa:
14565 VT = MVT::v4f32;
14566 break;
14567 case Intrinsic::ppc_qpx_qvlfcda:
14568 VT = MVT::v2f64;
14569 break;
14570 case Intrinsic::ppc_qpx_qvlfcsa:
14571 VT = MVT::v2f32;
14572 break;
14573 default:
14574 VT = MVT::v4i32;
14575 break;
14576 }
14577
14578 Info.opc = ISD::INTRINSIC_W_CHAIN;
14579 Info.memVT = VT;
14580 Info.ptrVal = I.getArgOperand(0);
14581 Info.offset = 0;
14582 Info.size = VT.getStoreSize();
14583 Info.align = Align(1);
14584 Info.flags = MachineMemOperand::MOLoad;
14585 return true;
14586 }
14587 case Intrinsic::ppc_qpx_qvstfd:
14588 case Intrinsic::ppc_qpx_qvstfs:
14589 case Intrinsic::ppc_qpx_qvstfcd:
14590 case Intrinsic::ppc_qpx_qvstfcs:
14591 case Intrinsic::ppc_qpx_qvstfiw:
14592 case Intrinsic::ppc_altivec_stvx:
14593 case Intrinsic::ppc_altivec_stvxl:
14594 case Intrinsic::ppc_altivec_stvebx:
14595 case Intrinsic::ppc_altivec_stvehx:
14596 case Intrinsic::ppc_altivec_stvewx:
14597 case Intrinsic::ppc_vsx_stxvd2x:
14598 case Intrinsic::ppc_vsx_stxvw4x: {
14599 EVT VT;
14600 switch (Intrinsic) {
14601 case Intrinsic::ppc_altivec_stvebx:
14602 VT = MVT::i8;
14603 break;
14604 case Intrinsic::ppc_altivec_stvehx:
14605 VT = MVT::i16;
14606 break;
14607 case Intrinsic::ppc_altivec_stvewx:
14608 VT = MVT::i32;
14609 break;
14610 case Intrinsic::ppc_vsx_stxvd2x:
14611 VT = MVT::v2f64;
14612 break;
14613 case Intrinsic::ppc_qpx_qvstfd:
14614 VT = MVT::v4f64;
14615 break;
14616 case Intrinsic::ppc_qpx_qvstfs:
14617 VT = MVT::v4f32;
14618 break;
14619 case Intrinsic::ppc_qpx_qvstfcd:
14620 VT = MVT::v2f64;
14621 break;
14622 case Intrinsic::ppc_qpx_qvstfcs:
14623 VT = MVT::v2f32;
14624 break;
14625 default:
14626 VT = MVT::v4i32;
14627 break;
14628 }
14629
14630 Info.opc = ISD::INTRINSIC_VOID;
14631 Info.memVT = VT;
14632 Info.ptrVal = I.getArgOperand(1);
14633 Info.offset = -VT.getStoreSize()+1;
14634 Info.size = 2*VT.getStoreSize()-1;
14635 Info.align = Align(1);
14636 Info.flags = MachineMemOperand::MOStore;
14637 return true;
14638 }
14639 case Intrinsic::ppc_qpx_qvstfda:
14640 case Intrinsic::ppc_qpx_qvstfsa:
14641 case Intrinsic::ppc_qpx_qvstfcda:
14642 case Intrinsic::ppc_qpx_qvstfcsa:
14643 case Intrinsic::ppc_qpx_qvstfiwa: {
14644 EVT VT;
14645 switch (Intrinsic) {
14646 case Intrinsic::ppc_qpx_qvstfda:
14647 VT = MVT::v4f64;
14648 break;
14649 case Intrinsic::ppc_qpx_qvstfsa:
14650 VT = MVT::v4f32;
14651 break;
14652 case Intrinsic::ppc_qpx_qvstfcda:
14653 VT = MVT::v2f64;
14654 break;
14655 case Intrinsic::ppc_qpx_qvstfcsa:
14656 VT = MVT::v2f32;
14657 break;
14658 default:
14659 VT = MVT::v4i32;
14660 break;
14661 }
14662
14663 Info.opc = ISD::INTRINSIC_VOID;
14664 Info.memVT = VT;
14665 Info.ptrVal = I.getArgOperand(1);
14666 Info.offset = 0;
14667 Info.size = VT.getStoreSize();
14668 Info.align = Align(1);
14669 Info.flags = MachineMemOperand::MOStore;
14670 return true;
14671 }
14672 default:
14673 break;
14674 }
14675
14676 return false;
14677}
14678
14679/// getOptimalMemOpType - Returns the target specific optimal type for load
14680/// and store operations as a result of memset, memcpy, and memmove
14681 /// lowering. If DstAlign is zero, the destination alignment can satisfy any
14682 /// constraint. Similarly, if SrcAlign is zero, there is no need to check it
14683 /// against the alignment requirement,
14684/// probably because the source does not need to be loaded. If 'IsMemset' is
14685/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
14686/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
14687/// source is constant so it does not need to be loaded.
14688/// It returns EVT::Other if the type should be determined using generic
14689/// target-independent logic.
14690EVT PPCTargetLowering::getOptimalMemOpType(
14691 uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
14692 bool ZeroMemset, bool MemcpyStrSrc,
14693 const AttributeList &FuncAttributes) const {
14694 if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
14695 // When expanding a memset, require at least two QPX instructions to cover
14696 // the cost of loading the value to be stored from the constant pool.
14697 if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
14698 (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
14699 !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
14700 return MVT::v4f64;
14701 }
14702
14703 // We should use Altivec/VSX loads and stores when available. For unaligned
14704 // addresses, unaligned VSX loads are only fast starting with the P8.
14705 if (Subtarget.hasAltivec() && Size >= 16 &&
14706 (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) ||
14707 ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
14708 return MVT::v4i32;
14709 }
14710
14711 if (Subtarget.isPPC64()) {
14712 return MVT::i64;
14713 }
14714
14715 return MVT::i32;
14716}
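// Editorial example (illustrative): at -O1 or above, a 64-byte memcpy between
// 16-byte-aligned buffers on an Altivec-capable subtarget returns MVT::v4i32,
// so the expansion uses 16-byte vector ops; without Altivec the same copy
// falls back to MVT::i64 on 64-bit subtargets and MVT::i32 otherwise.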
14717
14718/// Returns true if it is beneficial to convert a load of a constant
14719/// to just the constant itself.
14720bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
14721 Type *Ty) const {
14722 assert(Ty->isIntegerTy());
14723
14724 unsigned BitSize = Ty->getPrimitiveSizeInBits();
14725 return !(BitSize == 0 || BitSize > 64);
14726}
14727
14728bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
14729 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
14730 return false;
14731 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
14732 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
14733 return NumBits1 == 64 && NumBits2 == 32;
14734}
14735
14736bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
14737 if (!VT1.isInteger() || !VT2.isInteger())
14738 return false;
14739 unsigned NumBits1 = VT1.getSizeInBits();
14740 unsigned NumBits2 = VT2.getSizeInBits();
14741 return NumBits1 == 64 && NumBits2 == 32;
14742}
14743
14744bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
14745 // Generally speaking, zexts are not free, but they are free when they can be
14746 // folded with other operations.
14747 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
14748 EVT MemVT = LD->getMemoryVT();
14749 if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
14750 (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
14751 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
14752 LD->getExtensionType() == ISD::ZEXTLOAD))
14753 return true;
14754 }
14755
14756 // FIXME: Add other cases...
14757 // - 32-bit shifts with a zext to i64
14758 // - zext after ctlz, bswap, etc.
14759 // - zext after and by a constant mask
14760
14761 return TargetLowering::isZExtFree(Val, VT2);
14762}
14763
14764bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
14765 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
14766 "invalid fpext types");
14767 // Extending to float128 is not free.
14768 if (DestVT == MVT::f128)
14769 return false;
14770 return true;
14771}
14772
14773bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
14774 return isInt<16>(Imm) || isUInt<16>(Imm);
14775}
14776
14777bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
14778 return isInt<16>(Imm) || isUInt<16>(Imm);
14779}
14780
14781bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
14782 unsigned,
14783 unsigned,
14784 MachineMemOperand::Flags,
14785 bool *Fast) const {
14786 if (DisablePPCUnaligned)
14787 return false;
14788
14789 // PowerPC supports unaligned memory access for simple non-vector types.
14790 // Although accessing unaligned addresses is not as efficient as accessing
14791 // aligned addresses, it is generally more efficient than manual expansion,
14792 // and generally only traps for software emulation when crossing page
14793 // boundaries.
14794
14795 if (!VT.isSimple())
14796 return false;
14797
14798 if (VT.getSimpleVT().isVector()) {
14799 if (Subtarget.hasVSX()) {
14800 if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
14801 VT != MVT::v4f32 && VT != MVT::v4i32)
14802 return false;
14803 } else {
14804 return false;
14805 }
14806 }
14807
14808 if (VT == MVT::ppcf128)
14809 return false;
14810
14811 if (Fast)
14812 *Fast = true;
14813
14814 return true;
14815}
14816
14817bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
14818 VT = VT.getScalarType();
14819
14820 if (!VT.isSimple())
14821 return false;
14822
14823 switch (VT.getSimpleVT().SimpleTy) {
14824 case MVT::f32:
14825 case MVT::f64:
14826 return true;
14827 case MVT::f128:
14828 return (EnableQuadPrecision && Subtarget.hasP9Vector());
14829 default:
14830 break;
14831 }
14832
14833 return false;
14834}
14835
14836const MCPhysReg *
14837PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
14838 // LR is a callee-save register, but we must treat it as clobbered by any call
14839 // site. Hence we include LR in the scratch registers, which are in turn added
14840 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
14841 // to CTR, which is used by any indirect call.
14842 static const MCPhysReg ScratchRegs[] = {
14843 PPC::X12, PPC::LR8, PPC::CTR8, 0
14844 };
14845
14846 return ScratchRegs;
14847}
14848
14849unsigned PPCTargetLowering::getExceptionPointerRegister(
14850 const Constant *PersonalityFn) const {
14851 return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
14852}
14853
14854unsigned PPCTargetLowering::getExceptionSelectorRegister(
14855 const Constant *PersonalityFn) const {
14856 return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
14857}
14858
14859bool
14860PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
14861 EVT VT , unsigned DefinedValues) const {
14862 if (VT == MVT::v2i64)
14863 return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
14864
14865 if (Subtarget.hasVSX() || Subtarget.hasQPX())
14866 return true;
14867
14868 return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
14869}
14870
14871Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
14872 if (DisableILPPref || Subtarget.enableMachineScheduler())
14873 return TargetLowering::getSchedulingPreference(N);
14874
14875 return Sched::ILP;
14876}
14877
14878// Create a fast isel object.
14879FastISel *
14880PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
14881 const TargetLibraryInfo *LibInfo) const {
14882 return PPC::createFastISel(FuncInfo, LibInfo);
14883}
14884
14885void PPCTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
14886 if (Subtarget.isDarwinABI()) return;
14887 if (!Subtarget.isPPC64()) return;
14888
14889 // Update IsSplitCSR in PPCFunctionInfo
14890 PPCFunctionInfo *PFI = Entry->getParent()->getInfo<PPCFunctionInfo>();
14891 PFI->setIsSplitCSR(true);
14892}
14893
14894void PPCTargetLowering::insertCopiesSplitCSR(
14895 MachineBasicBlock *Entry,
14896 const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
14897 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
14898 const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
14899 if (!IStart)
14900 return;
14901
14902 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
14903 MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
14904 MachineBasicBlock::iterator MBBI = Entry->begin();
14905 for (const MCPhysReg *I = IStart; *I; ++I) {
14906 const TargetRegisterClass *RC = nullptr;
14907 if (PPC::G8RCRegClass.contains(*I))
14908 RC = &PPC::G8RCRegClass;
14909 else if (PPC::F8RCRegClass.contains(*I))
14910 RC = &PPC::F8RCRegClass;
14911 else if (PPC::CRRCRegClass.contains(*I))
14912 RC = &PPC::CRRCRegClass;
14913 else if (PPC::VRRCRegClass.contains(*I))
14914 RC = &PPC::VRRCRegClass;
14915 else
14916 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
14917
14918 Register NewVR = MRI->createVirtualRegister(RC);
14919 // Create copy from CSR to a virtual register.
14920 // FIXME: this currently does not emit CFI pseudo-instructions; it works
14921 // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
14922 // nounwind. If we want to generalize this later, we may need to emit
14923 // CFI pseudo-instructions.
14924 assert(Entry->getParent()->getFunction().hasFnAttribute(
14925 Attribute::NoUnwind) &&
14926 "Function should be nounwind in insertCopiesSplitCSR!");
14927 Entry->addLiveIn(*I);
14928 BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
14929 .addReg(*I);
14930
14931 // Insert the copy-back instructions right before the terminator.
14932 for (auto *Exit : Exits)
14933 BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
14934 TII->get(TargetOpcode::COPY), *I)
14935 .addReg(NewVR);
14936 }
14937}
14938
14939// Override to enable LOAD_STACK_GUARD lowering on Linux.
14940bool PPCTargetLowering::useLoadStackGuardNode() const {
14941 if (!Subtarget.isTargetLinux())
14942 return TargetLowering::useLoadStackGuardNode();
14943 return true;
14944}
14945
14946// Override to disable global variable loading on Linux.
14947void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
14948 if (!Subtarget.isTargetLinux())
14949 return TargetLowering::insertSSPDeclarations(M);
14950}
14951
14952bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
14953 bool ForCodeSize) const {
14954 if (!VT.isSimple() || !Subtarget.hasVSX())
14955 return false;
14956
14957 switch(VT.getSimpleVT().SimpleTy) {
14958 default:
14959 // For FP types that are currently not supported by PPC backend, return
14960 // false. Examples: f16, f80.
14961 return false;
14962 case MVT::f32:
14963 case MVT::f64:
14964 case MVT::ppcf128:
14965 return Imm.isPosZero();
14966 }
14967}
14968
14969// For vector shift operation op, fold
14970// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
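// Example (illustrative): for v4i32, (srl x, (and y, splat(31))) becomes
// (PPCISD::SRL x, y); the mask is redundant here, presumably because the
// target shift node only consumes the low bits of each shift amount anyway.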
14971static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
14972 SelectionDAG &DAG) {
14973 SDValue N0 = N->getOperand(0);
14974 SDValue N1 = N->getOperand(1);
14975 EVT VT = N0.getValueType();
14976 unsigned OpSizeInBits = VT.getScalarSizeInBits();
14977 unsigned Opcode = N->getOpcode();
14978 unsigned TargetOpcode;
14979
14980 switch (Opcode) {
14981 default:
14982 llvm_unreachable("Unexpected shift operation");
14983 case ISD::SHL:
14984 TargetOpcode = PPCISD::SHL;
14985 break;
14986 case ISD::SRL:
14987 TargetOpcode = PPCISD::SRL;
14988 break;
14989 case ISD::SRA:
14990 TargetOpcode = PPCISD::SRA;
14991 break;
14992 }
14993
14994 if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
14995 N1->getOpcode() == ISD::AND)
14996 if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
14997 if (Mask->getZExtValue() == OpSizeInBits - 1)
14998 return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
14999
15000 return SDValue();
15001}
15002
15003SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
15004 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
15005 return Value;
15006
15007 SDValue N0 = N->getOperand(0);
15008 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
15009 if (!Subtarget.isISA3_0() ||
15010 N0.getOpcode() != ISD::SIGN_EXTEND ||
15011 N0.getOperand(0).getValueType() != MVT::i32 ||
15012 CN1 == nullptr || N->getValueType(0) != MVT::i64)
15013 return SDValue();
15014
15015 // We can't save an operation here if the value is already extended, and
15016 // the existing shift is easier to combine.
15017 SDValue ExtsSrc = N0.getOperand(0);
15018 if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
15019 ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
15020 return SDValue();
15021
15022 SDLoc DL(N0);
15023 SDValue ShiftBy = SDValue(CN1, 0);
15024 // We want the shift amount to be i32 on the extswsli, but the incoming
15025 // shift amount could be i64.
15026 if (ShiftBy.getValueType() == MVT::i64)
15027 ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
15028
15029 return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
15030 ShiftBy);
15031}
15032
15033SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
15034 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
15035 return Value;
15036
15037 return SDValue();
15038}
15039
15040SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
15041 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
15042 return Value;
15043
15044 return SDValue();
15045}
15046
15047// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
15048// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
15049// When C is zero, the equation (addi Z, -C) can be simplified to Z
15050// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
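// Worked example (illustrative): add X, (zext (setne Z, 5)) becomes roughly
//   t = addi  Z, -5      ; t == 0 iff Z == 5
//   c = addic t, -1      ; carry set iff t != 0
//   r = addze X          ; r = X + (Z != 5)
// with the addi dropped entirely when C == 0.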
15051static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
15052 const PPCSubtarget &Subtarget) {
15053 if (!Subtarget.isPPC64())
15054 return SDValue();
15055
15056 SDValue LHS = N->getOperand(0);
15057 SDValue RHS = N->getOperand(1);
15058
15059 auto isZextOfCompareWithConstant = [](SDValue Op) {
15060 if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
15061 Op.getValueType() != MVT::i64)
15062 return false;
15063
15064 SDValue Cmp = Op.getOperand(0);
15065 if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
15066 Cmp.getOperand(0).getValueType() != MVT::i64)
15067 return false;
15068
15069 if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
15070 int64_t NegConstant = 0 - Constant->getSExtValue();
15071 // Due to the limitations of the addi instruction,
15072 // -C is required to be in [-32768, 32767].
15073 return isInt<16>(NegConstant);
15074 }
15075
15076 return false;
15077 };
15078
15079 bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
15080 bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
15081
15082 // If there is a pattern, canonicalize a zext operand to the RHS.
15083 if (LHSHasPattern && !RHSHasPattern)
15084 std::swap(LHS, RHS);
15085 else if (!LHSHasPattern && !RHSHasPattern)
15086 return SDValue();
15087
15088 SDLoc DL(N);
15089 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
15090 SDValue Cmp = RHS.getOperand(0);
15091 SDValue Z = Cmp.getOperand(0);
15092 auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1));
15093
15094 assert(Constant && "Constant Should not be a null pointer.");
15095 int64_t NegConstant = 0 - Constant->getSExtValue();
15096
15097 switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
15098 default: break;
15099 case ISD::SETNE: {
15100 // when C == 0
15101 // --> addze X, (addic Z, -1).carry
15102 // /
15103 // add X, (zext(setne Z, C))--
15104 // \ when -32768 <= -C <= 32767 && C != 0
15105 // --> addze X, (addic (addi Z, -C), -1).carry
15106 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
15107 DAG.getConstant(NegConstant, DL, MVT::i64));
15108 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
15109 SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
15110 AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
15111 return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
15112 SDValue(Addc.getNode(), 1));
15113 }
15114 case ISD::SETEQ: {
15115 // when C == 0
15116 // --> addze X, (subfic Z, 0).carry
15117 // /
15118 // add X, (zext(sete Z, C))--
15119 // \ when -32768 <= -C <= 32767 && C != 0
15120 // --> addze X, (subfic (addi Z, -C), 0).carry
15121 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
15122 DAG.getConstant(NegConstant, DL, MVT::i64));
15123 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
15124 SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
15125 DAG.getConstant(0, DL, MVT::i64), AddOrZ);
15126 return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
15127 SDValue(Subc.getNode(), 1));
15128 }
15129 }
15130
15131 return SDValue();
15132}
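
A hedged source-level sketch of the shape this combine targets (hypothetical function, not part of the analyzed file): adding the result of a 64-bit compare lets the zext be folded into the carry chain as described above.

  #include <cstdint>

  // (add X, (zext (setne Z, 5))) --> addze X, (addic (addi Z, -5), -1).carry
  uint64_t addNotEqual(uint64_t X, uint64_t Z) {
    return X + (Z != 5);
  }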
15133
15134SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
15135 if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
15136 return Value;
15137
15138 return SDValue();
15139}
15140
15141// Detect TRUNCATE operations on bitcasts of float128 values.
15142// What we are looking for here is the situation where we extract a subset
15143// of bits from a 128-bit float.
15144// This can be of two forms:
15145// 1) BITCAST of f128 feeding TRUNCATE
15146// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
15147// The reason this is required is that we do not have a legal i128 type,
15148// so we want to avoid having to store the f128 and then reload part
15149// of it.
15150SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
15151 DAGCombinerInfo &DCI) const {
15152 // If we are using CRBits then try that first.
15153 if (Subtarget.useCRBits()) {
15154 // Check if CRBits did anything and return that if it did.
15155 if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
15156 return CRTruncValue;
15157 }
15158
15159 SDLoc dl(N);
15160 SDValue Op0 = N->getOperand(0);
15161
15162 // Looking for a truncate of i128 to i64.
15163 if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
15164 return SDValue();
15165
15166 int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
15167
15168 // SRL feeding TRUNCATE.
15169 if (Op0.getOpcode() == ISD::SRL) {
15170 ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
15171 // The right shift has to be by 64 bits.
15172 if (!ConstNode || ConstNode->getZExtValue() != 64)
15173 return SDValue();
15174
15175 // Switch the element number to extract.
15176 EltToExtract = EltToExtract ? 0 : 1;
15177 // Update Op0 past the SRL.
15178 Op0 = Op0.getOperand(0);
15179 }
15180
15181 // BITCAST feeding a TRUNCATE possibly via SRL.
15182 if (Op0.getOpcode() == ISD::BITCAST &&
15183 Op0.getValueType() == MVT::i128 &&
15184 Op0.getOperand(0).getValueType() == MVT::f128) {
15185 SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
15186 return DCI.DAG.getNode(
15187 ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
15188 DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
15189 }
15190 return SDValue();
15191}
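
A hedged sketch of source that produces the BITCAST/SRL/TRUNCATE chain handled above (hypothetical function, not part of the analyzed file; assumes a compiler/target where __float128 and unsigned __int128 are available):

  #include <cstdint>
  #include <cstring>

  // trunc(srl(bitcast f128 -> i128, 64)) - the combine turns this into an
  // EXTRACT_VECTOR_ELT from a v2i64 bitcast instead of a store/reload.
  uint64_t highWord(__float128 F) {
    unsigned __int128 Bits;
    std::memcpy(&Bits, &F, sizeof(Bits));
    return static_cast<uint64_t>(Bits >> 64);
  }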
15192
15193SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
15194 SelectionDAG &DAG = DCI.DAG;
15195
15196 ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
15197 if (!ConstOpOrElement)
15198 return SDValue();
15199
15200  // An imul is usually smaller than the alternative sequence for a legal type.
15201 if (DAG.getMachineFunction().getFunction().hasMinSize() &&
15202 isOperationLegal(ISD::MUL, N->getValueType(0)))
15203 return SDValue();
15204
15205 auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
15206 switch (this->Subtarget.getDarwinDirective()) {
15207 default:
15208 // TODO: enhance the condition for subtarget before pwr8
15209 return false;
15210 case PPC::DIR_PWR8:
15211 // type mul add shl
15212 // scalar 4 1 1
15213 // vector 7 2 2
15214 return true;
15215 case PPC::DIR_PWR9:
15216 // type mul add shl
15217 // scalar 5 2 2
15218 // vector 7 2 2
15219
15220      // The cycle ratios of the related operations are shown in the table above.
15221      // Because mul is 5 (scalar) / 7 (vector) and add/sub/shl are all 2 for both
15222      // scalar and vector types, the 2-instruction patterns (add/sub + shl,
15223      // 4 cycles) are always profitable; but the 3-instruction pattern
15224      // (mul x, -(2^N + 1)) => -(add (shl x, N), x) costs sub + add + shl = 6.
15225      // So we should only do it for vector types.
15226 return IsAddOne && IsNeg ? VT.isVector() : true;
15227 }
15228 };
15229
15230 EVT VT = N->getValueType(0);
15231 SDLoc DL(N);
15232
15233 const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
15234 bool IsNeg = MulAmt.isNegative();
15235 APInt MulAmtAbs = MulAmt.abs();
15236
15237 if ((MulAmtAbs - 1).isPowerOf2()) {
15238 // (mul x, 2^N + 1) => (add (shl x, N), x)
15239 // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
15240
15241 if (!IsProfitable(IsNeg, true, VT))
15242 return SDValue();
15243
15244 SDValue Op0 = N->getOperand(0);
15245 SDValue Op1 =
15246 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
15247 DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
15248 SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
15249
15250 if (!IsNeg)
15251 return Res;
15252
15253 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
15254 } else if ((MulAmtAbs + 1).isPowerOf2()) {
15255 // (mul x, 2^N - 1) => (sub (shl x, N), x)
15256 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
15257
15258 if (!IsProfitable(IsNeg, false, VT))
15259 return SDValue();
15260
15261 SDValue Op0 = N->getOperand(0);
15262 SDValue Op1 =
15263 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
15264 DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
15265
15266 if (!IsNeg)
15267 return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
15268 else
15269 return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
15270
15271 } else {
15272 return SDValue();
15273 }
15274}
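
The scalar identities behind the two cases above, as a hedged sketch (hypothetical helpers, not part of the analyzed file):

  #include <cstdint>

  uint64_t mulBy9(uint64_t X)      { return (X << 3) + X; }        // x * (2^3 + 1)
  uint64_t mulByMinus9(uint64_t X) { return 0 - ((X << 3) + X); }  // x * -(2^3 + 1)
  uint64_t mulBy7(uint64_t X)      { return (X << 3) - X; }        // x * (2^3 - 1)
  uint64_t mulByMinus7(uint64_t X) { return X - (X << 3); }        // x * -(2^3 - 1)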
15275
15276bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
15277 // Only duplicate to increase tail-calls for the 64bit SysV ABIs.
15278 if (!Subtarget.is64BitELFABI())
15279 return false;
15280
15281 // If not a tail call then no need to proceed.
15282 if (!CI->isTailCall())
15283 return false;
15284
15285 // If tail calls are disabled for the caller then we are done.
15286 const Function *Caller = CI->getParent()->getParent();
15287 auto Attr = Caller->getFnAttribute("disable-tail-calls");
15288 if (Attr.getValueAsString() == "true")
15289 return false;
15290
15291 // If sibling calls have been disabled and tail-calls aren't guaranteed
15292 // there is no reason to duplicate.
15293 auto &TM = getTargetMachine();
15294 if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
15295 return false;
15296
15297 // Can't tail call a function called indirectly, or if it has variadic args.
15298 const Function *Callee = CI->getCalledFunction();
15299 if (!Callee || Callee->isVarArg())
15300 return false;
15301
15302 // Make sure the callee and caller calling conventions are eligible for tco.
15303 if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
15304 CI->getCallingConv()))
15305 return false;
15306
15307 // If the function is local then we have a good chance at tail-calling it
15308 return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee);
15309}
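
A hedged illustration of a call this hook is meant to accept (hypothetical functions, not part of the analyzed file): a direct, non-variadic call in tail position with matching calling conventions on a 64-bit ELF target.

  long callee(long);
  long caller(long X) {
    return callee(X + 1); // candidate for tail-call duplication
  }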
15310
15311bool PPCTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
15312 if (!Subtarget.hasVSX())
15313 return false;
15314 if (Subtarget.hasP9Vector() && VT == MVT::f128)
15315 return true;
15316 return VT == MVT::f32 || VT == MVT::f64 ||
15317 VT == MVT::v4f32 || VT == MVT::v2f64;
15318}
15319
15320bool PPCTargetLowering::
15321isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
15322 const Value *Mask = AndI.getOperand(1);
15323 // If the mask is suitable for andi. or andis. we should sink the and.
15324 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
15325 // Can't handle constants wider than 64-bits.
15326 if (CI->getBitWidth() > 64)
15327 return false;
15328 int64_t ConstVal = CI->getZExtValue();
15329 return isUInt<16>(ConstVal) ||
15330 (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
15331 }
15332
15333 // For non-constant masks, we can always use the record-form and.
15334 return true;
15335}
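
A hedged illustration of the two mask shapes accepted above (hypothetical helpers, not part of the analyzed file): constants whose set bits fit in the low 16 bits map to andi., and constants whose set bits fit in bits 16-31 map to andis.; both record forms can feed a compare-with-zero directly.

  #include <cstdint>

  bool andLowIsZero(uint64_t V)  { return (V & UINT64_C(0x0000FFFF)) == 0; } // andi. mask
  bool andHighIsZero(uint64_t V) { return (V & UINT64_C(0xFFFF0000)) == 0; } // andis. mask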
15336
15337// Transform (abs (sub (zext a), (zext b))) to (vabsd a b 0)
15338// Transform (abs (sub (zext a), (zext_invec b))) to (vabsd a b 0)
15339// Transform (abs (sub (zext_invec a), (zext_invec b))) to (vabsd a b 0)
15340// Transform (abs (sub (zext_invec a), (zext b))) to (vabsd a b 0)
15341// Transform (abs (sub a, b)) to (vabsd a b 1) if a & b are of type v4i32
15342SDValue PPCTargetLowering::combineABS(SDNode *N, DAGCombinerInfo &DCI) const {
15343  assert((N->getOpcode() == ISD::ABS) && "Need ABS node here");
15344  assert(Subtarget.hasP9Altivec() &&
15345         "Only combine this when P9 altivec supported!");
15346 EVT VT = N->getValueType(0);
15347 if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
15348 return SDValue();
15349
15350 SelectionDAG &DAG = DCI.DAG;
15351 SDLoc dl(N);
15352 if (N->getOperand(0).getOpcode() == ISD::SUB) {
15353    // Even for signed integers, the zero-extended inputs are known to be
15354    // non-negative (as signed integers), so the unsigned difference matches.
15355 unsigned SubOpcd0 = N->getOperand(0)->getOperand(0).getOpcode();
15356 unsigned SubOpcd1 = N->getOperand(0)->getOperand(1).getOpcode();
15357 if ((SubOpcd0 == ISD::ZERO_EXTEND ||
15358 SubOpcd0 == ISD::ZERO_EXTEND_VECTOR_INREG) &&
15359 (SubOpcd1 == ISD::ZERO_EXTEND ||
15360 SubOpcd1 == ISD::ZERO_EXTEND_VECTOR_INREG)) {
15361 return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
15362 N->getOperand(0)->getOperand(0),
15363 N->getOperand(0)->getOperand(1),
15364 DAG.getTargetConstant(0, dl, MVT::i32));
15365 }
15366
15367 // For type v4i32, it can be optimized with xvnegsp + vabsduw
15368 if (N->getOperand(0).getValueType() == MVT::v4i32 &&
15369 N->getOperand(0).hasOneUse()) {
15370 return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
15371 N->getOperand(0)->getOperand(0),
15372 N->getOperand(0)->getOperand(1),
15373 DAG.getTargetConstant(1, dl, MVT::i32));
15374 }
15375 }
15376
15377 return SDValue();
15378}
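
A hedged per-element sketch of the widened-operand case above (hypothetical function, not part of the analyzed file): with zero-extended inputs, abs of the difference is exactly the unsigned absolute difference that VABSD computes per lane.

  #include <cstdlib>

  int absDiffOfBytes(unsigned char A, unsigned char B) {
    return std::abs(static_cast<int>(A) - static_cast<int>(B));
  }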
15379
15380// For type v4i32/v8i16/v16i8, transform
15381// from (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) to (vabsd a, b)
15382// from (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) to (vabsd a, b)
15383// from (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) to (vabsd a, b)
15384// from (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) to (vabsd a, b)
15385SDValue PPCTargetLowering::combineVSelect(SDNode *N,
15386 DAGCombinerInfo &DCI) const {
15387  assert((N->getOpcode() == ISD::VSELECT) && "Need VSELECT node here");
15388  assert(Subtarget.hasP9Altivec() &&
15389         "Only combine this when P9 altivec supported!");
15390
15391 SelectionDAG &DAG = DCI.DAG;
15392 SDLoc dl(N);
15393 SDValue Cond = N->getOperand(0);
15394 SDValue TrueOpnd = N->getOperand(1);
15395 SDValue FalseOpnd = N->getOperand(2);
15396 EVT VT = N->getOperand(1).getValueType();
15397
15398 if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB ||
15399 FalseOpnd.getOpcode() != ISD::SUB)
15400 return SDValue();
15401
15402 // ABSD only available for type v4i32/v8i16/v16i8
15403 if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
15404 return SDValue();
15405
15406  // Require a single-use operand so the combine saves at least one dependent computation.
15407 if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse()))
15408 return SDValue();
15409
15410 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
15411
15412 // Can only handle unsigned comparison here
15413 switch (CC) {
15414 default:
15415 return SDValue();
15416 case ISD::SETUGT:
15417 case ISD::SETUGE:
15418 break;
15419 case ISD::SETULT:
15420 case ISD::SETULE:
15421 std::swap(TrueOpnd, FalseOpnd);
15422 break;
15423 }
15424
15425 SDValue CmpOpnd1 = Cond.getOperand(0);
15426 SDValue CmpOpnd2 = Cond.getOperand(1);
15427
15428 // SETCC CmpOpnd1 CmpOpnd2 cond
15429 // TrueOpnd = CmpOpnd1 - CmpOpnd2
15430 // FalseOpnd = CmpOpnd2 - CmpOpnd1
15431 if (TrueOpnd.getOperand(0) == CmpOpnd1 &&
15432 TrueOpnd.getOperand(1) == CmpOpnd2 &&
15433 FalseOpnd.getOperand(0) == CmpOpnd2 &&
15434 FalseOpnd.getOperand(1) == CmpOpnd1) {
15435 return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(1).getValueType(),
15436 CmpOpnd1, CmpOpnd2,
15437 DAG.getTargetConstant(0, dl, MVT::i32));
15438 }
15439
15440 return SDValue();
15441}
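
A hedged per-element sketch of the select-of-subtracts pattern listed above (hypothetical function, not part of the analyzed file): for unsigned lanes this is the absolute difference, folded by the combine into a single VABSD.

  unsigned absDiff(unsigned A, unsigned B) {
    return A > B ? A - B : B - A;
  }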

/build/llvm-toolchain-snapshot-10~svn372087/include/llvm/CodeGen/SelectionDAGNodes.h

1//===- llvm/CodeGen/SelectionDAGNodes.h - SelectionDAG Nodes ----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file declares the SDNode class and derived classes, which are used to
10// represent the nodes and operations present in a SelectionDAG. These nodes
11// and operations are machine code level operations, with some similarities to
12// the GCC RTL representation.
13//
14// Clients should include the SelectionDAG.h file instead of this file directly.
15//
16//===----------------------------------------------------------------------===//
17
18#ifndef LLVM_CODEGEN_SELECTIONDAGNODES_H
19#define LLVM_CODEGEN_SELECTIONDAGNODES_H
20
21#include "llvm/ADT/APFloat.h"
22#include "llvm/ADT/ArrayRef.h"
23#include "llvm/ADT/BitVector.h"
24#include "llvm/ADT/FoldingSet.h"
25#include "llvm/ADT/GraphTraits.h"
26#include "llvm/ADT/SmallPtrSet.h"
27#include "llvm/ADT/SmallVector.h"
28#include "llvm/ADT/ilist_node.h"
29#include "llvm/ADT/iterator.h"
30#include "llvm/ADT/iterator_range.h"
31#include "llvm/CodeGen/ISDOpcodes.h"
32#include "llvm/CodeGen/MachineMemOperand.h"
33#include "llvm/CodeGen/ValueTypes.h"
34#include "llvm/IR/Constants.h"
35#include "llvm/IR/DebugLoc.h"
36#include "llvm/IR/Instruction.h"
37#include "llvm/IR/Instructions.h"
38#include "llvm/IR/Metadata.h"
39#include "llvm/IR/Operator.h"
40#include "llvm/Support/AlignOf.h"
41#include "llvm/Support/AtomicOrdering.h"
42#include "llvm/Support/Casting.h"
43#include "llvm/Support/ErrorHandling.h"
44#include "llvm/Support/MachineValueType.h"
45#include <algorithm>
46#include <cassert>
47#include <climits>
48#include <cstddef>
49#include <cstdint>
50#include <cstring>
51#include <iterator>
52#include <string>
53#include <tuple>
54
55namespace llvm {
56
57class APInt;
58class Constant;
59template <typename T> struct DenseMapInfo;
60class GlobalValue;
61class MachineBasicBlock;
62class MachineConstantPoolValue;
63class MCSymbol;
64class raw_ostream;
65class SDNode;
66class SelectionDAG;
67class Type;
68class Value;
69
70void checkForCycles(const SDNode *N, const SelectionDAG *DAG = nullptr,
71 bool force = false);
72
73/// This represents a list of ValueType's that has been intern'd by
74/// a SelectionDAG. Instances of this simple value class are returned by
75/// SelectionDAG::getVTList(...).
76///
77struct SDVTList {
78 const EVT *VTs;
79 unsigned int NumVTs;
80};
81
82namespace ISD {
83
84 /// Node predicates
85
86 /// If N is a BUILD_VECTOR node whose elements are all the same constant or
87 /// undefined, return true and return the constant value in \p SplatValue.
88 bool isConstantSplatVector(const SDNode *N, APInt &SplatValue);
89
90 /// Return true if the specified node is a BUILD_VECTOR where all of the
91 /// elements are ~0 or undef.
92 bool isBuildVectorAllOnes(const SDNode *N);
93
94 /// Return true if the specified node is a BUILD_VECTOR where all of the
95 /// elements are 0 or undef.
96 bool isBuildVectorAllZeros(const SDNode *N);
97
98 /// Return true if the specified node is a BUILD_VECTOR node of all
99 /// ConstantSDNode or undef.
100 bool isBuildVectorOfConstantSDNodes(const SDNode *N);
101
102 /// Return true if the specified node is a BUILD_VECTOR node of all
103 /// ConstantFPSDNode or undef.
104 bool isBuildVectorOfConstantFPSDNodes(const SDNode *N);
105
106 /// Return true if the node has at least one operand and all operands of the
107 /// specified node are ISD::UNDEF.
108 bool allOperandsUndef(const SDNode *N);
109
110} // end namespace ISD
111
112//===----------------------------------------------------------------------===//
113/// Unlike LLVM values, Selection DAG nodes may return multiple
114/// values as the result of a computation. Many nodes return multiple values,
115/// from loads (which define a token and a return value) to ADDC (which returns
116/// a result and a carry value), to calls (which may return an arbitrary number
117/// of values).
118///
119/// As such, each use of a SelectionDAG computation must indicate the node that
120/// computes it as well as which return value to use from that node. This pair
121/// of information is represented with the SDValue value type.
122///
123class SDValue {
124 friend struct DenseMapInfo<SDValue>;
125
126 SDNode *Node = nullptr; // The node defining the value we are using.
127 unsigned ResNo = 0; // Which return value of the node we are using.
128
129public:
130 SDValue() = default;
131 SDValue(SDNode *node, unsigned resno);
132
133 /// get the index which selects a specific result in the SDNode
134 unsigned getResNo() const { return ResNo; }
135
136 /// get the SDNode which holds the desired result
137 SDNode *getNode() const { return Node; }
138
139 /// set the SDNode
140 void setNode(SDNode *N) { Node = N; }
141
142 inline SDNode *operator->() const { return Node; }
143
144 bool operator==(const SDValue &O) const {
145 return Node == O.Node && ResNo == O.ResNo;
146 }
147 bool operator!=(const SDValue &O) const {
148 return !operator==(O);
149 }
150 bool operator<(const SDValue &O) const {
151 return std::tie(Node, ResNo) < std::tie(O.Node, O.ResNo);
152 }
153 explicit operator bool() const {
154 return Node != nullptr;
155 }
156
157 SDValue getValue(unsigned R) const {
158 return SDValue(Node, R);
159 }
160
161 /// Return true if this node is an operand of N.
162 bool isOperandOf(const SDNode *N) const;
163
164 /// Return the ValueType of the referenced return value.
165 inline EVT getValueType() const;
166
167 /// Return the simple ValueType of the referenced return value.
168 MVT getSimpleValueType() const {
169 return getValueType().getSimpleVT();
170 }
171
172 /// Returns the size of the value in bits.
173 unsigned getValueSizeInBits() const {
174 return getValueType().getSizeInBits();
175 }
176
177 unsigned getScalarValueSizeInBits() const {
178 return getValueType().getScalarType().getSizeInBits();
179 }
180
181 // Forwarding methods - These forward to the corresponding methods in SDNode.
182 inline unsigned getOpcode() const;
183 inline unsigned getNumOperands() const;
184 inline const SDValue &getOperand(unsigned i) const;
185 inline uint64_t getConstantOperandVal(unsigned i) const;
186 inline const APInt &getConstantOperandAPInt(unsigned i) const;
187 inline bool isTargetMemoryOpcode() const;
188 inline bool isTargetOpcode() const;
189 inline bool isMachineOpcode() const;
190 inline bool isUndef() const;
191 inline unsigned getMachineOpcode() const;
192 inline const DebugLoc &getDebugLoc() const;
193 inline void dump() const;
194 inline void dump(const SelectionDAG *G) const;
195 inline void dumpr() const;
196 inline void dumpr(const SelectionDAG *G) const;
197
198 /// Return true if this operand (which must be a chain) reaches the
199 /// specified operand without crossing any side-effecting instructions.
200 /// In practice, this looks through token factors and non-volatile loads.
201 /// In order to remain efficient, this only
202 /// looks a couple of nodes in, it does not do an exhaustive search.
203 bool reachesChainWithoutSideEffects(SDValue Dest,
204 unsigned Depth = 2) const;
205
206 /// Return true if there are no nodes using value ResNo of Node.
207 inline bool use_empty() const;
208
209 /// Return true if there is exactly one node using value ResNo of Node.
210 inline bool hasOneUse() const;
211};
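
A hedged usage sketch (not part of this header): a node with several results, such as an ADDC producing a sum and a carry, is addressed by pairing the node with a result index.

  // Hypothetical illustration:
  //   SDValue Sum(AddcNode, 0);    // first result: the integer sum
  //   SDValue Carry(AddcNode, 1);  // second result: the carry/glue value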
212
213template<> struct DenseMapInfo<SDValue> {
214 static inline SDValue getEmptyKey() {
215 SDValue V;
216 V.ResNo = -1U;
217 return V;
218 }
219
220 static inline SDValue getTombstoneKey() {
221 SDValue V;
222 V.ResNo = -2U;
223 return V;
224 }
225
226 static unsigned getHashValue(const SDValue &Val) {
227 return ((unsigned)((uintptr_t)Val.getNode() >> 4) ^
228 (unsigned)((uintptr_t)Val.getNode() >> 9)) + Val.getResNo();
229 }
230
231 static bool isEqual(const SDValue &LHS, const SDValue &RHS) {
232 return LHS == RHS;
233 }
234};
235
236/// Allow casting operators to work directly on
237/// SDValues as if they were SDNode*'s.
238template<> struct simplify_type<SDValue> {
239 using SimpleType = SDNode *;
240
241 static SimpleType getSimplifiedValue(SDValue &Val) {
242 return Val.getNode();
243 }
244};
245template<> struct simplify_type<const SDValue> {
246 using SimpleType = /*const*/ SDNode *;
247
248 static SimpleType getSimplifiedValue(const SDValue &Val) {
249 return Val.getNode();
250 }
251};
252
253/// Represents a use of a SDNode. This class holds an SDValue,
254/// which records the SDNode being used and the result number, a
255/// pointer to the SDNode using the value, and Next and Prev pointers,
256/// which link together all the uses of an SDNode.
257///
258class SDUse {
259 /// Val - The value being used.
260 SDValue Val;
261 /// User - The user of this value.
262 SDNode *User = nullptr;
263 /// Prev, Next - Pointers to the uses list of the SDNode referred by
264 /// this operand.
265 SDUse **Prev = nullptr;
266 SDUse *Next = nullptr;
267
268public:
269 SDUse() = default;
270 SDUse(const SDUse &U) = delete;
271 SDUse &operator=(const SDUse &) = delete;
272
273 /// Normally SDUse will just implicitly convert to an SDValue that it holds.
274 operator const SDValue&() const { return Val; }
275
276 /// If implicit conversion to SDValue doesn't work, the get() method returns
277 /// the SDValue.
278 const SDValue &get() const { return Val; }
279
280 /// This returns the SDNode that contains this Use.
281 SDNode *getUser() { return User; }
282
283 /// Get the next SDUse in the use list.
284 SDUse *getNext() const { return Next; }
285
286 /// Convenience function for get().getNode().
287 SDNode *getNode() const { return Val.getNode(); }
288 /// Convenience function for get().getResNo().
289 unsigned getResNo() const { return Val.getResNo(); }
290 /// Convenience function for get().getValueType().
291 EVT getValueType() const { return Val.getValueType(); }
292
293 /// Convenience function for get().operator==
294 bool operator==(const SDValue &V) const {
295 return Val == V;
296 }
297
298 /// Convenience function for get().operator!=
299 bool operator!=(const SDValue &V) const {
300 return Val != V;
301 }
302
303 /// Convenience function for get().operator<
304 bool operator<(const SDValue &V) const {
305 return Val < V;
306 }
307
308private:
309 friend class SelectionDAG;
310 friend class SDNode;
311 // TODO: unfriend HandleSDNode once we fix its operand handling.
312 friend class HandleSDNode;
313
314 void setUser(SDNode *p) { User = p; }
315
316 /// Remove this use from its existing use list, assign it the
317 /// given value, and add it to the new value's node's use list.
318 inline void set(const SDValue &V);
319 /// Like set, but only supports initializing a newly-allocated
320 /// SDUse with a non-null value.
321 inline void setInitial(const SDValue &V);
322 /// Like set, but only sets the Node portion of the value,
323 /// leaving the ResNo portion unmodified.
324 inline void setNode(SDNode *N);
325
326 void addToList(SDUse **List) {
327 Next = *List;
328 if (Next) Next->Prev = &Next;
329 Prev = List;
330 *List = this;
331 }
332
333 void removeFromList() {
334 *Prev = Next;
335 if (Next) Next->Prev = Prev;
336 }
337};
338
339/// simplify_type specializations - Allow casting operators to work directly on
340/// SDValues as if they were SDNode*'s.
341template<> struct simplify_type<SDUse> {
342 using SimpleType = SDNode *;
343
344 static SimpleType getSimplifiedValue(SDUse &Val) {
345 return Val.getNode();
346 }
347};
348
349/// These are IR-level optimization flags that may be propagated to SDNodes.
350/// TODO: This data structure should be shared by the IR optimizer and the
351/// backend.
352struct SDNodeFlags {
353private:
354 // This bit is used to determine if the flags are in a defined state.
355 // Flag bits can only be masked out during intersection if the masking flags
356 // are defined.
357 bool AnyDefined : 1;
358
359 bool NoUnsignedWrap : 1;
360 bool NoSignedWrap : 1;
361 bool Exact : 1;
362 bool NoNaNs : 1;
363 bool NoInfs : 1;
364 bool NoSignedZeros : 1;
365 bool AllowReciprocal : 1;
366 bool VectorReduction : 1;
367 bool AllowContract : 1;
368 bool ApproximateFuncs : 1;
369 bool AllowReassociation : 1;
370
371 // We assume instructions do not raise floating-point exceptions by default,
372 // and only those marked explicitly may do so. We could choose to represent
373 // this via a positive "FPExcept" flags like on the MI level, but having a
374 // negative "NoFPExcept" flag here (that defaults to true) makes the flag
375 // intersection logic more straightforward.
376 bool NoFPExcept : 1;
377
378public:
379 /// Default constructor turns off all optimization flags.
380 SDNodeFlags()
381 : AnyDefined(false), NoUnsignedWrap(false), NoSignedWrap(false),
382 Exact(false), NoNaNs(false), NoInfs(false),
383 NoSignedZeros(false), AllowReciprocal(false), VectorReduction(false),
384 AllowContract(false), ApproximateFuncs(false),
385 AllowReassociation(false), NoFPExcept(true) {}
386
387 /// Propagate the fast-math-flags from an IR FPMathOperator.
388 void copyFMF(const FPMathOperator &FPMO) {
389 setNoNaNs(FPMO.hasNoNaNs());
390 setNoInfs(FPMO.hasNoInfs());
391 setNoSignedZeros(FPMO.hasNoSignedZeros());
392 setAllowReciprocal(FPMO.hasAllowReciprocal());
393 setAllowContract(FPMO.hasAllowContract());
394 setApproximateFuncs(FPMO.hasApproxFunc());
395 setAllowReassociation(FPMO.hasAllowReassoc());
396 }
397
398 /// Sets the state of the flags to the defined state.
399 void setDefined() { AnyDefined = true; }
400 /// Returns true if the flags are in a defined state.
401 bool isDefined() const { return AnyDefined; }
402
403 // These are mutators for each flag.
404 void setNoUnsignedWrap(bool b) {
405 setDefined();
406 NoUnsignedWrap = b;
407 }
408 void setNoSignedWrap(bool b) {
409 setDefined();
410 NoSignedWrap = b;
411 }
412 void setExact(bool b) {
413 setDefined();
414 Exact = b;
415 }
416 void setNoNaNs(bool b) {
417 setDefined();
418 NoNaNs = b;
419 }
420 void setNoInfs(bool b) {
421 setDefined();
422 NoInfs = b;
423 }
424 void setNoSignedZeros(bool b) {
425 setDefined();
426 NoSignedZeros = b;
427 }
428 void setAllowReciprocal(bool b) {
429 setDefined();
430 AllowReciprocal = b;
431 }
432 void setVectorReduction(bool b) {
433 setDefined();
434 VectorReduction = b;
435 }
436 void setAllowContract(bool b) {
437 setDefined();
438 AllowContract = b;
439 }
440 void setApproximateFuncs(bool b) {
441 setDefined();
442 ApproximateFuncs = b;
443 }
444 void setAllowReassociation(bool b) {
445 setDefined();
446 AllowReassociation = b;
447 }
448 void setFPExcept(bool b) {
449 setDefined();
450 NoFPExcept = !b;
451 }
452
453 // These are accessors for each flag.
454 bool hasNoUnsignedWrap() const { return NoUnsignedWrap; }
455 bool hasNoSignedWrap() const { return NoSignedWrap; }
456 bool hasExact() const { return Exact; }
457 bool hasNoNaNs() const { return NoNaNs; }
458 bool hasNoInfs() const { return NoInfs; }
459 bool hasNoSignedZeros() const { return NoSignedZeros; }
460 bool hasAllowReciprocal() const { return AllowReciprocal; }
461 bool hasVectorReduction() const { return VectorReduction; }
462 bool hasAllowContract() const { return AllowContract; }
463 bool hasApproximateFuncs() const { return ApproximateFuncs; }
464 bool hasAllowReassociation() const { return AllowReassociation; }
465 bool hasFPExcept() const { return !NoFPExcept; }
466
467 bool isFast() const {
468 return NoSignedZeros && AllowReciprocal && NoNaNs && NoInfs && NoFPExcept &&
469 AllowContract && ApproximateFuncs && AllowReassociation;
470 }
471
472 /// Clear any flags in this flag set that aren't also set in Flags.
473 /// If the given Flags are undefined then don't do anything.
474 void intersectWith(const SDNodeFlags Flags) {
475 if (!Flags.isDefined())
476 return;
477 NoUnsignedWrap &= Flags.NoUnsignedWrap;
478 NoSignedWrap &= Flags.NoSignedWrap;
479 Exact &= Flags.Exact;
480 NoNaNs &= Flags.NoNaNs;
481 NoInfs &= Flags.NoInfs;
482 NoSignedZeros &= Flags.NoSignedZeros;
483 AllowReciprocal &= Flags.AllowReciprocal;
484 VectorReduction &= Flags.VectorReduction;
485 AllowContract &= Flags.AllowContract;
486 ApproximateFuncs &= Flags.ApproximateFuncs;
487 AllowReassociation &= Flags.AllowReassociation;
488 NoFPExcept &= Flags.NoFPExcept;
489 }
490};
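
A hedged usage sketch (not part of this header): flags are set through the mutators and passed along when a node is built; exact call sites vary by backend.

  // Hypothetical illustration:
  //   SDNodeFlags Flags;
  //   Flags.setNoUnsignedWrap(true);
  //   SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, RHS, Flags);
  //   Add->intersectFlagsWith(OtherFlags); // keep only flags set in both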
491
492/// Represents one node in the SelectionDAG.
493///
494class SDNode : public FoldingSetNode, public ilist_node<SDNode> {
495private:
496 /// The operation that this node performs.
497 int16_t NodeType;
498
499protected:
500 // We define a set of mini-helper classes to help us interpret the bits in our
501 // SubclassData. These are designed to fit within a uint16_t so they pack
502 // with NodeType.
503
504#if defined(_AIX) && (!defined(__GNUC__) || defined(__ibmxl__))
505// Except for GCC; by default, AIX compilers store bit-fields in 4-byte words
506// and give the `pack` pragma push semantics.
507#define BEGIN_TWO_BYTE_PACK() _Pragma("pack(2)")
508#define END_TWO_BYTE_PACK() _Pragma("pack(pop)")
509#else
510#define BEGIN_TWO_BYTE_PACK()
511#define END_TWO_BYTE_PACK()
512#endif
513
514BEGIN_TWO_BYTE_PACK()
515 class SDNodeBitfields {
516 friend class SDNode;
517 friend class MemIntrinsicSDNode;
518 friend class MemSDNode;
519 friend class SelectionDAG;
520
521 uint16_t HasDebugValue : 1;
522 uint16_t IsMemIntrinsic : 1;
523 uint16_t IsDivergent : 1;
524 };
525 enum { NumSDNodeBits = 3 };
526
527 class ConstantSDNodeBitfields {
528 friend class ConstantSDNode;
529
530 uint16_t : NumSDNodeBits;
531
532 uint16_t IsOpaque : 1;
533 };
534
535 class MemSDNodeBitfields {
536 friend class MemSDNode;
537 friend class MemIntrinsicSDNode;
538 friend class AtomicSDNode;
539
540 uint16_t : NumSDNodeBits;
541
542 uint16_t IsVolatile : 1;
543 uint16_t IsNonTemporal : 1;
544 uint16_t IsDereferenceable : 1;
545 uint16_t IsInvariant : 1;
546 };
547 enum { NumMemSDNodeBits = NumSDNodeBits + 4 };
548
549 class LSBaseSDNodeBitfields {
550 friend class LSBaseSDNode;
551 friend class MaskedGatherScatterSDNode;
552
553 uint16_t : NumMemSDNodeBits;
554
555 // This storage is shared between disparate class hierarchies to hold an
556 // enumeration specific to the class hierarchy in use.
557 // LSBaseSDNode => enum ISD::MemIndexedMode
558 // MaskedGatherScatterSDNode => enum ISD::MemIndexType
559 uint16_t AddressingMode : 3;
560 };
561 enum { NumLSBaseSDNodeBits = NumMemSDNodeBits + 3 };
562
563 class LoadSDNodeBitfields {
564 friend class LoadSDNode;
565 friend class MaskedLoadSDNode;
566
567 uint16_t : NumLSBaseSDNodeBits;
568
569 uint16_t ExtTy : 2; // enum ISD::LoadExtType
570 uint16_t IsExpanding : 1;
571 };
572
573 class StoreSDNodeBitfields {
574 friend class StoreSDNode;
575 friend class MaskedStoreSDNode;
576
577 uint16_t : NumLSBaseSDNodeBits;
578
579 uint16_t IsTruncating : 1;
580 uint16_t IsCompressing : 1;
581 };
582
583 union {
584 char RawSDNodeBits[sizeof(uint16_t)];
585 SDNodeBitfields SDNodeBits;
586 ConstantSDNodeBitfields ConstantSDNodeBits;
587 MemSDNodeBitfields MemSDNodeBits;
588 LSBaseSDNodeBitfields LSBaseSDNodeBits;
589 LoadSDNodeBitfields LoadSDNodeBits;
590 StoreSDNodeBitfields StoreSDNodeBits;
591 };
592END_TWO_BYTE_PACK()
593#undef BEGIN_TWO_BYTE_PACK
594#undef END_TWO_BYTE_PACK
595
596 // RawSDNodeBits must cover the entirety of the union. This means that all of
597 // the union's members must have size <= RawSDNodeBits. We write the RHS as
598 // "2" instead of sizeof(RawSDNodeBits) because MSVC can't handle the latter.
599 static_assert(sizeof(SDNodeBitfields) <= 2, "field too wide");
600 static_assert(sizeof(ConstantSDNodeBitfields) <= 2, "field too wide");
601 static_assert(sizeof(MemSDNodeBitfields) <= 2, "field too wide");
602 static_assert(sizeof(LSBaseSDNodeBitfields) <= 2, "field too wide");
603 static_assert(sizeof(LoadSDNodeBitfields) <= 2, "field too wide");
604 static_assert(sizeof(StoreSDNodeBitfields) <= 2, "field too wide");
605
606private:
607 friend class SelectionDAG;
608 // TODO: unfriend HandleSDNode once we fix its operand handling.
609 friend class HandleSDNode;
610
611 /// Unique id per SDNode in the DAG.
612 int NodeId = -1;
613
614 /// The values that are used by this operation.
615 SDUse *OperandList = nullptr;
616
617 /// The types of the values this node defines. SDNode's may
618 /// define multiple values simultaneously.
619 const EVT *ValueList;
620
621 /// List of uses for this SDNode.
622 SDUse *UseList = nullptr;
623
624 /// The number of entries in the Operand/Value list.
625 unsigned short NumOperands = 0;
626 unsigned short NumValues;
627
628 // The ordering of the SDNodes. It roughly corresponds to the ordering of the
629 // original LLVM instructions.
630 // This is used for turning off scheduling, because we'll forgo
631 // the normal scheduling algorithms and output the instructions according to
632 // this ordering.
633 unsigned IROrder;
634
635 /// Source line information.
636 DebugLoc debugLoc;
637
638 /// Return a pointer to the specified value type.
639 static const EVT *getValueTypeList(EVT VT);
640
641 SDNodeFlags Flags;
642
643public:
644 /// Unique and persistent id per SDNode in the DAG.
645 /// Used for debug printing.
646 uint16_t PersistentId;
647
648 //===--------------------------------------------------------------------===//
649 // Accessors
650 //
651
652 /// Return the SelectionDAG opcode value for this node. For
653 /// pre-isel nodes (those for which isMachineOpcode returns false), these
654 /// are the opcode values in the ISD and <target>ISD namespaces. For
655 /// post-isel opcodes, see getMachineOpcode.
656 unsigned getOpcode() const { return (unsigned short)NodeType; }
657
658 /// Test if this node has a target-specific opcode (in the
659 /// \<target\>ISD namespace).
660 bool isTargetOpcode() const { return NodeType >= ISD::BUILTIN_OP_END; }
661
662 /// Test if this node has a target-specific
663 /// memory-referencing opcode (in the \<target\>ISD namespace and
664 /// greater than FIRST_TARGET_MEMORY_OPCODE).
665 bool isTargetMemoryOpcode() const {
666 return NodeType >= ISD::FIRST_TARGET_MEMORY_OPCODE;
667 }
668
669 /// Return true if this node is an UNDEF node.
670 bool isUndef() const { return NodeType == ISD::UNDEF; }
    12. Assuming field 'NodeType' is not equal to UNDEF
    13. Returning zero, which participates in a condition later
671
672 /// Test if this node is a memory intrinsic (with valid pointer information).
673 /// INTRINSIC_W_CHAIN and INTRINSIC_VOID nodes are sometimes created for
674 /// non-memory intrinsics (with chains) that are not really instances of
675 /// MemSDNode. For such nodes, we need some extra state to determine the
676 /// proper classof relationship.
677 bool isMemIntrinsic() const {
678 return (NodeType == ISD::INTRINSIC_W_CHAIN ||
679 NodeType == ISD::INTRINSIC_VOID) &&
680 SDNodeBits.IsMemIntrinsic;
681 }
682
683 /// Test if this node is a strict floating point pseudo-op.
684 bool isStrictFPOpcode() {
685 switch (NodeType) {
686 default:
687 return false;
688 case ISD::STRICT_FADD:
689 case ISD::STRICT_FSUB:
690 case ISD::STRICT_FMUL:
691 case ISD::STRICT_FDIV:
692 case ISD::STRICT_FREM:
693 case ISD::STRICT_FMA:
694 case ISD::STRICT_FSQRT:
695 case ISD::STRICT_FPOW:
696 case ISD::STRICT_FPOWI:
697 case ISD::STRICT_FSIN:
698 case ISD::STRICT_FCOS:
699 case ISD::STRICT_FEXP:
700 case ISD::STRICT_FEXP2:
701 case ISD::STRICT_FLOG:
702 case ISD::STRICT_FLOG10:
703 case ISD::STRICT_FLOG2:
704 case ISD::STRICT_FRINT:
705 case ISD::STRICT_FNEARBYINT:
706 case ISD::STRICT_FMAXNUM:
707 case ISD::STRICT_FMINNUM:
708 case ISD::STRICT_FCEIL:
709 case ISD::STRICT_FFLOOR:
710 case ISD::STRICT_FROUND:
711 case ISD::STRICT_FTRUNC:
712 case ISD::STRICT_FP_TO_SINT:
713 case ISD::STRICT_FP_TO_UINT:
714 case ISD::STRICT_FP_ROUND:
715 case ISD::STRICT_FP_EXTEND:
716 return true;
717 }
718 }
719
720 /// Test if this node has a post-isel opcode, directly
721 /// corresponding to a MachineInstr opcode.
722 bool isMachineOpcode() const { return NodeType < 0; }
723
724 /// This may only be called if isMachineOpcode returns
725 /// true. It returns the MachineInstr opcode value that the node's opcode
726 /// corresponds to.
727 unsigned getMachineOpcode() const {
728    assert(isMachineOpcode() && "Not a MachineInstr opcode!");
729 return ~NodeType;
730 }
731
732 bool getHasDebugValue() const { return SDNodeBits.HasDebugValue; }
733 void setHasDebugValue(bool b) { SDNodeBits.HasDebugValue = b; }
734
735 bool isDivergent() const { return SDNodeBits.IsDivergent; }
736
737 /// Return true if there are no uses of this node.
738 bool use_empty() const { return UseList == nullptr; }
739
740 /// Return true if there is exactly one use of this node.
741 bool hasOneUse() const {
742 return !use_empty() && std::next(use_begin()) == use_end();
743 }
744
745 /// Return the number of uses of this node. This method takes
746 /// time proportional to the number of uses.
747 size_t use_size() const { return std::distance(use_begin(), use_end()); }
748
749 /// Return the unique node id.
750 int getNodeId() const { return NodeId; }
751
752 /// Set unique node id.
753 void setNodeId(int Id) { NodeId = Id; }
754
755 /// Return the node ordering.
756 unsigned getIROrder() const { return IROrder; }
757
758 /// Set the node ordering.
759 void setIROrder(unsigned Order) { IROrder = Order; }
760
761 /// Return the source location info.
762 const DebugLoc &getDebugLoc() const { return debugLoc; }
763
764 /// Set source location info. Try to avoid this, putting
765 /// it in the constructor is preferable.
766 void setDebugLoc(DebugLoc dl) { debugLoc = std::move(dl); }
767
768 /// This class provides iterator support for SDUse
769 /// operands that use a specific SDNode.
770 class use_iterator
771 : public std::iterator<std::forward_iterator_tag, SDUse, ptrdiff_t> {
772 friend class SDNode;
773
774 SDUse *Op = nullptr;
775
776 explicit use_iterator(SDUse *op) : Op(op) {}
777
778 public:
779 using reference = std::iterator<std::forward_iterator_tag,
780 SDUse, ptrdiff_t>::reference;
781 using pointer = std::iterator<std::forward_iterator_tag,
782 SDUse, ptrdiff_t>::pointer;
783
784 use_iterator() = default;
785 use_iterator(const use_iterator &I) : Op(I.Op) {}
786
787 bool operator==(const use_iterator &x) const {
788 return Op == x.Op;
789 }
790 bool operator!=(const use_iterator &x) const {
791 return !operator==(x);
792 }
793
794 /// Return true if this iterator is at the end of uses list.
795 bool atEnd() const { return Op == nullptr; }
796
797 // Iterator traversal: forward iteration only.
798 use_iterator &operator++() { // Preincrement
799      assert(Op && "Cannot increment end iterator!");
800 Op = Op->getNext();
801 return *this;
802 }
803
804 use_iterator operator++(int) { // Postincrement
805 use_iterator tmp = *this; ++*this; return tmp;
806 }
807
808 /// Retrieve a pointer to the current user node.
809 SDNode *operator*() const {
810      assert(Op && "Cannot dereference end iterator!");
811 return Op->getUser();
812 }
813
814 SDNode *operator->() const { return operator*(); }
815
816 SDUse &getUse() const { return *Op; }
817
818 /// Retrieve the operand # of this use in its user.
819 unsigned getOperandNo() const {
820      assert(Op && "Cannot dereference end iterator!");
821 return (unsigned)(Op - Op->getUser()->OperandList);
822 }
823 };
824
825 /// Provide iteration support to walk over all uses of an SDNode.
826 use_iterator use_begin() const {
827 return use_iterator(UseList);
828 }
829
830 static use_iterator use_end() { return use_iterator(nullptr); }
831
832 inline iterator_range<use_iterator> uses() {
833 return make_range(use_begin(), use_end());
834 }
835 inline iterator_range<use_iterator> uses() const {
836 return make_range(use_begin(), use_end());
837 }
838
839 /// Return true if there are exactly NUSES uses of the indicated value.
840 /// This method ignores uses of other values defined by this operation.
841 bool hasNUsesOfValue(unsigned NUses, unsigned Value) const;
842
843 /// Return true if there are any use of the indicated value.
844 /// This method ignores uses of other values defined by this operation.
845 bool hasAnyUseOfValue(unsigned Value) const;
846
847 /// Return true if this node is the only use of N.
848 bool isOnlyUserOf(const SDNode *N) const;
849
850 /// Return true if this node is an operand of N.
851 bool isOperandOf(const SDNode *N) const;
852
853 /// Return true if this node is a predecessor of N.
854 /// NOTE: Implemented on top of hasPredecessor and every bit as
855 /// expensive. Use carefully.
856 bool isPredecessorOf(const SDNode *N) const {
857 return N->hasPredecessor(this);
858 }
859
860 /// Return true if N is a predecessor of this node.
861 /// N is either an operand of this node, or can be reached by recursively
862 /// traversing up the operands.
863 /// NOTE: This is an expensive method. Use it carefully.
864 bool hasPredecessor(const SDNode *N) const;
865
866 /// Returns true if N is a predecessor of any node in Worklist. This
867 /// helper keeps Visited and Worklist sets externally to allow unions
868 /// searches to be performed in parallel, caching of results across
869 /// queries and incremental addition to Worklist. Stops early if N is
870 /// found but will resume. Remember to clear Visited and Worklists
871 /// if DAG changes. MaxSteps gives a maximum number of nodes to visit before
872 /// giving up. The TopologicalPrune flag signals that positive NodeIds are
873 /// topologically ordered (Operands have strictly smaller node id) and search
874 /// can be pruned leveraging this.
875 static bool hasPredecessorHelper(const SDNode *N,
876 SmallPtrSetImpl<const SDNode *> &Visited,
877 SmallVectorImpl<const SDNode *> &Worklist,
878 unsigned int MaxSteps = 0,
879 bool TopologicalPrune = false) {
880 SmallVector<const SDNode *, 8> DeferredNodes;
881 if (Visited.count(N))
882 return true;
883
884 // Node Id's are assigned in three places: As a topological
885 // ordering (> 0), during legalization (results in values set to
886// 0), new nodes (set to -1). If N has a topological id then we
887// know that all nodes with ids smaller than it cannot be
888// successors and we need not check them. Filter out all nodes
889// that can't match. We add them to the worklist before exit
890// in case of multiple calls. Note that during selection the topological id
891// may be violated if a node's predecessor is selected before it. We mark
892// this at selection by negating the id of unselected successors and
893 // restricting topological pruning to positive ids.
894
895 int NId = N->getNodeId();
896 // If we Invalidated the Id, reconstruct original NId.
897 if (NId < -1)
898 NId = -(NId + 1);
899
900 bool Found = false;
901 while (!Worklist.empty()) {
902 const SDNode *M = Worklist.pop_back_val();
903 int MId = M->getNodeId();
904 if (TopologicalPrune && M->getOpcode() != ISD::TokenFactor && (NId > 0) &&
905 (MId > 0) && (MId < NId)) {
906 DeferredNodes.push_back(M);
907 continue;
908 }
909 for (const SDValue &OpV : M->op_values()) {
910 SDNode *Op = OpV.getNode();
911 if (Visited.insert(Op).second)
912 Worklist.push_back(Op);
913 if (Op == N)
914 Found = true;
915 }
916 if (Found)
917 break;
918 if (MaxSteps != 0 && Visited.size() >= MaxSteps)
919 break;
920 }
921 // Push deferred nodes back on worklist.
922 Worklist.append(DeferredNodes.begin(), DeferredNodes.end());
923 // If we bailed early, conservatively return found.
924 if (MaxSteps != 0 && Visited.size() >= MaxSteps)
925 return true;
926 return Found;
927 }
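
A hedged sketch of the intended call pattern (not part of this header), keeping the Visited and Worklist sets external so they can be reused across queries as described above:

  // Hypothetical illustration:
  //   SmallPtrSet<const SDNode *, 32> Visited;
  //   SmallVector<const SDNode *, 16> Worklist;
  //   Worklist.push_back(Root);
  //   if (SDNode::hasPredecessorHelper(N, Visited, Worklist, /*MaxSteps=*/1000))
  //     ...; // N is (possibly conservatively) a predecessor of Root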
928
929 /// Return true if all the users of N are contained in Nodes.
930 /// NOTE: Requires at least one match, but doesn't require them all.
931 static bool areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N);
932
933 /// Return the number of values used by this operation.
934 unsigned getNumOperands() const { return NumOperands; }
935
936 /// Return the maximum number of operands that a SDNode can hold.
937 static constexpr size_t getMaxNumOperands() {
938 return std::numeric_limits<decltype(SDNode::NumOperands)>::max();
939 }
940
941 /// Helper method returns the integer value of a ConstantSDNode operand.
942 inline uint64_t getConstantOperandVal(unsigned Num) const;
943
944 /// Helper method returns the APInt of a ConstantSDNode operand.
945 inline const APInt &getConstantOperandAPInt(unsigned Num) const;
946
947 const SDValue &getOperand(unsigned Num) const {
948    assert(Num < NumOperands && "Invalid child # of SDNode!");
949 return OperandList[Num];
950 }
951
952 using op_iterator = SDUse *;
953
954 op_iterator op_begin() const { return OperandList; }
955 op_iterator op_end() const { return OperandList+NumOperands; }
956 ArrayRef<SDUse> ops() const { return makeArrayRef(op_begin(), op_end()); }
957
958 /// Iterator for directly iterating over the operand SDValue's.
959 struct value_op_iterator
960 : iterator_adaptor_base<value_op_iterator, op_iterator,
961 std::random_access_iterator_tag, SDValue,
962 ptrdiff_t, value_op_iterator *,
963 value_op_iterator *> {
964 explicit value_op_iterator(SDUse *U = nullptr)
965 : iterator_adaptor_base(U) {}
966
967 const SDValue &operator*() const { return I->get(); }
968 };
969
970 iterator_range<value_op_iterator> op_values() const {
971 return make_range(value_op_iterator(op_begin()),
972 value_op_iterator(op_end()));
973 }
974
975 SDVTList getVTList() const {
976 SDVTList X = { ValueList, NumValues };
977 return X;
978 }
979
980 /// If this node has a glue operand, return the node
981 /// to which the glue operand points. Otherwise return NULL.
982 SDNode *getGluedNode() const {
983 if (getNumOperands() != 0 &&
984 getOperand(getNumOperands()-1).getValueType() == MVT::Glue)
985 return getOperand(getNumOperands()-1).getNode();
986 return nullptr;
987 }
988
989 /// If this node has a glue value with a user, return
990 /// the user (there is at most one). Otherwise return NULL.
991 SDNode *getGluedUser() const {
992 for (use_iterator UI = use_begin(), UE = use_end(); UI != UE; ++UI)
993 if (UI.getUse().get().getValueType() == MVT::Glue)
994 return *UI;
995 return nullptr;
996 }
997
998 const SDNodeFlags getFlags() const { return Flags; }
999 void setFlags(SDNodeFlags NewFlags) { Flags = NewFlags; }
1000 bool isFast() { return Flags.isFast(); }
1001
1002 /// Clear any flags in this node that aren't also set in Flags.
1003 /// If Flags is not in a defined state then this has no effect.
1004 void intersectFlagsWith(const SDNodeFlags Flags);
1005
1006 /// Return the number of values defined/returned by this operator.
1007 unsigned getNumValues() const { return NumValues; }
1008
1009 /// Return the type of a specified result.
1010 EVT getValueType(unsigned ResNo) const {
1011    assert(ResNo < NumValues && "Illegal result number!");
1012 return ValueList[ResNo];
1013 }
1014
1015 /// Return the type of a specified result as a simple type.
1016 MVT getSimpleValueType(unsigned ResNo) const {
1017 return getValueType(ResNo).getSimpleVT();
1018 }
1019
1020 /// Returns MVT::getSizeInBits(getValueType(ResNo)).
1021 unsigned getValueSizeInBits(unsigned ResNo) const {
1022 return getValueType(ResNo).getSizeInBits();
1023 }
1024
1025 using value_iterator = const EVT *;
1026
1027 value_iterator value_begin() const { return ValueList; }
1028 value_iterator value_end() const { return ValueList+NumValues; }
1029
1030 /// Return the opcode of this operation for printing.
1031 std::string getOperationName(const SelectionDAG *G = nullptr) const;
1032 static const char* getIndexedModeName(ISD::MemIndexedMode AM);
1033 void print_types(raw_ostream &OS, const SelectionDAG *G) const;
1034 void print_details(raw_ostream &OS, const SelectionDAG *G) const;
1035 void print(raw_ostream &OS, const SelectionDAG *G = nullptr) const;
1036 void printr(raw_ostream &OS, const SelectionDAG *G = nullptr) const;
1037
1038 /// Print a SelectionDAG node and all children down to
1039 /// the leaves. The given SelectionDAG allows target-specific nodes
1040 /// to be printed in human-readable form. Unlike printr, this will
1041 /// print the whole DAG, including children that appear multiple
1042 /// times.
1043 ///
1044 void printrFull(raw_ostream &O, const SelectionDAG *G = nullptr) const;
1045
1046 /// Print a SelectionDAG node and children up to
1047 /// depth "depth." The given SelectionDAG allows target-specific
1048 /// nodes to be printed in human-readable form. Unlike printr, this
1049 /// will print children that appear multiple times wherever they are
1050 /// used.
1051 ///
1052 void printrWithDepth(raw_ostream &O, const SelectionDAG *G = nullptr,
1053 unsigned depth = 100) const;
1054
1055 /// Dump this node, for debugging.
1056 void dump() const;
1057
1058 /// Dump (recursively) this node and its use-def subgraph.
1059 void dumpr() const;
1060
1061 /// Dump this node, for debugging.
1062 /// The given SelectionDAG allows target-specific nodes to be printed
1063 /// in human-readable form.
1064 void dump(const SelectionDAG *G) const;
1065
1066 /// Dump (recursively) this node and its use-def subgraph.
1067 /// The given SelectionDAG allows target-specific nodes to be printed
1068 /// in human-readable form.
1069 void dumpr(const SelectionDAG *G) const;
1070
1071 /// printrFull to dbgs(). The given SelectionDAG allows
1072 /// target-specific nodes to be printed in human-readable form.
1073 /// Unlike dumpr, this will print the whole DAG, including children
1074 /// that appear multiple times.
1075 void dumprFull(const SelectionDAG *G = nullptr) const;
1076
1077 /// printrWithDepth to dbgs(). The given
1078 /// SelectionDAG allows target-specific nodes to be printed in
1079 /// human-readable form. Unlike dumpr, this will print children
1080 /// that appear multiple times wherever they are used.
1081 ///
1082 void dumprWithDepth(const SelectionDAG *G = nullptr,
1083 unsigned depth = 100) const;
1084
1085 /// Gather unique data for the node.
1086 void Profile(FoldingSetNodeID &ID) const;
1087
1088 /// This method should only be used by the SDUse class.
1089 void addUse(SDUse &U) { U.addToList(&UseList); }
1090
1091protected:
1092 static SDVTList getSDVTList(EVT VT) {
1093 SDVTList Ret = { getValueTypeList(VT), 1 };
1094 return Ret;
1095 }
1096
1097 /// Create an SDNode.
1098 ///
1099 /// SDNodes are created without any operands, and never own the operand
1100 /// storage. To add operands, see SelectionDAG::createOperands.
1101 SDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs)
1102 : NodeType(Opc), ValueList(VTs.VTs), NumValues(VTs.NumVTs),
1103 IROrder(Order), debugLoc(std::move(dl)) {
1104 memset(&RawSDNodeBits, 0, sizeof(RawSDNodeBits));
1105    assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor");
1106    assert(NumValues == VTs.NumVTs &&
1107           "NumValues wasn't wide enough for its operands!");
1108 }
1109
1110 /// Release the operands and set this node to have zero operands.
1111 void DropOperands();
1112};
1113
1114/// Wrapper class for IR location info (IR ordering and DebugLoc) to be passed
1115/// into SDNode creation functions.
1116/// When an SDNode is created from the DAGBuilder, the DebugLoc is extracted
1117/// from the original Instruction, and IROrder is the ordinal position of
1118/// the instruction.
1119/// When an SDNode is created after the DAG is being built, both DebugLoc and
1120/// the IROrder are propagated from the original SDNode.
1121/// So SDLoc class provides two constructors besides the default one, one to
1122/// be used by the DAGBuilder, the other to be used by others.
1123class SDLoc {
1124private:
1125 DebugLoc DL;
1126 int IROrder = 0;
1127
1128public:
1129 SDLoc() = default;
1130 SDLoc(const SDNode *N) : DL(N->getDebugLoc()), IROrder(N->getIROrder()) {}
1131 SDLoc(const SDValue V) : SDLoc(V.getNode()) {}
1132 SDLoc(const Instruction *I, int Order) : IROrder(Order) {
1133    assert(Order >= 0 && "bad IROrder");
1134 if (I)
1135 DL = I->getDebugLoc();
1136 }
1137
1138 unsigned getIROrder() const { return IROrder; }
1139 const DebugLoc &getDebugLoc() const { return DL; }
1140};
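
As an illustration of the two construction paths described above, here is a minimal sketch (not part of the annotated source) of how lowering code typically forwards location information; it assumes a SelectionDAG &DAG and an SDNode *N are in scope, as they would be in a target lowering hook.

    // Sketch: a replacement node inherits N's DebugLoc and IROrder through SDLoc.
    SDLoc dl(N);                         // copies getDebugLoc() and getIROrder()
    EVT VT = N->getValueType(0);
    SDValue Sum = DAG.getNode(ISD::ADD, dl, VT,
                              N->getOperand(0), N->getOperand(1));
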
1141
1142// Define inline functions from the SDValue class.
1143
1144inline SDValue::SDValue(SDNode *node, unsigned resno)
1145 : Node(node), ResNo(resno) {
1146 // Explicitly check for !ResNo to avoid use-after-free, because there are
1147 // callers that use SDValue(N, 0) with a deleted N to indicate successful
1148 // combines.
1149  assert((!Node || !ResNo || ResNo < Node->getNumValues()) &&
1150         "Invalid result number for the given node!");
1151  assert(ResNo < -2U && "Cannot use result numbers reserved for DenseMaps.");
1152}
1153
1154inline unsigned SDValue::getOpcode() const {
1155 return Node->getOpcode();
1156}
1157
1158inline EVT SDValue::getValueType() const {
1159 return Node->getValueType(ResNo);
1160}
1161
1162inline unsigned SDValue::getNumOperands() const {
1163 return Node->getNumOperands();
1164}
1165
1166inline const SDValue &SDValue::getOperand(unsigned i) const {
1167 return Node->getOperand(i);
1168}
1169
1170inline uint64_t SDValue::getConstantOperandVal(unsigned i) const {
1171 return Node->getConstantOperandVal(i);
1172}
1173
1174inline const APInt &SDValue::getConstantOperandAPInt(unsigned i) const {
1175 return Node->getConstantOperandAPInt(i);
1176}
1177
1178inline bool SDValue::isTargetOpcode() const {
1179 return Node->isTargetOpcode();
1180}
1181
1182inline bool SDValue::isTargetMemoryOpcode() const {
1183 return Node->isTargetMemoryOpcode();
1184}
1185
1186inline bool SDValue::isMachineOpcode() const {
1187 return Node->isMachineOpcode();
1188}
1189
1190inline unsigned SDValue::getMachineOpcode() const {
1191 return Node->getMachineOpcode();
1192}
1193
1194inline bool SDValue::isUndef() const {
1195 return Node->isUndef();
11. Calling 'SDNode::isUndef'
14. Returning from 'SDNode::isUndef'
15. Returning zero, which participates in a condition later
1196}
1197
1198inline bool SDValue::use_empty() const {
1199 return !Node->hasAnyUseOfValue(ResNo);
1200}
1201
1202inline bool SDValue::hasOneUse() const {
1203 return Node->hasNUsesOfValue(1, ResNo);
1204}
1205
1206inline const DebugLoc &SDValue::getDebugLoc() const {
1207 return Node->getDebugLoc();
1208}
1209
1210inline void SDValue::dump() const {
1211 return Node->dump();
1212}
1213
1214inline void SDValue::dump(const SelectionDAG *G) const {
1215 return Node->dump(G);
1216}
1217
1218inline void SDValue::dumpr() const {
1219 return Node->dumpr();
1220}
1221
1222inline void SDValue::dumpr(const SelectionDAG *G) const {
1223 return Node->dumpr(G);
1224}
1225
1226// Define inline functions from the SDUse class.
1227
1228inline void SDUse::set(const SDValue &V) {
1229 if (Val.getNode()) removeFromList();
1230 Val = V;
1231 if (V.getNode()) V.getNode()->addUse(*this);
1232}
1233
1234inline void SDUse::setInitial(const SDValue &V) {
1235 Val = V;
1236 V.getNode()->addUse(*this);
1237}
1238
1239inline void SDUse::setNode(SDNode *N) {
1240 if (Val.getNode()) removeFromList();
1241 Val.setNode(N);
1242 if (N) N->addUse(*this);
1243}
1244
1245/// This class is used to form a handle around another node that
1246/// is persistent and is updated across invocations of replaceAllUsesWith on its
1247/// operand. This node should be directly created by end-users and not added to
1248/// the AllNodes list.
1249class HandleSDNode : public SDNode {
1250 SDUse Op;
1251
1252public:
1253 explicit HandleSDNode(SDValue X)
1254 : SDNode(ISD::HANDLENODE, 0, DebugLoc(), getSDVTList(MVT::Other)) {
1255 // HandleSDNodes are never inserted into the DAG, so they won't be
1256 // auto-numbered. Use ID 65535 as a sentinel.
1257 PersistentId = 0xffff;
1258
1259 // Manually set up the operand list. This node type is special in that it's
1260 // always stack allocated and SelectionDAG does not manage its operands.
1261 // TODO: This should either (a) not be in the SDNode hierarchy, or (b) not
1262 // be so special.
1263 Op.setUser(this);
1264 Op.setInitial(X);
1265 NumOperands = 1;
1266 OperandList = &Op;
1267 }
1268 ~HandleSDNode();
1269
1270 const SDValue &getValue() const { return Op; }
1271};
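
A typical use of this handle, sketched below under the assumption that an SDValue Chain and code that may trigger replaceAllUsesWith are in scope (illustrative only, not part of the annotated source):

    // Sketch: keep a value valid across transformations that may RAUW its node.
    HandleSDNode Handle(Chain);
    // ... perform DAG surgery that may replace or delete Chain's node ...
    Chain = Handle.getValue();   // the handle tracked any replacement
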
1272
1273class AddrSpaceCastSDNode : public SDNode {
1274private:
1275 unsigned SrcAddrSpace;
1276 unsigned DestAddrSpace;
1277
1278public:
1279 AddrSpaceCastSDNode(unsigned Order, const DebugLoc &dl, EVT VT,
1280 unsigned SrcAS, unsigned DestAS);
1281
1282 unsigned getSrcAddressSpace() const { return SrcAddrSpace; }
1283 unsigned getDestAddressSpace() const { return DestAddrSpace; }
1284
1285 static bool classof(const SDNode *N) {
1286 return N->getOpcode() == ISD::ADDRSPACECAST;
1287 }
1288};
1289
1290/// This is an abstract virtual class for memory operations.
1291class MemSDNode : public SDNode {
1292private:
1293 // VT of in-memory value.
1294 EVT MemoryVT;
1295
1296protected:
1297 /// Memory reference information.
1298 MachineMemOperand *MMO;
1299
1300public:
1301 MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, SDVTList VTs,
1302 EVT memvt, MachineMemOperand *MMO);
1303
1304 bool readMem() const { return MMO->isLoad(); }
1305 bool writeMem() const { return MMO->isStore(); }
1306
1307 /// Returns alignment and volatility of the memory access
1308 unsigned getOriginalAlignment() const {
1309 return MMO->getBaseAlignment();
1310 }
1311 unsigned getAlignment() const {
1312 return MMO->getAlignment();
1313 }
1314
1315 /// Return the SubclassData value, without HasDebugValue. This contains an
1316 /// encoding of the volatile flag, as well as bits used by subclasses. This
1317 /// function should only be used to compute a FoldingSetNodeID value.
1318  /// The HasDebugValue bit is masked out because the CSE map needs to match
1319  /// nodes with debug info against nodes without debug info. The same applies
1320  /// to the IsDivergent bit.
1321 unsigned getRawSubclassData() const {
1322 uint16_t Data;
1323 union {
1324 char RawSDNodeBits[sizeof(uint16_t)];
1325 SDNodeBitfields SDNodeBits;
1326 };
1327 memcpy(&RawSDNodeBits, &this->RawSDNodeBits, sizeof(this->RawSDNodeBits));
1328 SDNodeBits.HasDebugValue = 0;
1329 SDNodeBits.IsDivergent = false;
1330 memcpy(&Data, &RawSDNodeBits, sizeof(RawSDNodeBits));
1331 return Data;
1332 }
1333
1334 bool isVolatile() const { return MemSDNodeBits.IsVolatile; }
1335 bool isNonTemporal() const { return MemSDNodeBits.IsNonTemporal; }
1336 bool isDereferenceable() const { return MemSDNodeBits.IsDereferenceable; }
1337 bool isInvariant() const { return MemSDNodeBits.IsInvariant; }
1338
1339 // Returns the offset from the location of the access.
1340 int64_t getSrcValueOffset() const { return MMO->getOffset(); }
1341
1342 /// Returns the AA info that describes the dereference.
1343 AAMDNodes getAAInfo() const { return MMO->getAAInfo(); }
1344
1345 /// Returns the Ranges that describes the dereference.
1346 const MDNode *getRanges() const { return MMO->getRanges(); }
1347
1348 /// Returns the synchronization scope ID for this memory operation.
1349 SyncScope::ID getSyncScopeID() const { return MMO->getSyncScopeID(); }
1350
1351 /// Return the atomic ordering requirements for this memory operation. For
1352 /// cmpxchg atomic operations, return the atomic ordering requirements when
1353 /// store occurs.
1354 AtomicOrdering getOrdering() const { return MMO->getOrdering(); }
1355
1356 /// Return true if the memory operation ordering is Unordered or higher.
1357 bool isAtomic() const { return MMO->isAtomic(); }
1358
1359 /// Returns true if the memory operation doesn't imply any ordering
1360 /// constraints on surrounding memory operations beyond the normal memory
1361 /// aliasing rules.
1362 bool isUnordered() const { return MMO->isUnordered(); }
1363
1364  /// Returns true if the memory operation is neither atomic nor volatile.
1365 bool isSimple() const { return !isAtomic() && !isVolatile(); }
1366
1367 /// Return the type of the in-memory value.
1368 EVT getMemoryVT() const { return MemoryVT; }
1369
1370 /// Return a MachineMemOperand object describing the memory
1371  /// reference performed by this operation.
1372 MachineMemOperand *getMemOperand() const { return MMO; }
1373
1374 const MachinePointerInfo &getPointerInfo() const {
1375 return MMO->getPointerInfo();
1376 }
1377
1378 /// Return the address space for the associated pointer
1379 unsigned getAddressSpace() const {
1380 return getPointerInfo().getAddrSpace();
1381 }
1382
1383 /// Update this MemSDNode's MachineMemOperand information
1384 /// to reflect the alignment of NewMMO, if it has a greater alignment.
1385 /// This must only be used when the new alignment applies to all users of
1386 /// this MachineMemOperand.
1387 void refineAlignment(const MachineMemOperand *NewMMO) {
1388 MMO->refineAlignment(NewMMO);
1389 }
1390
1391 const SDValue &getChain() const { return getOperand(0); }
1392 const SDValue &getBasePtr() const {
1393 return getOperand(getOpcode() == ISD::STORE ? 2 : 1);
1394 }
1395
1396 // Methods to support isa and dyn_cast
1397 static bool classof(const SDNode *N) {
1398 // For some targets, we lower some target intrinsics to a MemIntrinsicNode
1399 // with either an intrinsic or a target opcode.
1400 return N->getOpcode() == ISD::LOAD ||
1401 N->getOpcode() == ISD::STORE ||
1402 N->getOpcode() == ISD::PREFETCH ||
1403 N->getOpcode() == ISD::ATOMIC_CMP_SWAP ||
1404 N->getOpcode() == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS ||
1405 N->getOpcode() == ISD::ATOMIC_SWAP ||
1406 N->getOpcode() == ISD::ATOMIC_LOAD_ADD ||
1407 N->getOpcode() == ISD::ATOMIC_LOAD_SUB ||
1408 N->getOpcode() == ISD::ATOMIC_LOAD_AND ||
1409 N->getOpcode() == ISD::ATOMIC_LOAD_CLR ||
1410 N->getOpcode() == ISD::ATOMIC_LOAD_OR ||
1411 N->getOpcode() == ISD::ATOMIC_LOAD_XOR ||
1412 N->getOpcode() == ISD::ATOMIC_LOAD_NAND ||
1413 N->getOpcode() == ISD::ATOMIC_LOAD_MIN ||
1414 N->getOpcode() == ISD::ATOMIC_LOAD_MAX ||
1415 N->getOpcode() == ISD::ATOMIC_LOAD_UMIN ||
1416 N->getOpcode() == ISD::ATOMIC_LOAD_UMAX ||
1417 N->getOpcode() == ISD::ATOMIC_LOAD_FADD ||
1418 N->getOpcode() == ISD::ATOMIC_LOAD_FSUB ||
1419 N->getOpcode() == ISD::ATOMIC_LOAD ||
1420 N->getOpcode() == ISD::ATOMIC_STORE ||
1421 N->getOpcode() == ISD::MLOAD ||
1422 N->getOpcode() == ISD::MSTORE ||
1423 N->getOpcode() == ISD::MGATHER ||
1424 N->getOpcode() == ISD::MSCATTER ||
1425 N->isMemIntrinsic() ||
1426 N->isTargetMemoryOpcode();
1427 }
1428};
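
A hedged sketch of the most common MemSDNode queries, assuming an SDNode *N is in scope (illustrative only; the members used are the ones declared above):

    // Sketch: inspect a memory node's chain, address, and ordering properties.
    if (auto *Mem = dyn_cast<MemSDNode>(N)) {
      SDValue Chain = Mem->getChain();
      SDValue Ptr = Mem->getBasePtr();
      unsigned Alignment = Mem->getAlignment();
      bool Reorderable = Mem->isSimple();   // neither atomic nor volatile
      (void)Chain; (void)Ptr; (void)Alignment; (void)Reorderable;
    }
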
1429
1430/// This is an SDNode representing atomic operations.
1431class AtomicSDNode : public MemSDNode {
1432public:
1433 AtomicSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, SDVTList VTL,
1434 EVT MemVT, MachineMemOperand *MMO)
1435 : MemSDNode(Opc, Order, dl, VTL, MemVT, MMO) {
1436    assert(((Opc != ISD::ATOMIC_LOAD && Opc != ISD::ATOMIC_STORE) ||
1437            MMO->isAtomic()) && "then why are we using an AtomicSDNode?");
1438 }
1439
1440 const SDValue &getBasePtr() const { return getOperand(1); }
1441 const SDValue &getVal() const { return getOperand(2); }
1442
1443 /// Returns true if this SDNode represents cmpxchg atomic operation, false
1444 /// otherwise.
1445 bool isCompareAndSwap() const {
1446 unsigned Op = getOpcode();
1447 return Op == ISD::ATOMIC_CMP_SWAP ||
1448 Op == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS;
1449 }
1450
1451 /// For cmpxchg atomic operations, return the atomic ordering requirements
1452 /// when store does not occur.
1453 AtomicOrdering getFailureOrdering() const {
1454    assert(isCompareAndSwap() && "Must be cmpxchg operation");
1455 return MMO->getFailureOrdering();
1456 }
1457
1458 // Methods to support isa and dyn_cast
1459 static bool classof(const SDNode *N) {
1460 return N->getOpcode() == ISD::ATOMIC_CMP_SWAP ||
1461 N->getOpcode() == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS ||
1462 N->getOpcode() == ISD::ATOMIC_SWAP ||
1463 N->getOpcode() == ISD::ATOMIC_LOAD_ADD ||
1464 N->getOpcode() == ISD::ATOMIC_LOAD_SUB ||
1465 N->getOpcode() == ISD::ATOMIC_LOAD_AND ||
1466 N->getOpcode() == ISD::ATOMIC_LOAD_CLR ||
1467 N->getOpcode() == ISD::ATOMIC_LOAD_OR ||
1468 N->getOpcode() == ISD::ATOMIC_LOAD_XOR ||
1469 N->getOpcode() == ISD::ATOMIC_LOAD_NAND ||
1470 N->getOpcode() == ISD::ATOMIC_LOAD_MIN ||
1471 N->getOpcode() == ISD::ATOMIC_LOAD_MAX ||
1472 N->getOpcode() == ISD::ATOMIC_LOAD_UMIN ||
1473 N->getOpcode() == ISD::ATOMIC_LOAD_UMAX ||
1474 N->getOpcode() == ISD::ATOMIC_LOAD_FADD ||
1475 N->getOpcode() == ISD::ATOMIC_LOAD_FSUB ||
1476 N->getOpcode() == ISD::ATOMIC_LOAD ||
1477 N->getOpcode() == ISD::ATOMIC_STORE;
1478 }
1479};
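
A hedged sketch of querying a cmpxchg node's orderings, assuming an SDNode *N is in scope (illustrative only):

    // Sketch: for cmpxchg, the MMO carries both success and failure orderings.
    if (auto *A = dyn_cast<AtomicSDNode>(N))
      if (A->isCompareAndSwap()) {
        AtomicOrdering SuccessOrder = A->getOrdering();   // MemSDNode::getOrdering
        AtomicOrdering FailureOrder = A->getFailureOrdering();
        (void)SuccessOrder; (void)FailureOrder;
      }
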
1480
1481/// This SDNode is used for target intrinsics that touch
1482/// memory and need an associated MachineMemOperand. Its opcode may be
1483/// INTRINSIC_VOID, INTRINSIC_W_CHAIN, PREFETCH, or a target-specific opcode
1484/// with a value not less than FIRST_TARGET_MEMORY_OPCODE.
1485class MemIntrinsicSDNode : public MemSDNode {
1486public:
1487 MemIntrinsicSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl,
1488 SDVTList VTs, EVT MemoryVT, MachineMemOperand *MMO)
1489 : MemSDNode(Opc, Order, dl, VTs, MemoryVT, MMO) {
1490 SDNodeBits.IsMemIntrinsic = true;
1491 }
1492
1493 // Methods to support isa and dyn_cast
1494 static bool classof(const SDNode *N) {
1495    // We lower some target intrinsics to their target opcode early, so a node
1496    // with a target opcode can also be of this class.
1497 return N->isMemIntrinsic() ||
1498 N->getOpcode() == ISD::PREFETCH ||
1499 N->isTargetMemoryOpcode();
1500 }
1501};
1502
1503/// This SDNode is used to implement the code generator
1504/// support for the llvm IR shufflevector instruction. It combines elements
1505/// from two input vectors into a new input vector, with the selection and
1506/// ordering of elements determined by an array of integers, referred to as
1507/// the shuffle mask. For input vectors of width N, mask indices of 0..N-1
1508/// refer to elements from the LHS input, and indices from N to 2N-1 refer to
1508/// elements from the RHS.
1509/// An index of -1 is treated as undef, such that the code generator may put
1510/// any value in the corresponding element of the result.
1511class ShuffleVectorSDNode : public SDNode {
1512 // The memory for Mask is owned by the SelectionDAG's OperandAllocator, and
1513 // is freed when the SelectionDAG object is destroyed.
1514 const int *Mask;
1515
1516protected:
1517 friend class SelectionDAG;
1518
1519 ShuffleVectorSDNode(EVT VT, unsigned Order, const DebugLoc &dl, const int *M)
1520 : SDNode(ISD::VECTOR_SHUFFLE, Order, dl, getSDVTList(VT)), Mask(M) {}
1521
1522public:
1523 ArrayRef<int> getMask() const {
1524 EVT VT = getValueType(0);
1525 return makeArrayRef(Mask, VT.getVectorNumElements());
1526 }
1527
1528 int getMaskElt(unsigned Idx) const {
1529    assert(Idx < getValueType(0).getVectorNumElements() && "Idx out of range!");
1530 return Mask[Idx];
1531 }
1532
1533 bool isSplat() const { return isSplatMask(Mask, getValueType(0)); }
1534
1535 int getSplatIndex() const {
1536    assert(isSplat() && "Cannot get splat index for non-splat!");
1537 EVT VT = getValueType(0);
1538 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
1539 if (Mask[i] >= 0)
1540 return Mask[i];
1541
1542 // We can choose any index value here and be correct because all elements
1543 // are undefined. Return 0 for better potential for callers to simplify.
1544 return 0;
1545 }
1546
1547 static bool isSplatMask(const int *Mask, EVT VT);
1548
1549 /// Change values in a shuffle permute mask assuming
1550 /// the two vector operands have swapped position.
1551 static void commuteMask(MutableArrayRef<int> Mask) {
1552 unsigned NumElems = Mask.size();
1553 for (unsigned i = 0; i != NumElems; ++i) {
1554 int idx = Mask[i];
1555 if (idx < 0)
1556 continue;
1557 else if (idx < (int)NumElems)
1558 Mask[i] = idx + NumElems;
1559 else
1560 Mask[i] = idx - NumElems;
1561 }
1562 }
1563
1564 static bool classof(const SDNode *N) {
1565 return N->getOpcode() == ISD::VECTOR_SHUFFLE;
1566 }
1567};
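
The mask semantics and the commuteMask transformation above can be illustrated with a small standalone C++ program (purely illustrative; it does not use LLVM):

    // Standalone sketch of the VECTOR_SHUFFLE mask semantics described above:
    // indices 0..N-1 pick from LHS, N..2N-1 pick from RHS, -1 is undef.
    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<int> LHS = {10, 11, 12, 13};
      std::vector<int> RHS = {20, 21, 22, 23};
      int Mask[4] = {0, 5, -1, 3};   // LHS[0], RHS[1], undef, LHS[3]
      const int N = 4;

      for (int i = 0; i != N; ++i) {
        if (Mask[i] < 0)
          std::printf("elt %d: undef\n", i);
        else if (Mask[i] < N)
          std::printf("elt %d: %d\n", i, LHS[Mask[i]]);
        else
          std::printf("elt %d: %d\n", i, RHS[Mask[i] - N]);
      }

      // commuteMask: after swapping the two inputs, remap the indices.
      for (int i = 0; i != N; ++i)
        if (Mask[i] >= 0)
          Mask[i] = Mask[i] < N ? Mask[i] + N : Mask[i] - N;
      return 0;
    }
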
1568
1569class ConstantSDNode : public SDNode {
1570 friend class SelectionDAG;
1571
1572 const ConstantInt *Value;
1573
1574 ConstantSDNode(bool isTarget, bool isOpaque, const ConstantInt *val, EVT VT)
1575 : SDNode(isTarget ? ISD::TargetConstant : ISD::Constant, 0, DebugLoc(),
1576 getSDVTList(VT)),
1577 Value(val) {
1578 ConstantSDNodeBits.IsOpaque = isOpaque;
1579 }
1580
1581public:
1582 const ConstantInt *getConstantIntValue() const { return Value; }
1583 const APInt &getAPIntValue() const { return Value->getValue(); }
1584 uint64_t getZExtValue() const { return Value->getZExtValue(); }
1585 int64_t getSExtValue() const { return Value->getSExtValue(); }
1586  uint64_t getLimitedValue(uint64_t Limit = UINT64_MAX) {
1587 return Value->getLimitedValue(Limit);
1588 }
1589
1590 bool isOne() const { return Value->isOne(); }
1591 bool isNullValue() const { return Value->isZero(); }
1592 bool isAllOnesValue() const { return Value->isMinusOne(); }
1593
1594 bool isOpaque() const { return ConstantSDNodeBits.IsOpaque; }
1595
1596 static bool classof(const SDNode *N) {
1597 return N->getOpcode() == ISD::Constant ||
1598 N->getOpcode() == ISD::TargetConstant;
1599 }
1600};
1601
1602uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
1603 return cast<ConstantSDNode>(getOperand(Num))->getZExtValue();
1604}
1605
1606const APInt &SDNode::getConstantOperandAPInt(unsigned Num) const {
1607 return cast<ConstantSDNode>(getOperand(Num))->getAPIntValue();
1608}
1609
1610class ConstantFPSDNode : public SDNode {
1611 friend class SelectionDAG;
1612
1613 const ConstantFP *Value;
1614
1615 ConstantFPSDNode(bool isTarget, const ConstantFP *val, EVT VT)
1616 : SDNode(isTarget ? ISD::TargetConstantFP : ISD::ConstantFP, 0,
1617 DebugLoc(), getSDVTList(VT)),
1618 Value(val) {}
1619
1620public:
1621 const APFloat& getValueAPF() const { return Value->getValueAPF(); }
1622 const ConstantFP *getConstantFPValue() const { return Value; }
1623
1624 /// Return true if the value is positive or negative zero.
1625 bool isZero() const { return Value->isZero(); }
1626
1627 /// Return true if the value is a NaN.
1628 bool isNaN() const { return Value->isNaN(); }
1629
1630 /// Return true if the value is an infinity
1631 bool isInfinity() const { return Value->isInfinity(); }
1632
1633 /// Return true if the value is negative.
1634 bool isNegative() const { return Value->isNegative(); }
1635
1636 /// We don't rely on operator== working on double values, as
1637 /// it returns true for things that are clearly not equal, like -0.0 and 0.0.
1638 /// As such, this method can be used to do an exact bit-for-bit comparison of
1639 /// two floating point values.
1640
1641 /// We leave the version with the double argument here because it's just so
1642 /// convenient to write "2.0" and the like. Without this function we'd
1643 /// have to duplicate its logic everywhere it's called.
1644 bool isExactlyValue(double V) const {
1645 return Value->getValueAPF().isExactlyValue(V);
1646 }
1647 bool isExactlyValue(const APFloat& V) const;
1648
1649 static bool isValueValidForType(EVT VT, const APFloat& Val);
1650
1651 static bool classof(const SDNode *N) {
1652 return N->getOpcode() == ISD::ConstantFP ||
1653 N->getOpcode() == ISD::TargetConstantFP;
1654 }
1655};
1656
1657/// Returns true if \p V is a constant integer zero.
1658bool isNullConstant(SDValue V);
1659
1660/// Returns true if \p V is an FP constant with a value of positive zero.
1661bool isNullFPConstant(SDValue V);
1662
1663/// Returns true if \p V is an integer constant with all bits set.
1664bool isAllOnesConstant(SDValue V);
1665
1666/// Returns true if \p V is a constant integer one.
1667bool isOneConstant(SDValue V);
1668
1669/// Return the non-bitcasted source operand of \p V if it exists.
1670/// If \p V is not a bitcasted value, it is returned as-is.
1671SDValue peekThroughBitcasts(SDValue V);
1672
1673/// Return the non-bitcasted and one-use source operand of \p V if it exists.
1674/// If \p V is not a bitcasted one-use value, it is returned as-is.
1675SDValue peekThroughOneUseBitcasts(SDValue V);
1676
1677/// Return the non-extracted vector source operand of \p V if it exists.
1678/// If \p V is not an extracted subvector, it is returned as-is.
1679SDValue peekThroughExtractSubvectors(SDValue V);
1680
1681/// Returns true if \p V is a bitwise not operation. Assumes that an all ones
1682/// constant is canonicalized to be operand 1.
1683bool isBitwiseNot(SDValue V, bool AllowUndefs = false);
1684
1685/// Returns the SDNode if it is a constant splat BuildVector or constant int.
1686ConstantSDNode *isConstOrConstSplat(SDValue N, bool AllowUndefs = false,
1687 bool AllowTruncation = false);
1688
1689/// Returns the SDNode if it is a demanded constant splat BuildVector or
1690/// constant int.
1691ConstantSDNode *isConstOrConstSplat(SDValue N, const APInt &DemandedElts,
1692 bool AllowUndefs = false,
1693 bool AllowTruncation = false);
1694
1695/// Returns the SDNode if it is a constant splat BuildVector or constant float.
1696ConstantFPSDNode *isConstOrConstSplatFP(SDValue N, bool AllowUndefs = false);
1697
1698/// Returns the SDNode if it is a demanded constant splat BuildVector or
1699/// constant float.
1700ConstantFPSDNode *isConstOrConstSplatFP(SDValue N, const APInt &DemandedElts,
1701 bool AllowUndefs = false);
1702
1703/// Return true if the value is a constant 0 integer or a splatted vector of
1704/// a constant 0 integer (with no undefs by default).
1705/// Build vector implicit truncation is not an issue for null values.
1706bool isNullOrNullSplat(SDValue V, bool AllowUndefs = false);
1707
1708/// Return true if the value is a constant 1 integer or a splatted vector of a
1709/// constant 1 integer (with no undefs).
1710/// Does not permit build vector implicit truncation.
1711bool isOneOrOneSplat(SDValue V);
1712
1713/// Return true if the value is a constant -1 integer or a splatted vector of a
1714/// constant -1 integer (with no undefs).
1715/// Does not permit build vector implicit truncation.
1716bool isAllOnesOrAllOnesSplat(SDValue V);
1717
1718class GlobalAddressSDNode : public SDNode {
1719 friend class SelectionDAG;
1720
1721 const GlobalValue *TheGlobal;
1722 int64_t Offset;
1723 unsigned TargetFlags;
1724
1725 GlobalAddressSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL,
1726 const GlobalValue *GA, EVT VT, int64_t o,
1727 unsigned TF);
1728
1729public:
1730 const GlobalValue *getGlobal() const { return TheGlobal; }
1731 int64_t getOffset() const { return Offset; }
1732 unsigned getTargetFlags() const { return TargetFlags; }
1733 // Return the address space this GlobalAddress belongs to.
1734 unsigned getAddressSpace() const;
1735
1736 static bool classof(const SDNode *N) {
1737 return N->getOpcode() == ISD::GlobalAddress ||
1738 N->getOpcode() == ISD::TargetGlobalAddress ||
1739 N->getOpcode() == ISD::GlobalTLSAddress ||
1740 N->getOpcode() == ISD::TargetGlobalTLSAddress;
1741 }
1742};
1743
1744class FrameIndexSDNode : public SDNode {
1745 friend class SelectionDAG;
1746
1747 int FI;
1748
1749 FrameIndexSDNode(int fi, EVT VT, bool isTarg)
1750 : SDNode(isTarg ? ISD::TargetFrameIndex : ISD::FrameIndex,
1751 0, DebugLoc(), getSDVTList(VT)), FI(fi) {
1752 }
1753
1754public:
1755 int getIndex() const { return FI; }
1756
1757 static bool classof(const SDNode *N) {
1758 return N->getOpcode() == ISD::FrameIndex ||
1759 N->getOpcode() == ISD::TargetFrameIndex;
1760 }
1761};
1762
1763/// This SDNode is used for LIFETIME_START/LIFETIME_END values, which indicate
1764/// the offset and size that are started/ended in the underlying FrameIndex.
1765class LifetimeSDNode : public SDNode {
1766 friend class SelectionDAG;
1767 int64_t Size;
1768 int64_t Offset; // -1 if offset is unknown.
1769
1770 LifetimeSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
1771 SDVTList VTs, int64_t Size, int64_t Offset)
1772 : SDNode(Opcode, Order, dl, VTs), Size(Size), Offset(Offset) {}
1773public:
1774 int64_t getFrameIndex() const {
1775 return cast<FrameIndexSDNode>(getOperand(1))->getIndex();
1776 }
1777
1778 bool hasOffset() const { return Offset >= 0; }
1779 int64_t getOffset() const {
1780    assert(hasOffset() && "offset is unknown");
1781 return Offset;
1782 }
1783 int64_t getSize() const {
1784    assert(hasOffset() && "offset is unknown");
1785 return Size;
1786 }
1787
1788 // Methods to support isa and dyn_cast
1789 static bool classof(const SDNode *N) {
1790 return N->getOpcode() == ISD::LIFETIME_START ||
1791 N->getOpcode() == ISD::LIFETIME_END;
1792 }
1793};
1794
1795class JumpTableSDNode : public SDNode {
1796 friend class SelectionDAG;
1797
1798 int JTI;
1799 unsigned TargetFlags;
1800
1801 JumpTableSDNode(int jti, EVT VT, bool isTarg, unsigned TF)
1802 : SDNode(isTarg ? ISD::TargetJumpTable : ISD::JumpTable,
1803 0, DebugLoc(), getSDVTList(VT)), JTI(jti), TargetFlags(TF) {
1804 }
1805
1806public:
1807 int getIndex() const { return JTI; }
1808 unsigned getTargetFlags() const { return TargetFlags; }
1809
1810 static bool classof(const SDNode *N) {
1811 return N->getOpcode() == ISD::JumpTable ||
1812 N->getOpcode() == ISD::TargetJumpTable;
1813 }
1814};
1815
1816class ConstantPoolSDNode : public SDNode {
1817 friend class SelectionDAG;
1818
1819 union {
1820 const Constant *ConstVal;
1821 MachineConstantPoolValue *MachineCPVal;
1822 } Val;
1823 int Offset; // It's a MachineConstantPoolValue if top bit is set.
1824 unsigned Alignment; // Minimum alignment requirement of CP (not log2 value).
1825 unsigned TargetFlags;
1826
1827 ConstantPoolSDNode(bool isTarget, const Constant *c, EVT VT, int o,
1828 unsigned Align, unsigned TF)
1829 : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, 0,
1830 DebugLoc(), getSDVTList(VT)), Offset(o), Alignment(Align),
1831 TargetFlags(TF) {
1832    assert(Offset >= 0 && "Offset is too large");
1833 Val.ConstVal = c;
1834 }
1835
1836 ConstantPoolSDNode(bool isTarget, MachineConstantPoolValue *v,
1837 EVT VT, int o, unsigned Align, unsigned TF)
1838 : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, 0,
1839 DebugLoc(), getSDVTList(VT)), Offset(o), Alignment(Align),
1840 TargetFlags(TF) {
1841    assert(Offset >= 0 && "Offset is too large");
1842 Val.MachineCPVal = v;
1843    Offset |= 1 << (sizeof(unsigned)*CHAR_BIT-1);
1844 }
1845
1846public:
1847 bool isMachineConstantPoolEntry() const {
1848 return Offset < 0;
1849 }
1850
1851 const Constant *getConstVal() const {
1852    assert(!isMachineConstantPoolEntry() && "Wrong constantpool type");
1853 return Val.ConstVal;
1854 }
1855
1856 MachineConstantPoolValue *getMachineCPVal() const {
1857    assert(isMachineConstantPoolEntry() && "Wrong constantpool type");
1858 return Val.MachineCPVal;
1859 }
1860
1861 int getOffset() const {
1862    return Offset & ~(1 << (sizeof(unsigned)*CHAR_BIT-1));
1863 }
1864
1865 // Return the alignment of this constant pool object, which is either 0 (for
1866 // default alignment) or the desired value.
1867 unsigned getAlignment() const { return Alignment; }
1868 unsigned getTargetFlags() const { return TargetFlags; }
1869
1870 Type *getType() const;
1871
1872 static bool classof(const SDNode *N) {
1873 return N->getOpcode() == ISD::ConstantPool ||
1874 N->getOpcode() == ISD::TargetConstantPool;
1875 }
1876};
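
The Offset encoding above (the sign bit tags a MachineConstantPoolValue entry, the remaining bits hold the offset) can be shown with a standalone sketch that mirrors the expressions in the constructors, isMachineConstantPoolEntry(), and getOffset():

    // Standalone sketch of the tag-bit encoding used by ConstantPoolSDNode::Offset.
    #include <cassert>
    #include <climits>

    int main() {
      int Offset = 48;                                   // must be non-negative
      assert(Offset >= 0 && "Offset is too large");
      Offset |= 1 << (sizeof(unsigned) * CHAR_BIT - 1);  // set the tag bit
      bool IsMachineCPEntry = Offset < 0;                // isMachineConstantPoolEntry()
      int Plain = Offset & ~(1 << (sizeof(unsigned) * CHAR_BIT - 1)); // getOffset()
      assert(IsMachineCPEntry && Plain == 48);
      return 0;
    }
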
1877
1878/// Completely target-dependent object reference.
1879class TargetIndexSDNode : public SDNode {
1880 friend class SelectionDAG;
1881
1882 unsigned TargetFlags;
1883 int Index;
1884 int64_t Offset;
1885
1886public:
1887 TargetIndexSDNode(int Idx, EVT VT, int64_t Ofs, unsigned TF)
1888 : SDNode(ISD::TargetIndex, 0, DebugLoc(), getSDVTList(VT)),
1889 TargetFlags(TF), Index(Idx), Offset(Ofs) {}
1890
1891 unsigned getTargetFlags() const { return TargetFlags; }
1892 int getIndex() const { return Index; }
1893 int64_t getOffset() const { return Offset; }
1894
1895 static bool classof(const SDNode *N) {
1896 return N->getOpcode() == ISD::TargetIndex;
1897 }
1898};
1899
1900class BasicBlockSDNode : public SDNode {
1901 friend class SelectionDAG;
1902
1903 MachineBasicBlock *MBB;
1904
1905 /// Debug info is meaningful and potentially useful here, but we create
1906 /// blocks out of order when they're jumped to, which makes it a bit
1907 /// harder. Let's see if we need it first.
1908 explicit BasicBlockSDNode(MachineBasicBlock *mbb)
1909 : SDNode(ISD::BasicBlock, 0, DebugLoc(), getSDVTList(MVT::Other)), MBB(mbb)
1910 {}
1911
1912public:
1913 MachineBasicBlock *getBasicBlock() const { return MBB; }
1914
1915 static bool classof(const SDNode *N) {
1916 return N->getOpcode() == ISD::BasicBlock;
1917 }
1918};
1919
1920/// A "pseudo-class" with methods for operating on BUILD_VECTORs.
1921class BuildVectorSDNode : public SDNode {
1922public:
1923 // These are constructed as SDNodes and then cast to BuildVectorSDNodes.
1924 explicit BuildVectorSDNode() = delete;
1925
1926 /// Check if this is a constant splat, and if so, find the
1927 /// smallest element size that splats the vector. If MinSplatBits is
1928 /// nonzero, the element size must be at least that large. Note that the
1929 /// splat element may be the entire vector (i.e., a one element vector).
1930 /// Returns the splat element value in SplatValue. Any undefined bits in
1931 /// that value are zero, and the corresponding bits in the SplatUndef mask
1932 /// are set. The SplatBitSize value is set to the splat element size in
1933 /// bits. HasAnyUndefs is set to true if any bits in the vector are
1934 /// undefined. isBigEndian describes the endianness of the target.
1935 bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef,
1936 unsigned &SplatBitSize, bool &HasAnyUndefs,
1937 unsigned MinSplatBits = 0,
1938 bool isBigEndian = false) const;
1939
1940 /// Returns the demanded splatted value or a null value if this is not a
1941 /// splat.
1942 ///
1943 /// The DemandedElts mask indicates the elements that must be in the splat.
1944 /// If passed a non-null UndefElements bitvector, it will resize it to match
1945 /// the vector width and set the bits where elements are undef.
1946 SDValue getSplatValue(const APInt &DemandedElts,
1947 BitVector *UndefElements = nullptr) const;
1948
1949 /// Returns the splatted value or a null value if this is not a splat.
1950 ///
1951 /// If passed a non-null UndefElements bitvector, it will resize it to match
1952 /// the vector width and set the bits where elements are undef.
1953 SDValue getSplatValue(BitVector *UndefElements = nullptr) const;
1954
1955 /// Returns the demanded splatted constant or null if this is not a constant
1956 /// splat.
1957 ///
1958 /// The DemandedElts mask indicates the elements that must be in the splat.
1959 /// If passed a non-null UndefElements bitvector, it will resize it to match
1960 /// the vector width and set the bits where elements are undef.
1961 ConstantSDNode *
1962 getConstantSplatNode(const APInt &DemandedElts,
1963 BitVector *UndefElements = nullptr) const;
1964
1965 /// Returns the splatted constant or null if this is not a constant
1966 /// splat.
1967 ///
1968 /// If passed a non-null UndefElements bitvector, it will resize it to match
1969 /// the vector width and set the bits where elements are undef.
1970 ConstantSDNode *
1971 getConstantSplatNode(BitVector *UndefElements = nullptr) const;
1972
1973 /// Returns the demanded splatted constant FP or null if this is not a
1974 /// constant FP splat.
1975 ///
1976 /// The DemandedElts mask indicates the elements that must be in the splat.
1977 /// If passed a non-null UndefElements bitvector, it will resize it to match
1978 /// the vector width and set the bits where elements are undef.
1979 ConstantFPSDNode *
1980 getConstantFPSplatNode(const APInt &DemandedElts,
1981 BitVector *UndefElements = nullptr) const;
1982
1983 /// Returns the splatted constant FP or null if this is not a constant
1984 /// FP splat.
1985 ///
1986 /// If passed a non-null UndefElements bitvector, it will resize it to match
1987 /// the vector width and set the bits where elements are undef.
1988 ConstantFPSDNode *
1989 getConstantFPSplatNode(BitVector *UndefElements = nullptr) const;
1990
1991 /// If this is a constant FP splat and the splatted constant FP is an
1992 /// exact power or 2, return the log base 2 integer value. Otherwise,
1993 /// return -1.
1994 ///
1995 /// The BitWidth specifies the necessary bit precision.
1996 int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements,
1997 uint32_t BitWidth) const;
1998
1999 bool isConstant() const;
2000
2001 static bool classof(const SDNode *N) {
2002 return N->getOpcode() == ISD::BUILD_VECTOR;
2003 }
2004};
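
A hedged usage sketch of isConstantSplat(), assuming an SDValue Op is in scope and that a 32-bit big-endian splat is what the caller happens to care about (both are arbitrary choices for illustration):

    // Sketch: detect a constant splat BUILD_VECTOR and recover the splat element.
    if (auto *BV = dyn_cast<BuildVectorSDNode>(Op.getNode())) {
      APInt SplatValue, SplatUndef;
      unsigned SplatBitSize;
      bool HasAnyUndefs;
      if (BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
                              /*MinSplatBits=*/32, /*isBigEndian=*/true)) {
        // SplatValue holds the splat element; SplatUndef marks its undefined bits.
      }
    }
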
2005
2006/// An SDNode that holds an arbitrary LLVM IR Value. This is
2007/// used when the SelectionDAG needs to make a simple reference to something
2008/// in the LLVM IR representation.
2009///
2010class SrcValueSDNode : public SDNode {
2011 friend class SelectionDAG;
2012
2013 const Value *V;
2014
2015 /// Create a SrcValue for a general value.
2016 explicit SrcValueSDNode(const Value *v)
2017 : SDNode(ISD::SRCVALUE, 0, DebugLoc(), getSDVTList(MVT::Other)), V(v) {}
2018
2019public:
2020 /// Return the contained Value.
2021 const Value *getValue() const { return V; }
2022
2023 static bool classof(const SDNode *N) {
2024 return N->getOpcode() == ISD::SRCVALUE;
2025 }
2026};
2027
2028class MDNodeSDNode : public SDNode {
2029 friend class SelectionDAG;
2030
2031 const MDNode *MD;
2032
2033 explicit MDNodeSDNode(const MDNode *md)
2034 : SDNode(ISD::MDNODE_SDNODE, 0, DebugLoc(), getSDVTList(MVT::Other)), MD(md)
2035 {}
2036
2037public:
2038 const MDNode *getMD() const { return MD; }
2039
2040 static bool classof(const SDNode *N) {
2041 return N->getOpcode() == ISD::MDNODE_SDNODE;
2042 }
2043};
2044
2045class RegisterSDNode : public SDNode {
2046 friend class SelectionDAG;
2047
2048 unsigned Reg;
2049
2050 RegisterSDNode(unsigned reg, EVT VT)
2051 : SDNode(ISD::Register, 0, DebugLoc(), getSDVTList(VT)), Reg(reg) {}
2052
2053public:
2054 unsigned getReg() const { return Reg; }
2055
2056 static bool classof(const SDNode *N) {
2057 return N->getOpcode() == ISD::Register;
2058 }
2059};
2060
2061class RegisterMaskSDNode : public SDNode {
2062 friend class SelectionDAG;
2063
2064 // The memory for RegMask is not owned by the node.
2065 const uint32_t *RegMask;
2066
2067 RegisterMaskSDNode(const uint32_t *mask)
2068 : SDNode(ISD::RegisterMask, 0, DebugLoc(), getSDVTList(MVT::Untyped)),
2069 RegMask(mask) {}
2070
2071public:
2072 const uint32_t *getRegMask() const { return RegMask; }
2073
2074 static bool classof(const SDNode *N) {
2075 return N->getOpcode() == ISD::RegisterMask;
2076 }
2077};
2078
2079class BlockAddressSDNode : public SDNode {
2080 friend class SelectionDAG;
2081
2082 const BlockAddress *BA;
2083 int64_t Offset;
2084 unsigned TargetFlags;
2085
2086 BlockAddressSDNode(unsigned NodeTy, EVT VT, const BlockAddress *ba,
2087 int64_t o, unsigned Flags)
2088 : SDNode(NodeTy, 0, DebugLoc(), getSDVTList(VT)),
2089 BA(ba), Offset(o), TargetFlags(Flags) {}
2090
2091public:
2092 const BlockAddress *getBlockAddress() const { return BA; }
2093 int64_t getOffset() const { return Offset; }
2094 unsigned getTargetFlags() const { return TargetFlags; }
2095
2096 static bool classof(const SDNode *N) {
2097 return N->getOpcode() == ISD::BlockAddress ||
2098 N->getOpcode() == ISD::TargetBlockAddress;
2099 }
2100};
2101
2102class LabelSDNode : public SDNode {
2103 friend class SelectionDAG;
2104
2105 MCSymbol *Label;
2106
2107 LabelSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl, MCSymbol *L)
2108 : SDNode(Opcode, Order, dl, getSDVTList(MVT::Other)), Label(L) {
2109    assert(LabelSDNode::classof(this) && "not a label opcode");
2110 }
2111
2112public:
2113 MCSymbol *getLabel() const { return Label; }
2114
2115 static bool classof(const SDNode *N) {
2116 return N->getOpcode() == ISD::EH_LABEL ||
2117 N->getOpcode() == ISD::ANNOTATION_LABEL;
2118 }
2119};
2120
2121class ExternalSymbolSDNode : public SDNode {
2122 friend class SelectionDAG;
2123
2124 const char *Symbol;
2125 unsigned TargetFlags;
2126
2127 ExternalSymbolSDNode(bool isTarget, const char *Sym, unsigned TF, EVT VT)
2128 : SDNode(isTarget ? ISD::TargetExternalSymbol : ISD::ExternalSymbol, 0,
2129 DebugLoc(), getSDVTList(VT)),
2130 Symbol(Sym), TargetFlags(TF) {}
2131
2132public:
2133 const char *getSymbol() const { return Symbol; }
2134 unsigned getTargetFlags() const { return TargetFlags; }
2135
2136 static bool classof(const SDNode *N) {
2137 return N->getOpcode() == ISD::ExternalSymbol ||
2138 N->getOpcode() == ISD::TargetExternalSymbol;
2139 }
2140};
2141
2142class MCSymbolSDNode : public SDNode {
2143 friend class SelectionDAG;
2144
2145 MCSymbol *Symbol;
2146
2147 MCSymbolSDNode(MCSymbol *Symbol, EVT VT)
2148 : SDNode(ISD::MCSymbol, 0, DebugLoc(), getSDVTList(VT)), Symbol(Symbol) {}
2149
2150public:
2151 MCSymbol *getMCSymbol() const { return Symbol; }
2152
2153 static bool classof(const SDNode *N) {
2154 return N->getOpcode() == ISD::MCSymbol;
2155 }
2156};
2157
2158class CondCodeSDNode : public SDNode {
2159 friend class SelectionDAG;
2160
2161 ISD::CondCode Condition;
2162
2163 explicit CondCodeSDNode(ISD::CondCode Cond)
2164 : SDNode(ISD::CONDCODE, 0, DebugLoc(), getSDVTList(MVT::Other)),
2165 Condition(Cond) {}
2166
2167public:
2168 ISD::CondCode get() const { return Condition; }
2169
2170 static bool classof(const SDNode *N) {
2171 return N->getOpcode() == ISD::CONDCODE;
2172 }
2173};
2174
2175/// This class is used to represent EVT's, which are used
2176/// to parameterize some operations.
2177class VTSDNode : public SDNode {
2178 friend class SelectionDAG;
2179
2180 EVT ValueType;
2181
2182 explicit VTSDNode(EVT VT)
2183 : SDNode(ISD::VALUETYPE, 0, DebugLoc(), getSDVTList(MVT::Other)),
2184 ValueType(VT) {}
2185
2186public:
2187 EVT getVT() const { return ValueType; }
2188
2189 static bool classof(const SDNode *N) {
2190 return N->getOpcode() == ISD::VALUETYPE;
2191 }
2192};
2193
2194/// Base class for LoadSDNode and StoreSDNode
2195class LSBaseSDNode : public MemSDNode {
2196public:
2197 LSBaseSDNode(ISD::NodeType NodeTy, unsigned Order, const DebugLoc &dl,
2198 SDVTList VTs, ISD::MemIndexedMode AM, EVT MemVT,
2199 MachineMemOperand *MMO)
2200 : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
2201 LSBaseSDNodeBits.AddressingMode = AM;
2202    assert(getAddressingMode() == AM && "Value truncated");
2203 }
2204
2205 const SDValue &getOffset() const {
2206 return getOperand(getOpcode() == ISD::LOAD ? 2 : 3);
2207 }
2208
2209 /// Return the addressing mode for this load or store:
2210 /// unindexed, pre-inc, pre-dec, post-inc, or post-dec.
2211 ISD::MemIndexedMode getAddressingMode() const {
2212 return static_cast<ISD::MemIndexedMode>(LSBaseSDNodeBits.AddressingMode);
2213 }
2214
2215 /// Return true if this is a pre/post inc/dec load/store.
2216 bool isIndexed() const { return getAddressingMode() != ISD::UNINDEXED; }
2217
2218 /// Return true if this is NOT a pre/post inc/dec load/store.
2219 bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; }
2220
2221 static bool classof(const SDNode *N) {
2222 return N->getOpcode() == ISD::LOAD ||
2223 N->getOpcode() == ISD::STORE;
2224 }
2225};
2226
2227/// This class is used to represent ISD::LOAD nodes.
2228class LoadSDNode : public LSBaseSDNode {
2229 friend class SelectionDAG;
2230
2231 LoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2232 ISD::MemIndexedMode AM, ISD::LoadExtType ETy, EVT MemVT,
2233 MachineMemOperand *MMO)
2234 : LSBaseSDNode(ISD::LOAD, Order, dl, VTs, AM, MemVT, MMO) {
2235 LoadSDNodeBits.ExtTy = ETy;
2236    assert(readMem() && "Load MachineMemOperand is not a load!");
2237    assert(!writeMem() && "Load MachineMemOperand is a store!");
2238 }
2239
2240public:
2241 /// Return whether this is a plain node,
2242 /// or one of the varieties of value-extending loads.
2243 ISD::LoadExtType getExtensionType() const {
2244 return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy);
2245 }
2246
2247 const SDValue &getBasePtr() const { return getOperand(1); }
2248 const SDValue &getOffset() const { return getOperand(2); }
2249
2250 static bool classof(const SDNode *N) {
2251 return N->getOpcode() == ISD::LOAD;
2252 }
2253};
2254
2255/// This class is used to represent ISD::STORE nodes.
2256class StoreSDNode : public LSBaseSDNode {
2257 friend class SelectionDAG;
2258
2259 StoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2260 ISD::MemIndexedMode AM, bool isTrunc, EVT MemVT,
2261 MachineMemOperand *MMO)
2262 : LSBaseSDNode(ISD::STORE, Order, dl, VTs, AM, MemVT, MMO) {
2263 StoreSDNodeBits.IsTruncating = isTrunc;
2264    assert(!readMem() && "Store MachineMemOperand is a load!");
2265    assert(writeMem() && "Store MachineMemOperand is not a store!");
2266 }
2267
2268public:
2269 /// Return true if the op does a truncation before store.
2270 /// For integers this is the same as doing a TRUNCATE and storing the result.
2271 /// For floats, it is the same as doing an FP_ROUND and storing the result.
2272 bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
2273 void setTruncatingStore(bool Truncating) {
2274 StoreSDNodeBits.IsTruncating = Truncating;
2275 }
2276
2277 const SDValue &getValue() const { return getOperand(1); }
2278 const SDValue &getBasePtr() const { return getOperand(2); }
2279 const SDValue &getOffset() const { return getOperand(3); }
2280
2281 static bool classof(const SDNode *N) {
2282 return N->getOpcode() == ISD::STORE;
2283 }
2284};
2285
2286/// This base class is used to represent MLOAD and MSTORE nodes
2287class MaskedLoadStoreSDNode : public MemSDNode {
2288public:
2289 friend class SelectionDAG;
2290
2291 MaskedLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order,
2292 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
2293 MachineMemOperand *MMO)
2294 : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {}
2295
2296 // MaskedLoadSDNode (Chain, ptr, mask, passthru)
2297 // MaskedStoreSDNode (Chain, data, ptr, mask)
2298 // Mask is a vector of i1 elements
2299 const SDValue &getBasePtr() const {
2300 return getOperand(getOpcode() == ISD::MLOAD ? 1 : 2);
2301 }
2302 const SDValue &getMask() const {
2303 return getOperand(getOpcode() == ISD::MLOAD ? 2 : 3);
2304 }
2305
2306 static bool classof(const SDNode *N) {
2307 return N->getOpcode() == ISD::MLOAD ||
2308 N->getOpcode() == ISD::MSTORE;
2309 }
2310};
2311
2312/// This class is used to represent an MLOAD node
2313class MaskedLoadSDNode : public MaskedLoadStoreSDNode {
2314public:
2315 friend class SelectionDAG;
2316
2317 MaskedLoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2318 ISD::LoadExtType ETy, bool IsExpanding, EVT MemVT,
2319 MachineMemOperand *MMO)
2320 : MaskedLoadStoreSDNode(ISD::MLOAD, Order, dl, VTs, MemVT, MMO) {
2321 LoadSDNodeBits.ExtTy = ETy;
2322 LoadSDNodeBits.IsExpanding = IsExpanding;
2323 }
2324
2325 ISD::LoadExtType getExtensionType() const {
2326 return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy);
2327 }
2328
2329 const SDValue &getBasePtr() const { return getOperand(1); }
2330 const SDValue &getMask() const { return getOperand(2); }
2331 const SDValue &getPassThru() const { return getOperand(3); }
2332
2333 static bool classof(const SDNode *N) {
2334 return N->getOpcode() == ISD::MLOAD;
2335 }
2336
2337 bool isExpandingLoad() const { return LoadSDNodeBits.IsExpanding; }
2338};
2339
2340/// This class is used to represent an MSTORE node
2341class MaskedStoreSDNode : public MaskedLoadStoreSDNode {
2342public:
2343 friend class SelectionDAG;
2344
2345 MaskedStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2346 bool isTrunc, bool isCompressing, EVT MemVT,
2347 MachineMemOperand *MMO)
2348 : MaskedLoadStoreSDNode(ISD::MSTORE, Order, dl, VTs, MemVT, MMO) {
2349 StoreSDNodeBits.IsTruncating = isTrunc;
2350 StoreSDNodeBits.IsCompressing = isCompressing;
2351 }
2352
2353 /// Return true if the op does a truncation before store.
2354 /// For integers this is the same as doing a TRUNCATE and storing the result.
2355 /// For floats, it is the same as doing an FP_ROUND and storing the result.
2356 bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
2357
2358 /// Returns true if the op does a compression to the vector before storing.
2359 /// The node contiguously stores the active elements (integers or floats)
2360 /// in src (those with their respective bit set in writemask k) to unaligned
2361 /// memory at base_addr.
2362 bool isCompressingStore() const { return StoreSDNodeBits.IsCompressing; }
2363
2364 const SDValue &getValue() const { return getOperand(1); }
2365 const SDValue &getBasePtr() const { return getOperand(2); }
2366 const SDValue &getMask() const { return getOperand(3); }
2367
2368 static bool classof(const SDNode *N) {
2369 return N->getOpcode() == ISD::MSTORE;
2370 }
2371};
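
The compressing-store behaviour described above can be shown with a standalone sketch (plain C++, no LLVM): only lanes whose mask bit is set are written, packed contiguously from the base address.

    // Standalone sketch of a compressing store over an 8-element vector.
    #include <cstdio>

    int main() {
      int Src[8]   = {1, 2, 3, 4, 5, 6, 7, 8};
      bool Mask[8] = {true, false, true, true, false, false, true, false};
      int Mem[8]   = {0};

      int Out = 0;                     // next contiguous slot in memory
      for (int i = 0; i != 8; ++i)
        if (Mask[i])
          Mem[Out++] = Src[i];         // only active lanes are written

      for (int i = 0; i != Out; ++i)
        std::printf("%d ", Mem[i]);    // prints: 1 3 4 7
      std::printf("\n");
      return 0;
    }
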
2372
2373/// This is a base class used to represent
2374/// MGATHER and MSCATTER nodes
2375///
2376class MaskedGatherScatterSDNode : public MemSDNode {
2377public:
2378 friend class SelectionDAG;
2379
2380 MaskedGatherScatterSDNode(ISD::NodeType NodeTy, unsigned Order,
2381 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
2382 MachineMemOperand *MMO, ISD::MemIndexType IndexType)
2383 : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
2384 LSBaseSDNodeBits.AddressingMode = IndexType;
2385    assert(getIndexType() == IndexType && "Value truncated");
2386 }
2387
2388 /// How is Index applied to BasePtr when computing addresses.
2389 ISD::MemIndexType getIndexType() const {
2390 return static_cast<ISD::MemIndexType>(LSBaseSDNodeBits.AddressingMode);
2391 }
2392 bool isIndexScaled() const {
2393 return (getIndexType() == ISD::SIGNED_SCALED) ||
2394 (getIndexType() == ISD::UNSIGNED_SCALED);
2395 }
2396 bool isIndexSigned() const {
2397 return (getIndexType() == ISD::SIGNED_SCALED) ||
2398 (getIndexType() == ISD::SIGNED_UNSCALED);
2399 }
2400
2401  // In both nodes the mask is operand 2 and the base address is operand 3:
2402 // MaskedGatherSDNode (Chain, passthru, mask, base, index, scale)
2403 // MaskedScatterSDNode (Chain, value, mask, base, index, scale)
2404 // Mask is a vector of i1 elements
2405 const SDValue &getBasePtr() const { return getOperand(3); }
2406 const SDValue &getIndex() const { return getOperand(4); }
2407 const SDValue &getMask() const { return getOperand(2); }
2408 const SDValue &getScale() const { return getOperand(5); }
2409
2410 static bool classof(const SDNode *N) {
2411 return N->getOpcode() == ISD::MGATHER ||
2412 N->getOpcode() == ISD::MSCATTER;
2413 }
2414};
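
The operand layout and index-type flags above determine how each lane's address is formed; a standalone sketch of the usual addressing rule (base + index * scale for active lanes) follows, using arbitrary example values:

    // Standalone sketch of gather address computation for a signed, scaled index.
    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t Base = 0x1000;
      int32_t Index[4] = {0, 2, -1, 5};   // signed indices (SIGNED_SCALED)
      bool Mask[4] = {true, true, false, true};
      uint64_t Scale = 4;                 // element size in bytes

      for (int i = 0; i != 4; ++i) {
        if (!Mask[i])
          continue;                       // inactive lanes keep the pass-through value
        uint64_t Addr = Base + (int64_t)Index[i] * Scale;
        std::printf("lane %d loads from 0x%llx\n", i, (unsigned long long)Addr);
      }
      return 0;
    }
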
2415
2416/// This class is used to represent an MGATHER node
2417///
2418class MaskedGatherSDNode : public MaskedGatherScatterSDNode {
2419public:
2420 friend class SelectionDAG;
2421
2422 MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2423 EVT MemVT, MachineMemOperand *MMO,
2424 ISD::MemIndexType IndexType)
2425 : MaskedGatherScatterSDNode(ISD::MGATHER, Order, dl, VTs, MemVT, MMO,
2426 IndexType) {}
2427
2428 const SDValue &getPassThru() const { return getOperand(1); }
2429
2430 static bool classof(const SDNode *N) {
2431 return N->getOpcode() == ISD::MGATHER;
2432 }
2433};
2434
2435/// This class is used to represent an MSCATTER node
2436///
2437class MaskedScatterSDNode : public MaskedGatherScatterSDNode {
2438public:
2439 friend class SelectionDAG;
2440
2441 MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2442 EVT MemVT, MachineMemOperand *MMO,
2443 ISD::MemIndexType IndexType)
2444 : MaskedGatherScatterSDNode(ISD::MSCATTER, Order, dl, VTs, MemVT, MMO,
2445 IndexType) {}
2446
2447 const SDValue &getValue() const { return getOperand(1); }
2448
2449 static bool classof(const SDNode *N) {
2450 return N->getOpcode() == ISD::MSCATTER;
2451 }
2452};
2453
2454/// An SDNode that represents everything that will be needed
2455/// to construct a MachineInstr. These nodes are created during the
2456/// instruction selection proper phase.
2457///
2458/// Note that the only supported way to set the `memoperands` is by calling the
2459/// `SelectionDAG::setNodeMemRefs` function as the memory management happens
2460/// inside the DAG rather than in the node.
2461class MachineSDNode : public SDNode {
2462private:
2463 friend class SelectionDAG;
2464
2465 MachineSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL, SDVTList VTs)
2466 : SDNode(Opc, Order, DL, VTs) {}
2467
2468 // We use a pointer union between a single `MachineMemOperand` pointer and
2469 // a pointer to an array of `MachineMemOperand` pointers. This is null when
2470 // the number of these is zero, the single pointer variant used when the
2471 // number is one, and the array is used for larger numbers.
2472 //
2473 // The array is allocated via the `SelectionDAG`'s allocator and so will
2474 // always live until the DAG is cleaned up and doesn't require ownership here.
2475 //
2476 // We can't use something simpler like `TinyPtrVector` here because `SDNode`
2477 // subclasses aren't managed in a conforming C++ manner. See the comments on
2478 // `SelectionDAG::MorphNodeTo` which details what all goes on, but the
2479 // constraint here is that these don't manage memory with their constructor or
2480 // destructor and can be initialized to a good state even if they start off
2481 // uninitialized.
2482 PointerUnion<MachineMemOperand *, MachineMemOperand **> MemRefs = {};
2483
2484 // Note that this could be folded into the above `MemRefs` member if doing so
2485 // is advantageous at some point. We don't need to store this in most cases.
2486 // However, at the moment this doesn't appear to make the allocation any
2487 // smaller and makes the code somewhat simpler to read.
2488 int NumMemRefs = 0;
2489
2490public:
2491 using mmo_iterator = ArrayRef<MachineMemOperand *>::const_iterator;
2492
2493 ArrayRef<MachineMemOperand *> memoperands() const {
2494 // Special case the common cases.
2495 if (NumMemRefs == 0)
2496 return {};
2497 if (NumMemRefs == 1)
2498 return makeArrayRef(MemRefs.getAddrOfPtr1(), 1);
2499
2500 // Otherwise we have an actual array.
2501 return makeArrayRef(MemRefs.get<MachineMemOperand **>(), NumMemRefs);
2502 }
2503 mmo_iterator memoperands_begin() const { return memoperands().begin(); }
2504 mmo_iterator memoperands_end() const { return memoperands().end(); }
2505 bool memoperands_empty() const { return memoperands().empty(); }
2506
2507 /// Clear out the memory reference descriptor list.
2508 void clearMemRefs() {
2509 MemRefs = nullptr;
2510 NumMemRefs = 0;
2511 }
2512
2513 static bool classof(const SDNode *N) {
2514 return N->isMachineOpcode();
2515 }
2516};
2517
2518class SDNodeIterator : public std::iterator<std::forward_iterator_tag,
2519 SDNode, ptrdiff_t> {
2520 const SDNode *Node;
2521 unsigned Operand;
2522
2523 SDNodeIterator(const SDNode *N, unsigned Op) : Node(N), Operand(Op) {}
2524
2525public:
2526 bool operator==(const SDNodeIterator& x) const {
2527 return Operand == x.Operand;
2528 }
2529 bool operator!=(const SDNodeIterator& x) const { return !operator==(x); }
2530
2531 pointer operator*() const {
2532 return Node->getOperand(Operand).getNode();
2533 }
2534 pointer operator->() const { return operator*(); }
2535
2536 SDNodeIterator& operator++() { // Preincrement
2537 ++Operand;
2538 return *this;
2539 }
2540 SDNodeIterator operator++(int) { // Postincrement
2541 SDNodeIterator tmp = *this; ++*this; return tmp;
2542 }
2543 size_t operator-(SDNodeIterator Other) const {
2544 assert(Node == Other.Node &&
2545 "Cannot compare iterators of two different nodes!");
2546 return Operand - Other.Operand;
2547 }
2548
2549 static SDNodeIterator begin(const SDNode *N) { return SDNodeIterator(N, 0); }
2550 static SDNodeIterator end (const SDNode *N) {
2551 return SDNodeIterator(N, N->getNumOperands());
2552 }
2553
2554 unsigned getOperand() const { return Operand; }
2555 const SDNode *getNode() const { return Node; }
2556};
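Illustrative sketch, not part of the header: SDNodeIterator visits the SDNodes behind a
node's operands, in operand order. Counting them for an assumed SDNode *N (equivalent to
N->getNumOperands(), shown only to demonstrate the iterator protocol):

    // *I yields the SDNode* behind operand number I.getOperand().
    static unsigned countOperandNodes(const llvm::SDNode *N) {
      unsigned Count = 0;
      for (llvm::SDNodeIterator I = llvm::SDNodeIterator::begin(N),
                                E = llvm::SDNodeIterator::end(N);
           I != E; ++I)
        ++Count;
      return Count;
    }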
2557
2558template <> struct GraphTraits<SDNode*> {
2559 using NodeRef = SDNode *;
2560 using ChildIteratorType = SDNodeIterator;
2561
2562 static NodeRef getEntryNode(SDNode *N) { return N; }
2563
2564 static ChildIteratorType child_begin(NodeRef N) {
2565 return SDNodeIterator::begin(N);
2566 }
2567
2568 static ChildIteratorType child_end(NodeRef N) {
2569 return SDNodeIterator::end(N);
2570 }
2571};
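Illustrative sketch, not part of the header: this GraphTraits specialization is what lets
generic graph utilities walk a DAG subtree, treating a node's operands as its children
(per child_begin/child_end above). For example, with llvm::depth_first and an assumed
root node:

    #include "llvm/ADT/DepthFirstIterator.h"

    // Visit Root and every node reachable through operand edges, depth first.
    static void visitSubtree(llvm::SDNode *Root) {
      for (llvm::SDNode *N : llvm::depth_first(Root))
        (void)N;   // inspect N here
    }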
2572
2573/// A representation of the largest SDNode, for use in sizeof().
2574///
2575/// This needs to be a union because the largest node differs between 32-bit
2576/// systems with 4-byte and 8-byte pointer alignment.
2577using LargestSDNode = AlignedCharArrayUnion<AtomicSDNode, TargetIndexSDNode,
2578 BlockAddressSDNode,
2579 GlobalAddressSDNode>;
2580
2581/// The SDNode class with the greatest alignment requirement.
2582using MostAlignedSDNode = GlobalAddressSDNode;
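Illustrative note: these aliases exist so the DAG can carve out one fixed-size, suitably
aligned slot per node. An allocator shaped like SelectionDAG's own node allocator
(assumed to mirror SelectionDAG.h, shown only to make the purpose concrete):

    #include "llvm/Support/Allocator.h"
    #include "llvm/Support/RecyclingAllocator.h"

    using ExampleNodeAllocator =
        llvm::RecyclingAllocator<llvm::BumpPtrAllocator, llvm::SDNode,
                                 sizeof(llvm::LargestSDNode),
                                 alignof(llvm::MostAlignedSDNode)>;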
2583
2584namespace ISD {
2585
2586 /// Returns true if the specified node is a non-extending and unindexed load.
2587 inline bool isNormalLoad(const SDNode *N) {
2588 const LoadSDNode *Ld = dyn_cast<LoadSDNode>(N);
2589 return Ld && Ld->getExtensionType() == ISD::NON_EXTLOAD &&
2590 Ld->getAddressingMode() == ISD::UNINDEXED;
2591 }
2592
2593 /// Returns true if the specified node is a non-extending load.
2594 inline bool isNON_EXTLoad(const SDNode *N) {
2595 return isa<LoadSDNode>(N) &&
2596 cast<LoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD;
2597 }
2598
2599 /// Returns true if the specified node is an EXTLOAD.
2600 inline bool isEXTLoad(const SDNode *N) {
2601 return isa<LoadSDNode>(N) &&
2602 cast<LoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD;
2603 }
2604
2605 /// Returns true if the specified node is a SEXTLOAD.
2606 inline bool isSEXTLoad(const SDNode *N) {
2607 return isa<LoadSDNode>(N) &&
2608 cast<LoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD;
2609 }
2610
2611 /// Returns true if the specified node is a ZEXTLOAD.
2612 inline bool isZEXTLoad(const SDNode *N) {
2613 return isa<LoadSDNode>(N) &&
2614 cast<LoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD;
2615 }
2616
2617 /// Returns true if the specified node is an unindexed load.
2618 inline bool isUNINDEXEDLoad(const SDNode *N) {
2619 return isa<LoadSDNode>(N) &&
2620 cast<LoadSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
2621 }
2622
2623 /// Returns true if the specified node is a non-truncating
2624 /// and unindexed store.
2625 inline bool isNormalStore(const SDNode *N) {
2626 const StoreSDNode *St = dyn_cast<StoreSDNode>(N);
2627 return St && !St->isTruncatingStore() &&
2628 St->getAddressingMode() == ISD::UNINDEXED;
2629 }
2630
2631 /// Returns true if the specified node is a non-truncating store.
2632 inline bool isNON_TRUNCStore(const SDNode *N) {
2633 return isa<StoreSDNode>(N) && !cast<StoreSDNode>(N)->isTruncatingStore();
2634 }
2635
2636 /// Returns true if the specified node is a truncating store.
2637 inline bool isTRUNCStore(const SDNode *N) {
2638 return isa<StoreSDNode>(N) && cast<StoreSDNode>(N)->isTruncatingStore();
2639 }
2640
2641 /// Returns true if the specified node is an unindexed store.
2642 inline bool isUNINDEXEDStore(const SDNode *N) {
2643 return isa<StoreSDNode>(N) &&
2644 cast<StoreSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
2645 }
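Illustrative sketch, not part of the header: these predicates are the usual guards in
target DAG combines. A combine restricted to plain, unindexed memory accesses might check
an assumed candidate node like this:

    // True only for non-extending / non-truncating, unindexed loads and stores.
    static bool isSimpleMemAccess(const llvm::SDNode *N) {
      return llvm::ISD::isNormalLoad(N) || llvm::ISD::isNormalStore(N);
    }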
2646
2647 /// Attempt to match a unary predicate against a scalar/splat constant or
2648 /// every element of a constant BUILD_VECTOR.
2649 /// If AllowUndefs is true, then UNDEF elements will pass nullptr to Match.
2650 bool matchUnaryPredicate(SDValue Op,
2651 std::function<bool(ConstantSDNode *)> Match,
2652 bool AllowUndefs = false);
2653
2654 /// Attempt to match a binary predicate against a pair of scalar/splat
2655 /// constants or every element of a pair of constant BUILD_VECTORs.
2656 /// If AllowUndefs is true, then UNDEF elements will pass nullptr to Match.
2657 /// If AllowTypeMismatch is true, then the value types of LHS and RHS need not match.
2658 bool matchBinaryPredicate(
2659 SDValue LHS, SDValue RHS,
2660 std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match,
2661 bool AllowUndefs = false, bool AllowTypeMismatch = false);
2662} // end namespace ISD
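Illustrative sketch, not part of the header: matchUnaryPredicate applies the callback to a
splat constant or to every element of a constant BUILD_VECTOR. A typical use, for an
assumed SDValue taken from a combine:

    // True if Op is a constant (scalar, splat, or BUILD_VECTOR) whose every
    // element is a power of two; undef elements fail the match by default.
    static bool isConstPowerOf2(llvm::SDValue Op) {
      return llvm::ISD::matchUnaryPredicate(Op, [](llvm::ConstantSDNode *C) {
        return C->getAPIntValue().isPowerOf2();
      });
    }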
2663
2664} // end namespace llvm
2665
2666#endif // LLVM_CODEGEN_SELECTIONDAGNODES_H