Bug Summary

File: build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Warning: line 16675, column 9
Assigned value is garbage or undefined
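This warning comes from the analyzer's uninitialized-value checks: an assignment whose right-hand side reads a local that was never initialized on some execution path. The following is a minimal, hypothetical sketch of the pattern only (it is not the code at line 16675, which lies outside the excerpt reproduced below); the names Width, Bytes, and UseDefault are made up for illustration.

// Hypothetical illustration of "Assigned value is garbage or undefined".
int Example(bool UseDefault) {
  int Width;               // not initialized on every path
  if (UseDefault)
    Width = 8;
  int Bytes = Width / 8;   // when UseDefault is false, the assigned value is
                           // garbage or undefined; the analyzer flags this line
  return Bytes;
}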

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name PPCISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-15/lib/clang/15.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Target/PowerPC -I /build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/lib/Target/PowerPC -I include -I /build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-15/lib/clang/15.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/= -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/= -O3 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/= -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o 
/tmp/scan-build-2022-04-20-140412-16051-1 -x c++ /build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/lib/Target/PowerPC/PPCISelLowering.cpp

/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/lib/Target/PowerPC/PPCISelLowering.cpp

1//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the PPCISelLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCISelLowering.h"
14#include "MCTargetDesc/PPCPredicates.h"
15#include "PPC.h"
16#include "PPCCCState.h"
17#include "PPCCallingConv.h"
18#include "PPCFrameLowering.h"
19#include "PPCInstrInfo.h"
20#include "PPCMachineFunctionInfo.h"
21#include "PPCPerfectShuffle.h"
22#include "PPCRegisterInfo.h"
23#include "PPCSubtarget.h"
24#include "PPCTargetMachine.h"
25#include "llvm/ADT/APFloat.h"
26#include "llvm/ADT/APInt.h"
27#include "llvm/ADT/ArrayRef.h"
28#include "llvm/ADT/DenseMap.h"
29#include "llvm/ADT/None.h"
30#include "llvm/ADT/STLExtras.h"
31#include "llvm/ADT/SmallPtrSet.h"
32#include "llvm/ADT/SmallSet.h"
33#include "llvm/ADT/SmallVector.h"
34#include "llvm/ADT/Statistic.h"
35#include "llvm/ADT/StringRef.h"
36#include "llvm/ADT/StringSwitch.h"
37#include "llvm/CodeGen/CallingConvLower.h"
38#include "llvm/CodeGen/ISDOpcodes.h"
39#include "llvm/CodeGen/MachineBasicBlock.h"
40#include "llvm/CodeGen/MachineFrameInfo.h"
41#include "llvm/CodeGen/MachineFunction.h"
42#include "llvm/CodeGen/MachineInstr.h"
43#include "llvm/CodeGen/MachineInstrBuilder.h"
44#include "llvm/CodeGen/MachineJumpTableInfo.h"
45#include "llvm/CodeGen/MachineLoopInfo.h"
46#include "llvm/CodeGen/MachineMemOperand.h"
47#include "llvm/CodeGen/MachineModuleInfo.h"
48#include "llvm/CodeGen/MachineOperand.h"
49#include "llvm/CodeGen/MachineRegisterInfo.h"
50#include "llvm/CodeGen/RuntimeLibcalls.h"
51#include "llvm/CodeGen/SelectionDAG.h"
52#include "llvm/CodeGen/SelectionDAGNodes.h"
53#include "llvm/CodeGen/TargetInstrInfo.h"
54#include "llvm/CodeGen/TargetLowering.h"
55#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
56#include "llvm/CodeGen/TargetRegisterInfo.h"
57#include "llvm/CodeGen/ValueTypes.h"
58#include "llvm/IR/CallingConv.h"
59#include "llvm/IR/Constant.h"
60#include "llvm/IR/Constants.h"
61#include "llvm/IR/DataLayout.h"
62#include "llvm/IR/DebugLoc.h"
63#include "llvm/IR/DerivedTypes.h"
64#include "llvm/IR/Function.h"
65#include "llvm/IR/GlobalValue.h"
66#include "llvm/IR/IRBuilder.h"
67#include "llvm/IR/Instructions.h"
68#include "llvm/IR/Intrinsics.h"
69#include "llvm/IR/IntrinsicsPowerPC.h"
70#include "llvm/IR/Module.h"
71#include "llvm/IR/Type.h"
72#include "llvm/IR/Use.h"
73#include "llvm/IR/Value.h"
74#include "llvm/MC/MCContext.h"
75#include "llvm/MC/MCExpr.h"
76#include "llvm/MC/MCRegisterInfo.h"
77#include "llvm/MC/MCSectionXCOFF.h"
78#include "llvm/MC/MCSymbolXCOFF.h"
79#include "llvm/Support/AtomicOrdering.h"
80#include "llvm/Support/BranchProbability.h"
81#include "llvm/Support/Casting.h"
82#include "llvm/Support/CodeGen.h"
83#include "llvm/Support/CommandLine.h"
84#include "llvm/Support/Compiler.h"
85#include "llvm/Support/Debug.h"
86#include "llvm/Support/ErrorHandling.h"
87#include "llvm/Support/Format.h"
88#include "llvm/Support/KnownBits.h"
89#include "llvm/Support/MachineValueType.h"
90#include "llvm/Support/MathExtras.h"
91#include "llvm/Support/raw_ostream.h"
92#include "llvm/Target/TargetMachine.h"
93#include "llvm/Target/TargetOptions.h"
94#include <algorithm>
95#include <cassert>
96#include <cstdint>
97#include <iterator>
98#include <list>
99#include <utility>
100#include <vector>
101
102using namespace llvm;
103
104#define DEBUG_TYPE "ppc-lowering"
105
106static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
107cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
108
109static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
110cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
111
112static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
113cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
114
115static cl::opt<bool> DisableSCO("disable-ppc-sco",
116cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
117
118static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
119cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
120
121static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
122cl::desc("use absolute jump tables on ppc"), cl::Hidden);
123
124static cl::opt<bool> EnableQuadwordAtomics(
125 "ppc-quadword-atomics",
126 cl::desc("enable quadword lock-free atomic operations"), cl::init(false),
127 cl::Hidden);
128
129static cl::opt<bool>
130 DisablePerfectShuffle("ppc-disable-perfect-shuffle",
131 cl::desc("disable vector permute decomposition"),
132 cl::init(true), cl::Hidden);
133
134STATISTIC(NumTailCalls, "Number of tail calls");
135STATISTIC(NumSiblingCalls, "Number of sibling calls");
136STATISTIC(ShufflesHandledWithVPERM, "Number of shuffles lowered to a VPERM");
137STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
138
139static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
140
141static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
142
143static const char AIXSSPCanaryWordName[] = "__ssp_canary_word";
144
145// FIXME: Remove this once the bug has been fixed!
146extern cl::opt<bool> ANDIGlueBug;
147
148PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
149 const PPCSubtarget &STI)
150 : TargetLowering(TM), Subtarget(STI) {
151 // Initialize map that relates the PPC addressing modes to the computed flags
152 // of a load/store instruction. The map is used to determine the optimal
153 // addressing mode when selecting load and stores.
154 initializeAddrModeMap();
155 // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
156 // arguments are at least 4/8 bytes aligned.
157 bool isPPC64 = Subtarget.isPPC64();
158 setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
159
160 // Set up the register classes.
161 addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
162 if (!useSoftFloat()) {
163 if (hasSPE()) {
164 addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
165 // EFPU2 APU only supports f32
166 if (!Subtarget.hasEFPU2())
167 addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
168 } else {
169 addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
170 addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
171 }
172 }
173
174 // Match BITREVERSE to customized fast code sequence in the td file.
175 setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
176 setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
177
178 // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
179 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
180
181 // Custom lower inline assembly to check for special registers.
182 setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
183 setOperationAction(ISD::INLINEASM_BR, MVT::Other, Custom);
184
185 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
186 for (MVT VT : MVT::integer_valuetypes()) {
187 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
188 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
189 }
190
191 if (Subtarget.isISA3_0()) {
192 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
193 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
194 setTruncStoreAction(MVT::f64, MVT::f16, Legal);
195 setTruncStoreAction(MVT::f32, MVT::f16, Legal);
196 } else {
197 // No extending loads from f16 or HW conversions back and forth.
198 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
199 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
200 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
201 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
202 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
203 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
204 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
205 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
206 }
207
208 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
209
210 // PowerPC has pre-inc load and store's.
211 setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
212 setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
213 setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
214 setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
215 setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
216 setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
217 setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
218 setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
219 setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
220 setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
221 if (!Subtarget.hasSPE()) {
222 setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
223 setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
224 setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
225 setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
226 }
227
228 // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
229 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
230 for (MVT VT : ScalarIntVTs) {
231 setOperationAction(ISD::ADDC, VT, Legal);
232 setOperationAction(ISD::ADDE, VT, Legal);
233 setOperationAction(ISD::SUBC, VT, Legal);
234 setOperationAction(ISD::SUBE, VT, Legal);
235 }
236
237 if (Subtarget.useCRBits()) {
238 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
239
240 if (isPPC64 || Subtarget.hasFPCVT()) {
241 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Promote);
242 AddPromotedToType(ISD::STRICT_SINT_TO_FP, MVT::i1,
243 isPPC64 ? MVT::i64 : MVT::i32);
244 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Promote);
245 AddPromotedToType(ISD::STRICT_UINT_TO_FP, MVT::i1,
246 isPPC64 ? MVT::i64 : MVT::i32);
247
248 setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
249 AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
250 isPPC64 ? MVT::i64 : MVT::i32);
251 setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
252 AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
253 isPPC64 ? MVT::i64 : MVT::i32);
254
255 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i1, Promote);
256 AddPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::i1,
257 isPPC64 ? MVT::i64 : MVT::i32);
258 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i1, Promote);
259 AddPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::i1,
260 isPPC64 ? MVT::i64 : MVT::i32);
261
262 setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
263 AddPromotedToType(ISD::FP_TO_SINT, MVT::i1,
264 isPPC64 ? MVT::i64 : MVT::i32);
265 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
266 AddPromotedToType(ISD::FP_TO_UINT, MVT::i1,
267 isPPC64 ? MVT::i64 : MVT::i32);
268 } else {
269 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Custom);
270 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Custom);
271 setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
272 setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
273 }
274
275 // PowerPC does not support direct load/store of condition registers.
276 setOperationAction(ISD::LOAD, MVT::i1, Custom);
277 setOperationAction(ISD::STORE, MVT::i1, Custom);
278
279 // FIXME: Remove this once the ANDI glue bug is fixed:
280 if (ANDIGlueBug)
281 setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
282
283 for (MVT VT : MVT::integer_valuetypes()) {
284 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
285 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
286 setTruncStoreAction(VT, MVT::i1, Expand);
287 }
288
289 addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
290 }
291
292 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
293 // PPC (the libcall is not available).
294 setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom);
295 setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom);
296 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::ppcf128, Custom);
297 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::ppcf128, Custom);
298
299 // We do not currently implement these libm ops for PowerPC.
300 setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
301 setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
302 setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
303 setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
304 setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
305 setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
306
307 // PowerPC has no SREM/UREM instructions unless we are on P9
308 // On P9 we may use a hardware instruction to compute the remainder.
309 // When the result of both the remainder and the division is required it is
310 // more efficient to compute the remainder from the result of the division
311 // rather than use the remainder instruction. The instructions are legalized
312 // directly because the DivRemPairsPass performs the transformation at the IR
313 // level.
314 if (Subtarget.isISA3_0()) {
315 setOperationAction(ISD::SREM, MVT::i32, Legal);
316 setOperationAction(ISD::UREM, MVT::i32, Legal);
317 setOperationAction(ISD::SREM, MVT::i64, Legal);
318 setOperationAction(ISD::UREM, MVT::i64, Legal);
319 } else {
320 setOperationAction(ISD::SREM, MVT::i32, Expand);
321 setOperationAction(ISD::UREM, MVT::i32, Expand);
322 setOperationAction(ISD::SREM, MVT::i64, Expand);
323 setOperationAction(ISD::UREM, MVT::i64, Expand);
324 }
325
326 // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
327 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
328 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
329 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
330 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
331 setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
332 setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
333 setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
334 setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
335
336 // Handle constrained floating-point operations of scalar.
337 // TODO: Handle SPE specific operation.
338 setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
339 setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
340 setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
341 setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
342 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
343
344 setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
345 setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
346 setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
347 setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
348
349 if (!Subtarget.hasSPE()) {
350 setOperationAction(ISD::STRICT_FMA, MVT::f32, Legal);
351 setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal);
352 }
353
354 if (Subtarget.hasVSX()) {
355 setOperationAction(ISD::STRICT_FRINT, MVT::f32, Legal);
356 setOperationAction(ISD::STRICT_FRINT, MVT::f64, Legal);
357 }
358
359 if (Subtarget.hasFSQRT()) {
360 setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
361 setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
362 }
363
364 if (Subtarget.hasFPRND()) {
365 setOperationAction(ISD::STRICT_FFLOOR, MVT::f32, Legal);
366 setOperationAction(ISD::STRICT_FCEIL, MVT::f32, Legal);
367 setOperationAction(ISD::STRICT_FTRUNC, MVT::f32, Legal);
368 setOperationAction(ISD::STRICT_FROUND, MVT::f32, Legal);
369
370 setOperationAction(ISD::STRICT_FFLOOR, MVT::f64, Legal);
371 setOperationAction(ISD::STRICT_FCEIL, MVT::f64, Legal);
372 setOperationAction(ISD::STRICT_FTRUNC, MVT::f64, Legal);
373 setOperationAction(ISD::STRICT_FROUND, MVT::f64, Legal);
374 }
375
376 // We don't support sin/cos/sqrt/fmod/pow
377 setOperationAction(ISD::FSIN , MVT::f64, Expand);
378 setOperationAction(ISD::FCOS , MVT::f64, Expand);
379 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
380 setOperationAction(ISD::FREM , MVT::f64, Expand);
381 setOperationAction(ISD::FPOW , MVT::f64, Expand);
382 setOperationAction(ISD::FSIN , MVT::f32, Expand);
383 setOperationAction(ISD::FCOS , MVT::f32, Expand);
384 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
385 setOperationAction(ISD::FREM , MVT::f32, Expand);
386 setOperationAction(ISD::FPOW , MVT::f32, Expand);
387
388 // MASS transformation for LLVM intrinsics with replicating fast-math flag
389 // to be consistent to PPCGenScalarMASSEntries pass
390 if (TM.getOptLevel() == CodeGenOpt::Aggressive &&
391 TM.Options.PPCGenScalarMASSEntries) {
392 setOperationAction(ISD::FSIN , MVT::f64, Custom);
393 setOperationAction(ISD::FCOS , MVT::f64, Custom);
394 setOperationAction(ISD::FPOW , MVT::f64, Custom);
395 setOperationAction(ISD::FLOG, MVT::f64, Custom);
396 setOperationAction(ISD::FLOG10, MVT::f64, Custom);
397 setOperationAction(ISD::FEXP, MVT::f64, Custom);
398 setOperationAction(ISD::FSIN , MVT::f32, Custom);
399 setOperationAction(ISD::FCOS , MVT::f32, Custom);
400 setOperationAction(ISD::FPOW , MVT::f32, Custom);
401 setOperationAction(ISD::FLOG, MVT::f32, Custom);
402 setOperationAction(ISD::FLOG10, MVT::f32, Custom);
403 setOperationAction(ISD::FEXP, MVT::f32, Custom);
404 }
405
406 if (Subtarget.hasSPE()) {
407 setOperationAction(ISD::FMA , MVT::f64, Expand);
408 setOperationAction(ISD::FMA , MVT::f32, Expand);
409 } else {
410 setOperationAction(ISD::FMA , MVT::f64, Legal);
411 setOperationAction(ISD::FMA , MVT::f32, Legal);
412 }
413
414 if (Subtarget.hasSPE())
415 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
416
417 setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
418
419 // If we're enabling GP optimizations, use hardware square root
420 if (!Subtarget.hasFSQRT() &&
421 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
422 Subtarget.hasFRE()))
423 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
424
425 if (!Subtarget.hasFSQRT() &&
426 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
427 Subtarget.hasFRES()))
428 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
429
430 if (Subtarget.hasFCPSGN()) {
431 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
432 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
433 } else {
434 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
435 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
436 }
437
438 if (Subtarget.hasFPRND()) {
439 setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
440 setOperationAction(ISD::FCEIL, MVT::f64, Legal);
441 setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
442 setOperationAction(ISD::FROUND, MVT::f64, Legal);
443
444 setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
445 setOperationAction(ISD::FCEIL, MVT::f32, Legal);
446 setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
447 setOperationAction(ISD::FROUND, MVT::f32, Legal);
448 }
449
450 // PowerPC does not have BSWAP, but we can use vector BSWAP instruction xxbrd
451 // to speed up scalar BSWAP64.
452 // CTPOP or CTTZ were introduced in P8/P9 respectively
453 setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
454 if (Subtarget.hasP9Vector() && Subtarget.isPPC64())
455 setOperationAction(ISD::BSWAP, MVT::i64 , Custom);
456 else
457 setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
458 if (Subtarget.isISA3_0()) {
459 setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
460 setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
461 } else {
462 setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
463 setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
464 }
465
466 if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
467 setOperationAction(ISD::CTPOP, MVT::i32 , Legal);
468 setOperationAction(ISD::CTPOP, MVT::i64 , Legal);
469 } else {
470 setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
471 setOperationAction(ISD::CTPOP, MVT::i64 , Expand);
472 }
473
474 // PowerPC does not have ROTR
475 setOperationAction(ISD::ROTR, MVT::i32 , Expand);
476 setOperationAction(ISD::ROTR, MVT::i64 , Expand);
477
478 if (!Subtarget.useCRBits()) {
479 // PowerPC does not have Select
480 setOperationAction(ISD::SELECT, MVT::i32, Expand);
481 setOperationAction(ISD::SELECT, MVT::i64, Expand);
482 setOperationAction(ISD::SELECT, MVT::f32, Expand);
483 setOperationAction(ISD::SELECT, MVT::f64, Expand);
484 }
485
486 // PowerPC wants to turn select_cc of FP into fsel when possible.
487 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
488 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
489
490 // PowerPC wants to optimize integer setcc a bit
491 if (!Subtarget.useCRBits())
492 setOperationAction(ISD::SETCC, MVT::i32, Custom);
493
494 if (Subtarget.hasFPU()) {
495 setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
496 setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
497 setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Legal);
498
499 setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
500 setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
501 setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Legal);
502 }
503
504 // PowerPC does not have BRCOND which requires SetCC
505 if (!Subtarget.useCRBits())
506 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
507
508 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
509
510 if (Subtarget.hasSPE()) {
511 // SPE has built-in conversions
512 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Legal);
513 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Legal);
514 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Legal);
515 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
516 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
517 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
518
519 // SPE supports signaling compare of f32/f64.
520 setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
521 setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
522 } else {
523 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
524 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
525 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
526
527 // PowerPC does not have [U|S]INT_TO_FP
528 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Expand);
529 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Expand);
530 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
531 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
532 }
533
534 if (Subtarget.hasDirectMove() && isPPC64) {
535 setOperationAction(ISD::BITCAST, MVT::f32, Legal);
536 setOperationAction(ISD::BITCAST, MVT::i32, Legal);
537 setOperationAction(ISD::BITCAST, MVT::i64, Legal);
538 setOperationAction(ISD::BITCAST, MVT::f64, Legal);
539 if (TM.Options.UnsafeFPMath) {
540 setOperationAction(ISD::LRINT, MVT::f64, Legal);
541 setOperationAction(ISD::LRINT, MVT::f32, Legal);
542 setOperationAction(ISD::LLRINT, MVT::f64, Legal);
543 setOperationAction(ISD::LLRINT, MVT::f32, Legal);
544 setOperationAction(ISD::LROUND, MVT::f64, Legal);
545 setOperationAction(ISD::LROUND, MVT::f32, Legal);
546 setOperationAction(ISD::LLROUND, MVT::f64, Legal);
547 setOperationAction(ISD::LLROUND, MVT::f32, Legal);
548 }
549 } else {
550 setOperationAction(ISD::BITCAST, MVT::f32, Expand);
551 setOperationAction(ISD::BITCAST, MVT::i32, Expand);
552 setOperationAction(ISD::BITCAST, MVT::i64, Expand);
553 setOperationAction(ISD::BITCAST, MVT::f64, Expand);
554 }
555
556 // We cannot sextinreg(i1). Expand to shifts.
557 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
558
559 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
560 // SjLj exception handling but a light-weight setjmp/longjmp replacement to
561 // support continuation, user-level threading, and etc.. As a result, no
562 // other SjLj exception interfaces are implemented and please don't build
563 // your own exception handling based on them.
564 // LLVM/Clang supports zero-cost DWARF exception handling.
565 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
566 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
567
568 // We want to legalize GlobalAddress and ConstantPool nodes into the
569 // appropriate instructions to materialize the address.
570 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
571 setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
572 setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
573 setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
574 setOperationAction(ISD::JumpTable, MVT::i32, Custom);
575 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
576 setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
577 setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
578 setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
579 setOperationAction(ISD::JumpTable, MVT::i64, Custom);
580
581 // TRAP is legal.
582 setOperationAction(ISD::TRAP, MVT::Other, Legal);
583
584 // TRAMPOLINE is custom lowered.
585 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
586 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
587
588 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
589 setOperationAction(ISD::VASTART , MVT::Other, Custom);
590
591 if (Subtarget.is64BitELFABI()) {
592 // VAARG always uses double-word chunks, so promote anything smaller.
593 setOperationAction(ISD::VAARG, MVT::i1, Promote);
594 AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
595 setOperationAction(ISD::VAARG, MVT::i8, Promote);
596 AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
597 setOperationAction(ISD::VAARG, MVT::i16, Promote);
598 AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
599 setOperationAction(ISD::VAARG, MVT::i32, Promote);
600 AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
601 setOperationAction(ISD::VAARG, MVT::Other, Expand);
602 } else if (Subtarget.is32BitELFABI()) {
603 // VAARG is custom lowered with the 32-bit SVR4 ABI.
604 setOperationAction(ISD::VAARG, MVT::Other, Custom);
605 setOperationAction(ISD::VAARG, MVT::i64, Custom);
606 } else
607 setOperationAction(ISD::VAARG, MVT::Other, Expand);
608
609 // VACOPY is custom lowered with the 32-bit SVR4 ABI.
610 if (Subtarget.is32BitELFABI())
611 setOperationAction(ISD::VACOPY , MVT::Other, Custom);
612 else
613 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
614
615 // Use the default implementation.
616 setOperationAction(ISD::VAEND , MVT::Other, Expand);
617 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
618 setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom);
619 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
620 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom);
621 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
622 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
623 setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
624 setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
625
626 // We want to custom lower some of our intrinsics.
627 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
628 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f64, Custom);
629 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::ppcf128, Custom);
630 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
631 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f64, Custom);
632
633 // To handle counter-based loop conditions.
634 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
635
636 setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
637 setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
638 setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
639 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
640
641 // Comparisons that require checking two conditions.
642 if (Subtarget.hasSPE()) {
643 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
644 setCondCodeAction(ISD::SETO, MVT::f64, Expand);
645 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
646 setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
647 }
648 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
649 setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
650 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
651 setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
652 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
653 setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
654 setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
655 setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
656 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
657 setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
658 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
659 setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
660
661 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
662 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
663
664 if (Subtarget.has64BitSupport()) {
665 // They also have instructions for converting between i64 and fp.
666 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
667 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Expand);
668 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
669 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand);
670 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
671 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
672 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
673 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
674 // This is just the low 32 bits of a (signed) fp->i64 conversion.
675 // We cannot do this with Promote because i64 is not a legal type.
676 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
677 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
678
679 if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) {
680 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
681 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
682 }
683 } else {
684 // PowerPC does not have FP_TO_UINT on 32-bit implementations.
685 if (Subtarget.hasSPE()) {
686 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Legal);
687 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
688 } else {
689 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Expand);
690 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
691 }
692 }
693
694 // With the instructions enabled under FPCVT, we can do everything.
695 if (Subtarget.hasFPCVT()) {
696 if (Subtarget.has64BitSupport()) {
697 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
698 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
699 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
700 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
701 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
702 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
703 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
704 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
705 }
706
707 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
708 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
709 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
710 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
711 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
712 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
713 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
714 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
715 }
716
717 if (Subtarget.use64BitRegs()) {
718 // 64-bit PowerPC implementations can support i64 types directly
719 addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
720 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
721 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
722 // 64-bit PowerPC wants to expand i128 shifts itself.
723 setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
724 setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
725 setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
726 } else {
727 // 32-bit PowerPC wants to expand i64 shifts itself.
728 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
729 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
730 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
731 }
732
733 // PowerPC has better expansions for funnel shifts than the generic
734 // TargetLowering::expandFunnelShift.
735 if (Subtarget.has64BitSupport()) {
736 setOperationAction(ISD::FSHL, MVT::i64, Custom);
737 setOperationAction(ISD::FSHR, MVT::i64, Custom);
738 }
739 setOperationAction(ISD::FSHL, MVT::i32, Custom);
740 setOperationAction(ISD::FSHR, MVT::i32, Custom);
741
742 if (Subtarget.hasVSX()) {
743 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
744 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
745 setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
746 setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
747 }
748
749 if (Subtarget.hasAltivec()) {
750 for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
751 setOperationAction(ISD::SADDSAT, VT, Legal);
752 setOperationAction(ISD::SSUBSAT, VT, Legal);
753 setOperationAction(ISD::UADDSAT, VT, Legal);
754 setOperationAction(ISD::USUBSAT, VT, Legal);
755 }
756 // First set operation action for all vector types to expand. Then we
757 // will selectively turn on ones that can be effectively codegen'd.
758 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
759 // add/sub are legal for all supported vector VT's.
760 setOperationAction(ISD::ADD, VT, Legal);
761 setOperationAction(ISD::SUB, VT, Legal);
762
763 // For v2i64, these are only valid with P8Vector. This is corrected after
764 // the loop.
765 if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
766 setOperationAction(ISD::SMAX, VT, Legal);
767 setOperationAction(ISD::SMIN, VT, Legal);
768 setOperationAction(ISD::UMAX, VT, Legal);
769 setOperationAction(ISD::UMIN, VT, Legal);
770 }
771 else {
772 setOperationAction(ISD::SMAX, VT, Expand);
773 setOperationAction(ISD::SMIN, VT, Expand);
774 setOperationAction(ISD::UMAX, VT, Expand);
775 setOperationAction(ISD::UMIN, VT, Expand);
776 }
777
778 if (Subtarget.hasVSX()) {
779 setOperationAction(ISD::FMAXNUM, VT, Legal);
780 setOperationAction(ISD::FMINNUM, VT, Legal);
781 }
782
783 // Vector instructions introduced in P8
784 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
785 setOperationAction(ISD::CTPOP, VT, Legal);
786 setOperationAction(ISD::CTLZ, VT, Legal);
787 }
788 else {
789 setOperationAction(ISD::CTPOP, VT, Expand);
790 setOperationAction(ISD::CTLZ, VT, Expand);
791 }
792
793 // Vector instructions introduced in P9
794 if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
795 setOperationAction(ISD::CTTZ, VT, Legal);
796 else
797 setOperationAction(ISD::CTTZ, VT, Expand);
798
799 // We promote all shuffles to v16i8.
800 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
801 AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
802
803 // We promote all non-typed operations to v4i32.
804 setOperationAction(ISD::AND , VT, Promote);
805 AddPromotedToType (ISD::AND , VT, MVT::v4i32);
806 setOperationAction(ISD::OR , VT, Promote);
807 AddPromotedToType (ISD::OR , VT, MVT::v4i32);
808 setOperationAction(ISD::XOR , VT, Promote);
809 AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
810 setOperationAction(ISD::LOAD , VT, Promote);
811 AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
812 setOperationAction(ISD::SELECT, VT, Promote);
813 AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
814 setOperationAction(ISD::VSELECT, VT, Legal);
815 setOperationAction(ISD::SELECT_CC, VT, Promote);
816 AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
817 setOperationAction(ISD::STORE, VT, Promote);
818 AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
819
820 // No other operations are legal.
821 setOperationAction(ISD::MUL , VT, Expand);
822 setOperationAction(ISD::SDIV, VT, Expand);
823 setOperationAction(ISD::SREM, VT, Expand);
824 setOperationAction(ISD::UDIV, VT, Expand);
825 setOperationAction(ISD::UREM, VT, Expand);
826 setOperationAction(ISD::FDIV, VT, Expand);
827 setOperationAction(ISD::FREM, VT, Expand);
828 setOperationAction(ISD::FNEG, VT, Expand);
829 setOperationAction(ISD::FSQRT, VT, Expand);
830 setOperationAction(ISD::FLOG, VT, Expand);
831 setOperationAction(ISD::FLOG10, VT, Expand);
832 setOperationAction(ISD::FLOG2, VT, Expand);
833 setOperationAction(ISD::FEXP, VT, Expand);
834 setOperationAction(ISD::FEXP2, VT, Expand);
835 setOperationAction(ISD::FSIN, VT, Expand);
836 setOperationAction(ISD::FCOS, VT, Expand);
837 setOperationAction(ISD::FABS, VT, Expand);
838 setOperationAction(ISD::FFLOOR, VT, Expand);
839 setOperationAction(ISD::FCEIL, VT, Expand);
840 setOperationAction(ISD::FTRUNC, VT, Expand);
841 setOperationAction(ISD::FRINT, VT, Expand);
842 setOperationAction(ISD::FNEARBYINT, VT, Expand);
843 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
844 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
845 setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
846 setOperationAction(ISD::MULHU, VT, Expand);
847 setOperationAction(ISD::MULHS, VT, Expand);
848 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
849 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
850 setOperationAction(ISD::UDIVREM, VT, Expand);
851 setOperationAction(ISD::SDIVREM, VT, Expand);
852 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
853 setOperationAction(ISD::FPOW, VT, Expand);
854 setOperationAction(ISD::BSWAP, VT, Expand);
855 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
856 setOperationAction(ISD::ROTL, VT, Expand);
857 setOperationAction(ISD::ROTR, VT, Expand);
858
859 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
860 setTruncStoreAction(VT, InnerVT, Expand);
861 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
862 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
863 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
864 }
865 }
866 setOperationAction(ISD::SELECT_CC, MVT::v4i32, Expand);
867 if (!Subtarget.hasP8Vector()) {
868 setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
869 setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
870 setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
871 setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
872 }
873
874 // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
875 // with merges, splats, etc.
876 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
877
878 // Vector truncates to sub-word integer that fit in an Altivec/VSX register
879 // are cheap, so handle them before they get expanded to scalar.
880 setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
881 setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
882 setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
883 setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
884 setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
885
886 setOperationAction(ISD::AND , MVT::v4i32, Legal);
887 setOperationAction(ISD::OR , MVT::v4i32, Legal);
888 setOperationAction(ISD::XOR , MVT::v4i32, Legal);
889 setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
890 setOperationAction(ISD::SELECT, MVT::v4i32,
891 Subtarget.useCRBits() ? Legal : Expand);
892 setOperationAction(ISD::STORE , MVT::v4i32, Legal);
893 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
894 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);
895 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
896 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal);
897 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
898 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
899 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
900 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
901 setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
902 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
903 setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
904 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
905
906 // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
907 setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
908 // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
909 if (Subtarget.hasAltivec())
910 for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
911 setOperationAction(ISD::ROTL, VT, Legal);
912 // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
913 if (Subtarget.hasP8Altivec())
914 setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
915
916 addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
917 addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
918 addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
919 addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
920
921 setOperationAction(ISD::MUL, MVT::v4f32, Legal);
922 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
923
924 if (Subtarget.hasVSX()) {
925 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
926 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
927 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
928 }
929
930 if (Subtarget.hasP8Altivec())
931 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
932 else
933 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
934
935 if (Subtarget.isISA3_1()) {
936 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
937 setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
938 setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
939 setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
940 setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
941 setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
942 setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
943 setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
944 setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
945 setOperationAction(ISD::UREM, MVT::v2i64, Legal);
946 setOperationAction(ISD::SREM, MVT::v2i64, Legal);
947 setOperationAction(ISD::UREM, MVT::v4i32, Legal);
948 setOperationAction(ISD::SREM, MVT::v4i32, Legal);
949 setOperationAction(ISD::UREM, MVT::v1i128, Legal);
950 setOperationAction(ISD::SREM, MVT::v1i128, Legal);
951 setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
952 setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
953 setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
954 }
955
956 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
957 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
958
959 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
960 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
961
962 setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
963 setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
964 setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
965 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
966
967 // Altivec does not contain unordered floating-point compare instructions
968 setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
969 setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
970 setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
971 setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
972
973 if (Subtarget.hasVSX()) {
974 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
975 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
976 if (Subtarget.hasP8Vector()) {
977 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
978 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
979 }
980 if (Subtarget.hasDirectMove() && isPPC64) {
981 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
982 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
983 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
984 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
985 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
986 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
987 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
988 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
989 }
990 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
991
992 // The nearbyint variants are not allowed to raise the inexact exception
993 // so we can only code-gen them with unsafe math.
994 if (TM.Options.UnsafeFPMath) {
995 setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
996 setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
997 }
998
999 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
1000 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
1001 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
1002 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
1003 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
1004 setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
1005 setOperationAction(ISD::FROUND, MVT::f64, Legal);
1006 setOperationAction(ISD::FRINT, MVT::f64, Legal);
1007
1008 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
1009 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
1010 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
1011 setOperationAction(ISD::FROUND, MVT::f32, Legal);
1012 setOperationAction(ISD::FRINT, MVT::f32, Legal);
1013
1014 setOperationAction(ISD::MUL, MVT::v2f64, Legal);
1015 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
1016
1017 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
1018 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
1019
1020 // Share the Altivec comparison restrictions.
1021 setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
1022 setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
1023 setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
1024 setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
1025
1026 setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
1027 setOperationAction(ISD::STORE, MVT::v2f64, Legal);
1028
1029 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);
1030
1031 if (Subtarget.hasP8Vector())
1032 addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
1033
1034 addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
1035
1036 addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
1037 addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
1038 addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
1039
1040 if (Subtarget.hasP8Altivec()) {
1041 setOperationAction(ISD::SHL, MVT::v2i64, Legal);
1042 setOperationAction(ISD::SRA, MVT::v2i64, Legal);
1043 setOperationAction(ISD::SRL, MVT::v2i64, Legal);
1044
1045 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1046 // SRL, but not for SRA because of the instructions available:
1047 // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth
1048 // doing
1049 setOperationAction(ISD::SHL, MVT::v1i128, Expand);
1050 setOperationAction(ISD::SRL, MVT::v1i128, Expand);
1051 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1052
1053 setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
1054 }
1055 else {
1056 setOperationAction(ISD::SHL, MVT::v2i64, Expand);
1057 setOperationAction(ISD::SRA, MVT::v2i64, Expand);
1058 setOperationAction(ISD::SRL, MVT::v2i64, Expand);
1059
1060 setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
1061
1062 // VSX v2i64 only supports non-arithmetic operations.
1063 setOperationAction(ISD::ADD, MVT::v2i64, Expand);
1064 setOperationAction(ISD::SUB, MVT::v2i64, Expand);
1065 }
1066
1067 if (Subtarget.isISA3_1())
1068 setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
1069 else
1070 setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
1071
1072 setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
1073 AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
1074 setOperationAction(ISD::STORE, MVT::v2i64, Promote);
1075 AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
1076
1077 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);
1078
1079 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal);
1080 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal);
1081 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal);
1082 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal);
1083 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
1084 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
1085 setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
1086 setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
1087
1088 // Custom handling for partial vectors of integers converted to
1089 // floating point. We already have optimal handling for v2i32 through
1090 // the DAG combine, so those aren't necessary.
1091 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i8, Custom);
1092 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i8, Custom);
1093 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i16, Custom);
1094 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i16, Custom);
1095 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i8, Custom);
1096 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i8, Custom);
1097 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i16, Custom);
1098 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i16, Custom);
1099 setOperationAction(ISD::UINT_TO_FP, MVT::v2i8, Custom);
1100 setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
1101 setOperationAction(ISD::UINT_TO_FP, MVT::v2i16, Custom);
1102 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
1103 setOperationAction(ISD::SINT_TO_FP, MVT::v2i8, Custom);
1104 setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Custom);
1105 setOperationAction(ISD::SINT_TO_FP, MVT::v2i16, Custom);
1106 setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
1107
1108 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
1109 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
1110 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
1111 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
1112 setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
1113 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Legal);
1114
1115 setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
1116 setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
1117
1118 // Handle constrained floating-point operations of vector.
1119 // The predictor is `hasVSX` because altivec instruction has
1120 // no exception but VSX vector instruction has.
1121 setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
1122 setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
1123 setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
1124 setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
1125 setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
1126 setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
1127 setOperationAction(ISD::STRICT_FMAXNUM, MVT::v4f32, Legal);
1128 setOperationAction(ISD::STRICT_FMINNUM, MVT::v4f32, Legal);
1129 setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
1130 setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
1131 setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal);
1132 setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
1133 setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);
1134
1135 setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
1136 setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
1137 setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
1138 setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
1139 setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);
1140 setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
1141 setOperationAction(ISD::STRICT_FMAXNUM, MVT::v2f64, Legal);
1142 setOperationAction(ISD::STRICT_FMINNUM, MVT::v2f64, Legal);
1143 setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal);
1144 setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
1145 setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal);
1146 setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
1147 setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal);
1148
1149 addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1150 addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1151
1152 for (MVT FPT : MVT::fp_valuetypes())
1153 setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
1154
1155 // Expand the SELECT to SELECT_CC
1156 setOperationAction(ISD::SELECT, MVT::f128, Expand);
1157
1158 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
1159 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
1160
1161 // No implementation for these ops for PowerPC.
1162 setOperationAction(ISD::FSIN, MVT::f128, Expand);
1163 setOperationAction(ISD::FCOS, MVT::f128, Expand);
1164 setOperationAction(ISD::FPOW, MVT::f128, Expand);
1165 setOperationAction(ISD::FPOWI, MVT::f128, Expand);
1166 setOperationAction(ISD::FREM, MVT::f128, Expand);
1167 }
1168
1169 if (Subtarget.hasP8Altivec()) {
1170 addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1171 addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1172 }
1173
1174 if (Subtarget.hasP9Vector()) {
1175 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
1176 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
1177
1178 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1179 // SRL, but not for SRA because of the instructions available:
1180 // VS{RL} and VS{RL}O.
1181 setOperationAction(ISD::SHL, MVT::v1i128, Legal);
1182 setOperationAction(ISD::SRL, MVT::v1i128, Legal);
1183 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1184
1185 setOperationAction(ISD::FADD, MVT::f128, Legal);
1186 setOperationAction(ISD::FSUB, MVT::f128, Legal);
1187 setOperationAction(ISD::FDIV, MVT::f128, Legal);
1188 setOperationAction(ISD::FMUL, MVT::f128, Legal);
1189 setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
1190
1191 setOperationAction(ISD::FMA, MVT::f128, Legal);
1192 setCondCodeAction(ISD::SETULT, MVT::f128, Expand);
1193 setCondCodeAction(ISD::SETUGT, MVT::f128, Expand);
1194 setCondCodeAction(ISD::SETUEQ, MVT::f128, Expand);
1195 setCondCodeAction(ISD::SETOGE, MVT::f128, Expand);
1196 setCondCodeAction(ISD::SETOLE, MVT::f128, Expand);
1197 setCondCodeAction(ISD::SETONE, MVT::f128, Expand);
1198
1199 setOperationAction(ISD::FTRUNC, MVT::f128, Legal);
1200 setOperationAction(ISD::FRINT, MVT::f128, Legal);
1201 setOperationAction(ISD::FFLOOR, MVT::f128, Legal);
1202 setOperationAction(ISD::FCEIL, MVT::f128, Legal);
1203 setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal);
1204 setOperationAction(ISD::FROUND, MVT::f128, Legal);
1205
1206 setOperationAction(ISD::FP_ROUND, MVT::f64, Legal);
1207 setOperationAction(ISD::FP_ROUND, MVT::f32, Legal);
1208 setOperationAction(ISD::BITCAST, MVT::i128, Custom);
1209
1210 // Handle constrained floating-point operations of fp128
1211 setOperationAction(ISD::STRICT_FADD, MVT::f128, Legal);
1212 setOperationAction(ISD::STRICT_FSUB, MVT::f128, Legal);
1213 setOperationAction(ISD::STRICT_FMUL, MVT::f128, Legal);
1214 setOperationAction(ISD::STRICT_FDIV, MVT::f128, Legal);
1215 setOperationAction(ISD::STRICT_FMA, MVT::f128, Legal);
1216 setOperationAction(ISD::STRICT_FSQRT, MVT::f128, Legal);
1217 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Legal);
1218 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);
1219 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
1220 setOperationAction(ISD::STRICT_FRINT, MVT::f128, Legal);
1221 setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f128, Legal);
1222 setOperationAction(ISD::STRICT_FFLOOR, MVT::f128, Legal);
1223 setOperationAction(ISD::STRICT_FCEIL, MVT::f128, Legal);
1224 setOperationAction(ISD::STRICT_FTRUNC, MVT::f128, Legal);
1225 setOperationAction(ISD::STRICT_FROUND, MVT::f128, Legal);
1226 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
1227 setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
1228 setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
1229 setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
1230 setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
1231 } else if (Subtarget.hasVSX()) {
1232 setOperationAction(ISD::LOAD, MVT::f128, Promote);
1233 setOperationAction(ISD::STORE, MVT::f128, Promote);
1234
1235 AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32);
1236 AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32);
1237
1238      // Set FADD/FSUB as LibCall to keep the legalizer from expanding the
1239      // fp_to_uint and int_to_fp.
1240 setOperationAction(ISD::FADD, MVT::f128, LibCall);
1241 setOperationAction(ISD::FSUB, MVT::f128, LibCall);
1242
1243 setOperationAction(ISD::FMUL, MVT::f128, Expand);
1244 setOperationAction(ISD::FDIV, MVT::f128, Expand);
1245 setOperationAction(ISD::FNEG, MVT::f128, Expand);
1246 setOperationAction(ISD::FABS, MVT::f128, Expand);
1247 setOperationAction(ISD::FSQRT, MVT::f128, Expand);
1248 setOperationAction(ISD::FMA, MVT::f128, Expand);
1249 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
1250
1251 // Expand the fp_extend if the target type is fp128.
1252 setOperationAction(ISD::FP_EXTEND, MVT::f128, Expand);
1253 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Expand);
1254
1255 // Expand the fp_round if the source type is fp128.
1256 for (MVT VT : {MVT::f32, MVT::f64}) {
1257 setOperationAction(ISD::FP_ROUND, VT, Custom);
1258 setOperationAction(ISD::STRICT_FP_ROUND, VT, Custom);
1259 }
1260
1261 setOperationAction(ISD::SETCC, MVT::f128, Custom);
1262 setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Custom);
1263 setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Custom);
1264 setOperationAction(ISD::BR_CC, MVT::f128, Expand);
1265
1266 // Lower following f128 select_cc pattern:
1267 // select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE
1268 setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
1269
1270 // We need to handle f128 SELECT_CC with integer result type.
1271 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
1272 setOperationAction(ISD::SELECT_CC, MVT::i64, isPPC64 ? Custom : Expand);
1273 }
1274
1275 if (Subtarget.hasP9Altivec()) {
1276 if (Subtarget.isISA3_1()) {
1277 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Legal);
1278 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Legal);
1279 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Legal);
1280 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Legal);
1281 } else {
1282 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
1283 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
1284 }
1285 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Legal);
1286 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
1287 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
1288 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Legal);
1289 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Legal);
1290 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
1291 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
1292 }
1293
1294 if (Subtarget.hasP10Vector()) {
1295 setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
1296 }
1297 }
1298
1299 if (Subtarget.pairedVectorMemops()) {
1300 addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
1301 setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
1302 setOperationAction(ISD::STORE, MVT::v256i1, Custom);
1303 }
1304 if (Subtarget.hasMMA()) {
1305 addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1306 setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
1307 setOperationAction(ISD::STORE, MVT::v512i1, Custom);
1308 setOperationAction(ISD::BUILD_VECTOR, MVT::v512i1, Custom);
1309 }
1310
1311 if (Subtarget.has64BitSupport())
1312 setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
1313
1314 if (Subtarget.isISA3_1())
1315 setOperationAction(ISD::SRA, MVT::v1i128, Legal);
1316
1317 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1318
1319 if (!isPPC64) {
1320 setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
1321 setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
1322 }
1323
1324 if (shouldInlineQuadwordAtomics()) {
1325 setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
1326 setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
1327 setOperationAction(ISD::INTRINSIC_VOID, MVT::i128, Custom);
1328 }
1329
1330 setBooleanContents(ZeroOrOneBooleanContent);
1331
1332 if (Subtarget.hasAltivec()) {
1333 // Altivec instructions set fields to all zeros or all ones.
1334 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
1335 }
1336
1337 setLibcallName(RTLIB::MULO_I128, nullptr);
1338 if (!isPPC64) {
1339 // These libcalls are not available in 32-bit.
1340 setLibcallName(RTLIB::SHL_I128, nullptr);
1341 setLibcallName(RTLIB::SRL_I128, nullptr);
1342 setLibcallName(RTLIB::SRA_I128, nullptr);
1343 setLibcallName(RTLIB::MUL_I128, nullptr);
1344 setLibcallName(RTLIB::MULO_I64, nullptr);
1345 }
1346
1347 if (!isPPC64)
1348 setMaxAtomicSizeInBitsSupported(32);
1349 else if (shouldInlineQuadwordAtomics())
1350 setMaxAtomicSizeInBitsSupported(128);
1351 else
1352 setMaxAtomicSizeInBitsSupported(64);
1353
1354 setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1355
1356 // We have target-specific dag combine patterns for the following nodes:
1357 setTargetDAGCombine({ISD::ADD, ISD::SHL, ISD::SRA, ISD::SRL, ISD::MUL,
1358 ISD::FMA, ISD::SINT_TO_FP, ISD::BUILD_VECTOR});
1359 if (Subtarget.hasFPCVT())
1360 setTargetDAGCombine(ISD::UINT_TO_FP);
1361 setTargetDAGCombine({ISD::LOAD, ISD::STORE, ISD::BR_CC});
1362 if (Subtarget.useCRBits())
1363 setTargetDAGCombine(ISD::BRCOND);
1364 setTargetDAGCombine({ISD::BSWAP, ISD::INTRINSIC_WO_CHAIN,
1365 ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID});
1366
1367 setTargetDAGCombine({ISD::SIGN_EXTEND, ISD::ZERO_EXTEND, ISD::ANY_EXTEND});
1368
1369 setTargetDAGCombine({ISD::TRUNCATE, ISD::VECTOR_SHUFFLE});
1370
1371 if (Subtarget.useCRBits()) {
1372 setTargetDAGCombine({ISD::TRUNCATE, ISD::SETCC, ISD::SELECT_CC});
1373 }
1374
1375 if (Subtarget.hasP9Altivec()) {
1376 setTargetDAGCombine({ISD::ABS, ISD::VSELECT});
1377 }
1378
1379 setLibcallName(RTLIB::LOG_F128, "logf128");
1380 setLibcallName(RTLIB::LOG2_F128, "log2f128");
1381 setLibcallName(RTLIB::LOG10_F128, "log10f128");
1382 setLibcallName(RTLIB::EXP_F128, "expf128");
1383 setLibcallName(RTLIB::EXP2_F128, "exp2f128");
1384 setLibcallName(RTLIB::SIN_F128, "sinf128");
1385 setLibcallName(RTLIB::COS_F128, "cosf128");
1386 setLibcallName(RTLIB::POW_F128, "powf128");
1387 setLibcallName(RTLIB::FMIN_F128, "fminf128");
1388 setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
1389 setLibcallName(RTLIB::REM_F128, "fmodf128");
1390 setLibcallName(RTLIB::SQRT_F128, "sqrtf128");
1391 setLibcallName(RTLIB::CEIL_F128, "ceilf128");
1392 setLibcallName(RTLIB::FLOOR_F128, "floorf128");
1393 setLibcallName(RTLIB::TRUNC_F128, "truncf128");
1394 setLibcallName(RTLIB::ROUND_F128, "roundf128");
1395 setLibcallName(RTLIB::LROUND_F128, "lroundf128");
1396 setLibcallName(RTLIB::LLROUND_F128, "llroundf128");
1397 setLibcallName(RTLIB::RINT_F128, "rintf128");
1398 setLibcallName(RTLIB::LRINT_F128, "lrintf128");
1399 setLibcallName(RTLIB::LLRINT_F128, "llrintf128");
1400 setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128");
1401 setLibcallName(RTLIB::FMA_F128, "fmaf128");
1402
1403 // With 32 condition bits, we don't need to sink (and duplicate) compares
1404 // aggressively in CodeGenPrep.
1405 if (Subtarget.useCRBits()) {
1406 setHasMultipleConditionRegisters();
1407 setJumpIsExpensive();
1408 }
1409
1410 setMinFunctionAlignment(Align(4));
1411
1412 switch (Subtarget.getCPUDirective()) {
1413 default: break;
1414 case PPC::DIR_970:
1415 case PPC::DIR_A2:
1416 case PPC::DIR_E500:
1417 case PPC::DIR_E500mc:
1418 case PPC::DIR_E5500:
1419 case PPC::DIR_PWR4:
1420 case PPC::DIR_PWR5:
1421 case PPC::DIR_PWR5X:
1422 case PPC::DIR_PWR6:
1423 case PPC::DIR_PWR6X:
1424 case PPC::DIR_PWR7:
1425 case PPC::DIR_PWR8:
1426 case PPC::DIR_PWR9:
1427 case PPC::DIR_PWR10:
1428 case PPC::DIR_PWR_FUTURE:
1429 setPrefLoopAlignment(Align(16));
1430 setPrefFunctionAlignment(Align(16));
1431 break;
1432 }
1433
1434 if (Subtarget.enableMachineScheduler())
1435 setSchedulingPreference(Sched::Source);
1436 else
1437 setSchedulingPreference(Sched::Hybrid);
1438
1439 computeRegisterProperties(STI.getRegisterInfo());
1440
1441 // The Freescale cores do better with aggressive inlining of memcpy and
1442  // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
1443 if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
1444 Subtarget.getCPUDirective() == PPC::DIR_E5500) {
1445 MaxStoresPerMemset = 32;
1446 MaxStoresPerMemsetOptSize = 16;
1447 MaxStoresPerMemcpy = 32;
1448 MaxStoresPerMemcpyOptSize = 8;
1449 MaxStoresPerMemmove = 32;
1450 MaxStoresPerMemmoveOptSize = 8;
1451 } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
1452 // The A2 also benefits from (very) aggressive inlining of memcpy and
1453    // friends. The overhead of a function call, even when warm, can be
1454 // over one hundred cycles.
1455 MaxStoresPerMemset = 128;
1456 MaxStoresPerMemcpy = 128;
1457 MaxStoresPerMemmove = 128;
1458 MaxLoadsPerMemcmp = 128;
1459 } else {
1460 MaxLoadsPerMemcmp = 8;
1461 MaxLoadsPerMemcmpOptSize = 4;
1462 }
1463
1464 IsStrictFPEnabled = true;
1465
1466 // Let the subtarget (CPU) decide if a predictable select is more expensive
1467 // than the corresponding branch. This information is used in CGP to decide
1468 // when to convert selects into branches.
1469 PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();
1470}
1471
1472// *********************************** NOTE ************************************
1473// For selecting load and store instructions, the addressing modes are defined
1474// as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
1475// patterns to match the load and store instructions.
1476//
1477// The TD definitions for the addressing modes correspond to their respective
1478// Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely
1479// on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
1480// address mode flags of a particular node. Afterwards, the computed address
1481// flags are passed into getAddrModeForFlags() in order to retrieve the optimal
1482// addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
1483// accordingly, based on the preferred addressing mode.
1484//
1485// Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
1486// MemOpFlags contains all the possible flags that can be used to compute the
1487// optimal addressing mode for load and store instructions.
1488// AddrMode contains all the possible load and store addressing modes available
1489// on Power (such as DForm, DSForm, DQForm, XForm, etc.)
1490//
1491// When adding new load and store instructions, it is possible that new address
1492// flags may need to be added into MemOpFlags, and a new addressing mode will
1493// need to be added to AddrMode. An entry of the new addressing mode (consisting
1494// of the minimal and main distinguishing address flags for the new load/store
1495// instructions) will need to be added into initializeAddrModeMap() below.
1496// Finally, when adding new addressing modes, getAddrModeForFlags() will need
1497// to be updated to account for selecting the optimal addressing mode.
1498// *****************************************************************************
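// ---------------------------------------------------------------------------
// Editor's note: the following is an illustrative sketch, not part of
// PPCISelLowering.cpp. It shows, under simplified assumptions, how a set of
// computed address-mode flags could be matched against per-mode flag sets
// like the ones initializeAddrModeMap() builds below. The flag values and the
// helper name pickAddrMode are hypothetical; the real logic lives in
// computeMOFlags() and getAddrModeForFlags().
#include <cstdint>
#include <map>
#include <vector>

enum SketchAddrMode { AM_None, AM_DForm, AM_DSForm };

// Hypothetical flag bits standing in for PPC::MOF_* values.
constexpr uint32_t MOF_ZExt = 1u << 0;
constexpr uint32_t MOF_RPlusSImm16 = 1u << 1;
constexpr uint32_t MOF_WordInt = 1u << 2;
constexpr uint32_t MOF_RPlusSImm16Mult4 = 1u << 3;
constexpr uint32_t MOF_DoubleWordInt = 1u << 4;

static SketchAddrMode
pickAddrMode(uint32_t Flags,
             const std::map<SketchAddrMode, std::vector<uint32_t>> &Map) {
  // A memory op matches a mode when its flags equal one of the flag sets
  // registered for that mode; otherwise no preferred mode is found.
  for (const auto &Entry : Map)
    for (uint32_t Candidate : Entry.second)
      if (Flags == Candidate)
        return Entry.first;
  return AM_None;
}
// e.g. a zero-extending word load of reg + 16-bit immediate
// (MOF_ZExt | MOF_RPlusSImm16 | MOF_WordInt) would map to AM_DForm.
// ---------------------------------------------------------------------------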
1499/// Initialize the map that relates the different addressing modes of the load
1500/// and store instructions to a set of flags. This ensures the load/store
1501/// instruction is correctly matched during instruction selection.
1502void PPCTargetLowering::initializeAddrModeMap() {
1503 AddrModesMap[PPC::AM_DForm] = {
1504 // LWZ, STW
1505 PPC::MOF_ZExt | PPC::MOF_RPlusSImm16 | PPC::MOF_WordInt,
1506 PPC::MOF_ZExt | PPC::MOF_RPlusLo | PPC::MOF_WordInt,
1507 PPC::MOF_ZExt | PPC::MOF_NotAddNorCst | PPC::MOF_WordInt,
1508 PPC::MOF_ZExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_WordInt,
1509 // LBZ, LHZ, STB, STH
1510 PPC::MOF_ZExt | PPC::MOF_RPlusSImm16 | PPC::MOF_SubWordInt,
1511 PPC::MOF_ZExt | PPC::MOF_RPlusLo | PPC::MOF_SubWordInt,
1512 PPC::MOF_ZExt | PPC::MOF_NotAddNorCst | PPC::MOF_SubWordInt,
1513 PPC::MOF_ZExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubWordInt,
1514 // LHA
1515 PPC::MOF_SExt | PPC::MOF_RPlusSImm16 | PPC::MOF_SubWordInt,
1516 PPC::MOF_SExt | PPC::MOF_RPlusLo | PPC::MOF_SubWordInt,
1517 PPC::MOF_SExt | PPC::MOF_NotAddNorCst | PPC::MOF_SubWordInt,
1518 PPC::MOF_SExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubWordInt,
1519 // LFS, LFD, STFS, STFD
1520 PPC::MOF_RPlusSImm16 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
1521 PPC::MOF_RPlusLo | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
1522 PPC::MOF_NotAddNorCst | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
1523 PPC::MOF_AddrIsSImm32 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
1524 };
1525 AddrModesMap[PPC::AM_DSForm] = {
1526 // LWA
1527 PPC::MOF_SExt | PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_WordInt,
1528 PPC::MOF_SExt | PPC::MOF_NotAddNorCst | PPC::MOF_WordInt,
1529 PPC::MOF_SExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_WordInt,
1530 // LD, STD
1531 PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_DoubleWordInt,
1532 PPC::MOF_NotAddNorCst | PPC::MOF_DoubleWordInt,
1533 PPC::MOF_AddrIsSImm32 | PPC::MOF_DoubleWordInt,
1534 // DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64
1535 PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetP9,
1536 PPC::MOF_NotAddNorCst | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetP9,
1537 PPC::MOF_AddrIsSImm32 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetP9,
1538 };
1539 AddrModesMap[PPC::AM_DQForm] = {
1540 // LXV, STXV
1541 PPC::MOF_RPlusSImm16Mult16 | PPC::MOF_Vector | PPC::MOF_SubtargetP9,
1542 PPC::MOF_NotAddNorCst | PPC::MOF_Vector | PPC::MOF_SubtargetP9,
1543 PPC::MOF_AddrIsSImm32 | PPC::MOF_Vector | PPC::MOF_SubtargetP9,
1544 };
1545 AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 |
1546 PPC::MOF_SubtargetP10};
1547 // TODO: Add mapping for quadword load/store.
1548}
1549
1550/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1551/// the desired ByVal argument alignment.
1552static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1553 if (MaxAlign == MaxMaxAlign)
1554 return;
1555 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1556 if (MaxMaxAlign >= 32 &&
1557 VTy->getPrimitiveSizeInBits().getFixedSize() >= 256)
1558 MaxAlign = Align(32);
1559 else if (VTy->getPrimitiveSizeInBits().getFixedSize() >= 128 &&
1560 MaxAlign < 16)
1561 MaxAlign = Align(16);
1562 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1563 Align EltAlign;
1564 getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1565 if (EltAlign > MaxAlign)
1566 MaxAlign = EltAlign;
1567 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1568 for (auto *EltTy : STy->elements()) {
1569 Align EltAlign;
1570 getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1571 if (EltAlign > MaxAlign)
1572 MaxAlign = EltAlign;
1573 if (MaxAlign == MaxMaxAlign)
1574 break;
1575 }
1576 }
1577}
1578
1579/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1580/// function arguments in the caller parameter area.
1581uint64_t PPCTargetLowering::getByValTypeAlignment(Type *Ty,
1582 const DataLayout &DL) const {
1583 // 16byte and wider vectors are passed on 16byte boundary.
1584 // The rest is 8 on PPC64 and 4 on PPC32 boundary.
1585 Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1586 if (Subtarget.hasAltivec())
1587 getMaxByValAlign(Ty, Alignment, Align(16));
1588 return Alignment.value();
1589}
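// ---------------------------------------------------------------------------
// Editor's illustrative sketch, not part of PPCISelLowering.cpp: the byval
// alignment rule above restated over plain integers. A vector member of 128
// bits or more raises the alignment to 16 (32 is only reachable when the cap
// passed to getMaxByValAlign() is at least 32); otherwise the base alignment
// is 8 on PPC64 and 4 on PPC32. The function name is hypothetical.
static unsigned sketchByValAlign(bool IsPPC64, bool HasAltivec,
                                 unsigned WidestVectorBits) {
  unsigned Alignment = IsPPC64 ? 8 : 4;
  if (HasAltivec && WidestVectorBits >= 128)
    Alignment = 16;
  return Alignment;
}
// e.g. sketchByValAlign(/*IsPPC64=*/true, /*HasAltivec=*/true, 128) == 16.
// ---------------------------------------------------------------------------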
1590
1591bool PPCTargetLowering::useSoftFloat() const {
1592 return Subtarget.useSoftFloat();
1593}
1594
1595bool PPCTargetLowering::hasSPE() const {
1596 return Subtarget.hasSPE();
1597}
1598
1599bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
1600 return VT.isScalarInteger();
1601}
1602
1603const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1604 switch ((PPCISD::NodeType)Opcode) {
1605 case PPCISD::FIRST_NUMBER: break;
1606 case PPCISD::FSEL: return "PPCISD::FSEL";
1607 case PPCISD::XSMAXC: return "PPCISD::XSMAXC";
1608 case PPCISD::XSMINC: return "PPCISD::XSMINC";
1609 case PPCISD::FCFID: return "PPCISD::FCFID";
1610 case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
1611 case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
1612 case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
1613 case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
1614 case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
1615 case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
1616 case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
1617 case PPCISD::FP_TO_UINT_IN_VSR:
1618    return "PPCISD::FP_TO_UINT_IN_VSR";
1619 case PPCISD::FP_TO_SINT_IN_VSR:
1620 return "PPCISD::FP_TO_SINT_IN_VSR";
1621 case PPCISD::FRE: return "PPCISD::FRE";
1622 case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
1623 case PPCISD::FTSQRT:
1624 return "PPCISD::FTSQRT";
1625 case PPCISD::FSQRT:
1626 return "PPCISD::FSQRT";
1627 case PPCISD::STFIWX: return "PPCISD::STFIWX";
1628 case PPCISD::VPERM: return "PPCISD::VPERM";
1629 case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
1630 case PPCISD::XXSPLTI_SP_TO_DP:
1631 return "PPCISD::XXSPLTI_SP_TO_DP";
1632 case PPCISD::XXSPLTI32DX:
1633 return "PPCISD::XXSPLTI32DX";
1634 case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
1635 case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
1636 case PPCISD::VECSHL: return "PPCISD::VECSHL";
1637 case PPCISD::CMPB: return "PPCISD::CMPB";
1638 case PPCISD::Hi: return "PPCISD::Hi";
1639 case PPCISD::Lo: return "PPCISD::Lo";
1640 case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
1641 case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1642 case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
1643 case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
1644 case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
1645 case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA";
1646 case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
1647 case PPCISD::SRL: return "PPCISD::SRL";
1648 case PPCISD::SRA: return "PPCISD::SRA";
1649 case PPCISD::SHL: return "PPCISD::SHL";
1650 case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
1651 case PPCISD::CALL: return "PPCISD::CALL";
1652 case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
1653 case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC";
1654 case PPCISD::CALL_RM:
1655 return "PPCISD::CALL_RM";
1656 case PPCISD::CALL_NOP_RM:
1657 return "PPCISD::CALL_NOP_RM";
1658 case PPCISD::CALL_NOTOC_RM:
1659 return "PPCISD::CALL_NOTOC_RM";
1660 case PPCISD::MTCTR: return "PPCISD::MTCTR";
1661 case PPCISD::BCTRL: return "PPCISD::BCTRL";
1662 case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
1663 case PPCISD::BCTRL_RM:
1664 return "PPCISD::BCTRL_RM";
1665 case PPCISD::BCTRL_LOAD_TOC_RM:
1666 return "PPCISD::BCTRL_LOAD_TOC_RM";
1667 case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
1668 case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
1669 case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
1670 case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1671 case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
1672 case PPCISD::MFVSR: return "PPCISD::MFVSR";
1673 case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
1674 case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
1675 case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
1676 case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
1677 case PPCISD::SCALAR_TO_VECTOR_PERMUTED:
1678 return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
1679 case PPCISD::ANDI_rec_1_EQ_BIT:
1680 return "PPCISD::ANDI_rec_1_EQ_BIT";
1681 case PPCISD::ANDI_rec_1_GT_BIT:
1682 return "PPCISD::ANDI_rec_1_GT_BIT";
1683 case PPCISD::VCMP: return "PPCISD::VCMP";
1684 case PPCISD::VCMP_rec: return "PPCISD::VCMP_rec";
1685 case PPCISD::LBRX: return "PPCISD::LBRX";
1686 case PPCISD::STBRX: return "PPCISD::STBRX";
1687 case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
1688 case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
1689 case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
1690 case PPCISD::STXSIX: return "PPCISD::STXSIX";
1691 case PPCISD::VEXTS: return "PPCISD::VEXTS";
1692 case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
1693 case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
1694 case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE";
1695 case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE";
1696 case PPCISD::ST_VSR_SCAL_INT:
1697 return "PPCISD::ST_VSR_SCAL_INT";
1698 case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
1699 case PPCISD::BDNZ: return "PPCISD::BDNZ";
1700 case PPCISD::BDZ: return "PPCISD::BDZ";
1701 case PPCISD::MFFS: return "PPCISD::MFFS";
1702 case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
1703 case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
1704 case PPCISD::CR6SET: return "PPCISD::CR6SET";
1705 case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
1706 case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
1707 case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
1708 case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1709 case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
1710 case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
1711 case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
1712 case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
1713 case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
1714 case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1715 case PPCISD::TLSGD_AIX: return "PPCISD::TLSGD_AIX";
1716 case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
1717 case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
1718 case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
1719 case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1720 case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1721 case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
1722 case PPCISD::PADDI_DTPREL:
1723 return "PPCISD::PADDI_DTPREL";
1724 case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
1725 case PPCISD::SC: return "PPCISD::SC";
1726 case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";
1727 case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";
1728 case PPCISD::RFEBB: return "PPCISD::RFEBB";
1729 case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
1730 case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
1731 case PPCISD::VABSD: return "PPCISD::VABSD";
1732 case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
1733 case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
1734 case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
1735 case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
1736 case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
1737 case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
1738 case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR";
1739 case PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR:
1740 return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
1741 case PPCISD::TLS_LOCAL_EXEC_MAT_ADDR:
1742 return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
1743 case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD";
1744 case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD";
1745 case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
1746 case PPCISD::XXMFACC: return "PPCISD::XXMFACC";
1747 case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
1748 case PPCISD::ZEXT_LD_SPLAT: return "PPCISD::ZEXT_LD_SPLAT";
1749 case PPCISD::SEXT_LD_SPLAT: return "PPCISD::SEXT_LD_SPLAT";
1750 case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
1751 case PPCISD::STRICT_FADDRTZ:
1752 return "PPCISD::STRICT_FADDRTZ";
1753 case PPCISD::STRICT_FCTIDZ:
1754 return "PPCISD::STRICT_FCTIDZ";
1755 case PPCISD::STRICT_FCTIWZ:
1756 return "PPCISD::STRICT_FCTIWZ";
1757 case PPCISD::STRICT_FCTIDUZ:
1758 return "PPCISD::STRICT_FCTIDUZ";
1759 case PPCISD::STRICT_FCTIWUZ:
1760 return "PPCISD::STRICT_FCTIWUZ";
1761 case PPCISD::STRICT_FCFID:
1762 return "PPCISD::STRICT_FCFID";
1763 case PPCISD::STRICT_FCFIDU:
1764 return "PPCISD::STRICT_FCFIDU";
1765 case PPCISD::STRICT_FCFIDS:
1766 return "PPCISD::STRICT_FCFIDS";
1767 case PPCISD::STRICT_FCFIDUS:
1768 return "PPCISD::STRICT_FCFIDUS";
1769 case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
1770 }
1771 return nullptr;
1772}
1773
1774EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
1775 EVT VT) const {
1776 if (!VT.isVector())
1777 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1778
1779 return VT.changeVectorElementTypeToInteger();
1780}
1781
1782bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
1783  assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1784 return true;
1785}
1786
1787//===----------------------------------------------------------------------===//
1788// Node matching predicates, for use by the tblgen matching code.
1789//===----------------------------------------------------------------------===//
1790
1791/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1792static bool isFloatingPointZero(SDValue Op) {
1793 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1794 return CFP->getValueAPF().isZero();
1795 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1796 // Maybe this has already been legalized into the constant pool?
1797 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1798 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1799 return CFP->getValueAPF().isZero();
1800 }
1801 return false;
1802}
1803
1804/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
1805/// true if Op is undef or if it matches the specified value.
1806static bool isConstantOrUndef(int Op, int Val) {
1807 return Op < 0 || Op == Val;
1808}
1809
1810/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1811/// VPKUHUM instruction.
1812/// The ShuffleKind distinguishes between big-endian operations with
1813/// two different inputs (0), either-endian operations with two identical
1814/// inputs (1), and little-endian operations with two different inputs (2).
1815/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1816bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1817 SelectionDAG &DAG) {
1818 bool IsLE = DAG.getDataLayout().isLittleEndian();
1819 if (ShuffleKind == 0) {
1820 if (IsLE)
1821 return false;
1822 for (unsigned i = 0; i != 16; ++i)
1823 if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1824 return false;
1825 } else if (ShuffleKind == 2) {
1826 if (!IsLE)
1827 return false;
1828 for (unsigned i = 0; i != 16; ++i)
1829 if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1830 return false;
1831 } else if (ShuffleKind == 1) {
1832 unsigned j = IsLE ? 0 : 1;
1833 for (unsigned i = 0; i != 8; ++i)
1834 if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
1835 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
1836 return false;
1837 }
1838 return true;
1839}
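// ---------------------------------------------------------------------------
// Editor's illustrative sketch, not part of PPCISelLowering.cpp. For the
// big-endian, two-input case (ShuffleKind == 0) the check above accepts a
// mask whose byte i is 2*i+1, i.e. the low-order byte of every halfword of
// the 32-byte concatenation of the two inputs:
//   {1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31}
// Undef (-1) entries are also accepted there via isConstantOrUndef().
static bool sketchIsVPKUHUMMaskBE(const int Mask[16]) {
  for (int i = 0; i != 16; ++i)
    if (Mask[i] >= 0 && Mask[i] != i * 2 + 1)
      return false;
  return true;
}
// ---------------------------------------------------------------------------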
1840
1841/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1842/// VPKUWUM instruction.
1843/// The ShuffleKind distinguishes between big-endian operations with
1844/// two different inputs (0), either-endian operations with two identical
1845/// inputs (1), and little-endian operations with two different inputs (2).
1846/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1847bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1848 SelectionDAG &DAG) {
1849 bool IsLE = DAG.getDataLayout().isLittleEndian();
1850 if (ShuffleKind == 0) {
1851 if (IsLE)
1852 return false;
1853 for (unsigned i = 0; i != 16; i += 2)
1854 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
1855 !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
1856 return false;
1857 } else if (ShuffleKind == 2) {
1858 if (!IsLE)
1859 return false;
1860 for (unsigned i = 0; i != 16; i += 2)
1861 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1862 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
1863 return false;
1864 } else if (ShuffleKind == 1) {
1865 unsigned j = IsLE ? 0 : 2;
1866 for (unsigned i = 0; i != 8; i += 2)
1867 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1868 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1869 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1870 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
1871 return false;
1872 }
1873 return true;
1874}
1875
1876/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1877/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1878/// current subtarget.
1879///
1880/// The ShuffleKind distinguishes between big-endian operations with
1881/// two different inputs (0), either-endian operations with two identical
1882/// inputs (1), and little-endian operations with two different inputs (2).
1883/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1884bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1885 SelectionDAG &DAG) {
1886 const PPCSubtarget& Subtarget =
1887 static_cast<const PPCSubtarget&>(DAG.getSubtarget());
1888 if (!Subtarget.hasP8Vector())
1889 return false;
1890
1891 bool IsLE = DAG.getDataLayout().isLittleEndian();
1892 if (ShuffleKind == 0) {
1893 if (IsLE)
1894 return false;
1895 for (unsigned i = 0; i != 16; i += 4)
1896 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||
1897 !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
1898 !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
1899 !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
1900 return false;
1901 } else if (ShuffleKind == 2) {
1902 if (!IsLE)
1903 return false;
1904 for (unsigned i = 0; i != 16; i += 4)
1905 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1906 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
1907 !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
1908 !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
1909 return false;
1910 } else if (ShuffleKind == 1) {
1911 unsigned j = IsLE ? 0 : 4;
1912 for (unsigned i = 0; i != 8; i += 4)
1913 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1914 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1915 !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
1916 !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
1917 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1918 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
1919 !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1920 !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1921 return false;
1922 }
1923 return true;
1924}
1925
1926/// isVMerge - Common function, used to match vmrg* shuffles.
1927///
1928static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
1929 unsigned LHSStart, unsigned RHSStart) {
1930 if (N->getValueType(0) != MVT::v16i8)
1931 return false;
1932  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
1933         "Unsupported merge size!");
1934
1935 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
1936 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
1937 if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
1938 LHSStart+j+i*UnitSize) ||
1939 !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
1940 RHSStart+j+i*UnitSize))
1941 return false;
1942 }
1943 return true;
1944}
1945
1946/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
1947/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
1948/// The ShuffleKind distinguishes between big-endian merges with two
1949/// different inputs (0), either-endian merges with two identical inputs (1),
1950/// and little-endian merges with two different inputs (2). For the latter,
1951/// the input operands are swapped (see PPCInstrAltivec.td).
1952bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1953 unsigned ShuffleKind, SelectionDAG &DAG) {
1954 if (DAG.getDataLayout().isLittleEndian()) {
1955 if (ShuffleKind == 1) // unary
1956 return isVMerge(N, UnitSize, 0, 0);
1957 else if (ShuffleKind == 2) // swapped
1958 return isVMerge(N, UnitSize, 0, 16);
1959 else
1960 return false;
1961 } else {
1962 if (ShuffleKind == 1) // unary
1963 return isVMerge(N, UnitSize, 8, 8);
1964 else if (ShuffleKind == 0) // normal
1965 return isVMerge(N, UnitSize, 8, 24);
1966 else
1967 return false;
1968 }
1969}
1970
1971/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
1972/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
1973/// The ShuffleKind distinguishes between big-endian merges with two
1974/// different inputs (0), either-endian merges with two identical inputs (1),
1975/// and little-endian merges with two different inputs (2). For the latter,
1976/// the input operands are swapped (see PPCInstrAltivec.td).
1977bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1978 unsigned ShuffleKind, SelectionDAG &DAG) {
1979 if (DAG.getDataLayout().isLittleEndian()) {
1980 if (ShuffleKind == 1) // unary
1981 return isVMerge(N, UnitSize, 8, 8);
1982 else if (ShuffleKind == 2) // swapped
1983 return isVMerge(N, UnitSize, 8, 24);
1984 else
1985 return false;
1986 } else {
1987 if (ShuffleKind == 1) // unary
1988 return isVMerge(N, UnitSize, 0, 0);
1989 else if (ShuffleKind == 0) // normal
1990 return isVMerge(N, UnitSize, 0, 16);
1991 else
1992 return false;
1993 }
1994}
1995
1996/**
1997 * Common function used to match vmrgew and vmrgow shuffles
1998 *
1999 * The indexOffset determines whether to look for even or odd words in
2000 * the shuffle mask. This is based on the endianness of the target
2001 * machine.
2002 * - Little Endian:
2003 * - Use offset of 0 to check for odd elements
2004 * - Use offset of 4 to check for even elements
2005 * - Big Endian:
2006 * - Use offset of 0 to check for even elements
2007 * - Use offset of 4 to check for odd elements
2008 * A detailed description of the vector element ordering for little endian and
2009 * big endian can be found at
2010 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
2011 * Targeting your applications - what little endian and big endian IBM XL C/C++
2012 * compiler differences mean to you
2013 *
2014 * The mask to the shuffle vector instruction specifies the indices of the
2015 * elements from the two input vectors to place in the result. The elements are
2016 * numbered in array-access order, starting with the first vector. These vectors
2017 * are always of type v16i8, thus each vector will contain 16 elements, each
2018 * 8 bits in size. More info on the shuffle vector can be found in the
2019 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
2020 * Language Reference.
2021 *
2022 * The RHSStartValue indicates whether the same input vectors are used (unary)
2023 * or two different input vectors are used, based on the following:
2024 * - If the instruction uses the same vector for both inputs, the range of the
2025 * indices will be 0 to 15. In this case, the RHSStart value passed should
2026 * be 0.
2027 * - If the instruction has two different vectors then the range of the
2028 * indices will be 0 to 31. In this case, the RHSStart value passed should
2029 * be 16 (indices 0-15 specify elements in the first vector while indices 16
2030 * to 31 specify elements in the second vector).
2031 *
2032 * \param[in] N The shuffle vector SD Node to analyze
2033 * \param[in] IndexOffset Specifies whether to look for even or odd elements
2034 * \param[in] RHSStartValue Specifies the starting index for the righthand input
2035 * vector to the shuffle_vector instruction
2036 * \return true iff this shuffle vector represents an even or odd word merge
2037 */
2038static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
2039 unsigned RHSStartValue) {
2040 if (N->getValueType(0) != MVT::v16i8)
2041 return false;
2042
2043 for (unsigned i = 0; i < 2; ++i)
2044 for (unsigned j = 0; j < 4; ++j)
2045 if (!isConstantOrUndef(N->getMaskElt(i*4+j),
2046 i*RHSStartValue+j+IndexOffset) ||
2047 !isConstantOrUndef(N->getMaskElt(i*4+j+8),
2048 i*RHSStartValue+j+IndexOffset+8))
2049 return false;
2050 return true;
2051}
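// ---------------------------------------------------------------------------
// Editor's illustrative sketch, not part of PPCISelLowering.cpp: the mask
// isVMerge() above accepts for a little-endian even-word merge with two
// different (swapped) inputs, i.e. IndexOffset = 4 and RHSStartValue = 16.
// The undef (-1) handling done by isConstantOrUndef() is omitted here.
static bool sketchIsEvenWordMergeLE(const int Mask[16]) {
  const unsigned IndexOffset = 4, RHSStart = 16;
  for (unsigned i = 0; i < 2; ++i)
    for (unsigned j = 0; j < 4; ++j)
      if (Mask[i * 4 + j] != int(i * RHSStart + j + IndexOffset) ||
          Mask[i * 4 + j + 8] != int(i * RHSStart + j + IndexOffset + 8))
        return false;
  return true;
}
// Accepts {4,5,6,7, 20,21,22,23, 12,13,14,15, 28,29,30,31}.
// ---------------------------------------------------------------------------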
2052
2053/**
2054 * Determine if the specified shuffle mask is suitable for the vmrgew or
2055 * vmrgow instructions.
2056 *
2057 * \param[in] N The shuffle vector SD Node to analyze
2058 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
2059 * \param[in] ShuffleKind Identify the type of merge:
2060 * - 0 = big-endian merge with two different inputs;
2061 * - 1 = either-endian merge with two identical inputs;
2062 * - 2 = little-endian merge with two different inputs (inputs are swapped for
2063 * little-endian merges).
2064 * \param[in] DAG The current SelectionDAG
2065 * \return true iff this shuffle mask matches a vmrgew/vmrgow word merge
2066 */
2067bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
2068 unsigned ShuffleKind, SelectionDAG &DAG) {
2069 if (DAG.getDataLayout().isLittleEndian()) {
2070 unsigned indexOffset = CheckEven ? 4 : 0;
2071 if (ShuffleKind == 1) // Unary
2072 return isVMerge(N, indexOffset, 0);
2073 else if (ShuffleKind == 2) // swapped
2074 return isVMerge(N, indexOffset, 16);
2075 else
2076 return false;
2077 }
2078 else {
2079 unsigned indexOffset = CheckEven ? 0 : 4;
2080 if (ShuffleKind == 1) // Unary
2081 return isVMerge(N, indexOffset, 0);
2082 else if (ShuffleKind == 0) // Normal
2083 return isVMerge(N, indexOffset, 16);
2084 else
2085 return false;
2086 }
2087 return false;
2088}
2089
2090/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
2091/// amount, otherwise return -1.
2092/// The ShuffleKind distinguishes between big-endian operations with two
2093/// different inputs (0), either-endian operations with two identical inputs
2094/// (1), and little-endian operations with two different inputs (2). For the
2095/// latter, the input operands are swapped (see PPCInstrAltivec.td).
2096int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
2097 SelectionDAG &DAG) {
2098 if (N->getValueType(0) != MVT::v16i8)
2099 return -1;
2100
2101 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2102
2103 // Find the first non-undef value in the shuffle mask.
2104 unsigned i;
2105 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
2106 /*search*/;
2107
2108 if (i == 16) return -1; // all undef.
2109
2110 // Otherwise, check to see if the rest of the elements are consecutively
2111 // numbered from this value.
2112 unsigned ShiftAmt = SVOp->getMaskElt(i);
2113 if (ShiftAmt < i) return -1;
2114
2115 ShiftAmt -= i;
2116 bool isLE = DAG.getDataLayout().isLittleEndian();
2117
2118 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
2119 // Check the rest of the elements to see if they are consecutive.
2120 for (++i; i != 16; ++i)
2121 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2122 return -1;
2123 } else if (ShuffleKind == 1) {
2124 // Check the rest of the elements to see if they are consecutive.
2125 for (++i; i != 16; ++i)
2126 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
2127 return -1;
2128 } else
2129 return -1;
2130
2131 if (isLE)
2132 ShiftAmt = 16 - ShiftAmt;
2133
2134 return ShiftAmt;
2135}
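// ---------------------------------------------------------------------------
// Editor's illustrative sketch, not part of PPCISelLowering.cpp: the shift
// amount isVSLDOIShuffleMask() computes for a fully-defined mask. A mask that
// counts up consecutively from S (S, S+1, ..., S+15) encodes a vsldoi by S
// bytes; for the swapped little-endian case the function returns 16 - S.
// Undef (-1) entries, handled above by isConstantOrUndef(), are ignored here.
static int sketchVSLDOIShift(const int Mask[16], bool IsLE) {
  int Shift = Mask[0];
  for (int i = 1; i != 16; ++i)
    if (Mask[i] != Shift + i)
      return -1; // not consecutive
  return IsLE ? 16 - Shift : Shift;
}
// e.g. for Mask = {3,4,...,18}: returns 3 on BE and 13 on LE.
// ---------------------------------------------------------------------------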
2136
2137/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
2138/// specifies a splat of a single element that is suitable for input to
2139/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
2140bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
2141  assert(N->getValueType(0) == MVT::v16i8 && isPowerOf2_32(EltSize) &&
2142         EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
2143
2144 // The consecutive indices need to specify an element, not part of two
2145 // different elements. So abandon ship early if this isn't the case.
2146 if (N->getMaskElt(0) % EltSize != 0)
2147 return false;
2148
2149 // This is a splat operation if each element of the permute is the same, and
2150 // if the value doesn't reference the second vector.
2151 unsigned ElementBase = N->getMaskElt(0);
2152
2153 // FIXME: Handle UNDEF elements too!
2154 if (ElementBase >= 16)
2155 return false;
2156
2157 // Check that the indices are consecutive, in the case of a multi-byte element
2158 // splatted with a v16i8 mask.
2159 for (unsigned i = 1; i != EltSize; ++i)
2160 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
2161 return false;
2162
2163 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
2164 if (N->getMaskElt(i) < 0) continue;
2165 for (unsigned j = 0; j != EltSize; ++j)
2166 if (N->getMaskElt(i+j) != N->getMaskElt(j))
2167 return false;
2168 }
2169 return true;
2170}
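// ---------------------------------------------------------------------------
// Editor's illustrative sketch, not part of PPCISelLowering.cpp: the byte
// pattern isSplatShuffleMask() accepts for a 4-byte splat. Splatting word
// element 2 of the first input as a v16i8 mask repeats bytes 8..11 four
// times: {8,9,10,11, 8,9,10,11, 8,9,10,11, 8,9,10,11}. The real check also
// tolerates undef (-1) entries after the first element.
static bool sketchIsSplat4(const int Mask[16]) {
  if (Mask[0] % 4 != 0 || Mask[0] >= 16)
    return false;                   // must start a 4-byte element of input 0
  for (int j = 0; j != 4; ++j)
    if (Mask[j] != Mask[0] + j)     // consecutive bytes within the element
      return false;
  for (int i = 4; i != 16; ++i)
    if (Mask[i] != Mask[i % 4])     // every later group repeats the first
      return false;
  return true;
}
// ---------------------------------------------------------------------------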
2171
2172/// Check that the mask is shuffling N byte elements. Within each N byte
2173/// element of the mask, the indices could be either in increasing or
2174/// decreasing order as long as they are consecutive.
2175/// \param[in] N the shuffle vector SD Node to analyze
2176/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
2177/// Word/DoubleWord/QuadWord).
2178/// \param[in] StepLen the delta indices number among the N byte element, if
2179/// the mask is in increasing/decreasing order then it is 1/-1.
2180/// \return true iff the mask is shuffling N byte elements.
2181static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
2182 int StepLen) {
2183  assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
2184         "Unexpected element width.");
2185  assert((StepLen == 1 || StepLen == -1) && "Unexpected element width.");
2186
2187 unsigned NumOfElem = 16 / Width;
2188 unsigned MaskVal[16]; // Width is never greater than 16
2189 for (unsigned i = 0; i < NumOfElem; ++i) {
2190 MaskVal[0] = N->getMaskElt(i * Width);
2191 if ((StepLen == 1) && (MaskVal[0] % Width)) {
2192 return false;
2193 } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
2194 return false;
2195 }
2196
2197 for (unsigned int j = 1; j < Width; ++j) {
2198 MaskVal[j] = N->getMaskElt(i * Width + j);
2199 if (MaskVal[j] != MaskVal[j-1] + StepLen) {
2200 return false;
2201 }
2202 }
2203 }
2204
2205 return true;
2206}
2207
2208bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2209 unsigned &InsertAtByte, bool &Swap, bool IsLE) {
2210 if (!isNByteElemShuffleMask(N, 4, 1))
2211 return false;
2212
2213 // Now we look at mask elements 0,4,8,12
2214 unsigned M0 = N->getMaskElt(0) / 4;
2215 unsigned M1 = N->getMaskElt(4) / 4;
2216 unsigned M2 = N->getMaskElt(8) / 4;
2217 unsigned M3 = N->getMaskElt(12) / 4;
2218 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
2219 unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
2220
2221 // Below, let H and L be arbitrary elements of the shuffle mask
2222 // where H is in the range [4,7] and L is in the range [0,3].
2223 // H, 1, 2, 3 or L, 5, 6, 7
2224 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
2225 (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
2226 ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
2227 InsertAtByte = IsLE ? 12 : 0;
2228 Swap = M0 < 4;
2229 return true;
2230 }
2231 // 0, H, 2, 3 or 4, L, 6, 7
2232 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
2233 (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
2234 ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
2235 InsertAtByte = IsLE ? 8 : 4;
2236 Swap = M1 < 4;
2237 return true;
2238 }
2239 // 0, 1, H, 3 or 4, 5, L, 7
2240 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
2241 (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
2242 ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
2243 InsertAtByte = IsLE ? 4 : 8;
2244 Swap = M2 < 4;
2245 return true;
2246 }
2247 // 0, 1, 2, H or 4, 5, 6, L
2248 if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
2249 (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
2250 ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
2251 InsertAtByte = IsLE ? 0 : 12;
2252 Swap = M3 < 4;
2253 return true;
2254 }
2255
2256 // If both vector operands for the shuffle are the same vector, the mask will
2257 // contain only elements from the first one and the second one will be undef.
2258 if (N->getOperand(1).isUndef()) {
2259 ShiftElts = 0;
2260 Swap = true;
2261 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2262 if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
2263 InsertAtByte = IsLE ? 12 : 0;
2264 return true;
2265 }
2266 if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2267 InsertAtByte = IsLE ? 8 : 4;
2268 return true;
2269 }
2270 if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2271 InsertAtByte = IsLE ? 4 : 8;
2272 return true;
2273 }
2274 if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2275 InsertAtByte = IsLE ? 0 : 12;
2276 return true;
2277 }
2278 }
2279
2280 return false;
2281}
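// ---------------------------------------------------------------------------
// Editor's illustrative sketch, not part of PPCISelLowering.cpp: one of the
// word patterns isXXINSERTWMask() recognizes on little-endian targets. With
// word elements {H, 1, 2, 3}, where H comes from the second input (H in
// [4,7]), word 0 of the result is replaced, which maps to InsertAtByte = 12
// on LE, and ShiftElts is read from the LittleEndianShifts table indexed by
// H & 3. The Swap output (M0 < 4 in the real code) is omitted for brevity.
static bool sketchXXINSERTWCase0LE(unsigned M0, unsigned M1, unsigned M2,
                                   unsigned M3, unsigned &ShiftElts,
                                   unsigned &InsertAtByte) {
  static const unsigned LittleEndianShifts[] = {2, 1, 0, 3};
  if (M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) {
    ShiftElts = LittleEndianShifts[M0 & 0x3];
    InsertAtByte = 12;
    return true; // e.g. M0 = 4 -> ShiftElts = 2, InsertAtByte = 12
  }
  return false;
}
// ---------------------------------------------------------------------------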
2282
2283bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2284 bool &Swap, bool IsLE) {
2285  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2286 // Ensure each byte index of the word is consecutive.
2287 if (!isNByteElemShuffleMask(N, 4, 1))
2288 return false;
2289
2290 // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2291 unsigned M0 = N->getMaskElt(0) / 4;
2292 unsigned M1 = N->getMaskElt(4) / 4;
2293 unsigned M2 = N->getMaskElt(8) / 4;
2294 unsigned M3 = N->getMaskElt(12) / 4;
2295
2296 // If both vector operands for the shuffle are the same vector, the mask will
2297 // contain only elements from the first one and the second one will be undef.
2298 if (N->getOperand(1).isUndef()) {
2299    assert(M0 < 4 && "Indexing into an undef vector?");
2300 if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2301 return false;
2302
2303 ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2304 Swap = false;
2305 return true;
2306 }
2307
2308 // Ensure each word index of the ShuffleVector Mask is consecutive.
2309 if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2310 return false;
2311
2312 if (IsLE) {
2313 if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2314 // Input vectors don't need to be swapped if the leading element
2315 // of the result is one of the 3 left elements of the second vector
2316 // (or if there is no shift to be done at all).
2317 Swap = false;
2318 ShiftElts = (8 - M0) % 8;
2319 } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2320 // Input vectors need to be swapped if the leading element
2321 // of the result is one of the 3 left elements of the first vector
2322 // (or if we're shifting by 4 - thereby simply swapping the vectors).
2323 Swap = true;
2324 ShiftElts = (4 - M0) % 4;
2325 }
2326
2327 return true;
2328 } else { // BE
2329 if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
2330 // Input vectors don't need to be swapped if the leading element
2331 // of the result is one of the 4 elements of the first vector.
2332 Swap = false;
2333 ShiftElts = M0;
2334 } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
2335 // Input vectors need to be swapped if the leading element
2336 // of the result is one of the 4 elements of the right vector.
2337 Swap = true;
2338 ShiftElts = M0 - 4;
2339 }
2340
2341 return true;
2342 }
2343}
2344
2345bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
2346  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2347
2348 if (!isNByteElemShuffleMask(N, Width, -1))
2349 return false;
2350
2351 for (int i = 0; i < 16; i += Width)
2352 if (N->getMaskElt(i) != i + Width - 1)
2353 return false;
2354
2355 return true;
2356}
2357
2358bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
2359 return isXXBRShuffleMaskHelper(N, 2);
2360}
2361
2362bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
2363 return isXXBRShuffleMaskHelper(N, 4);
2364}
2365
2366bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
2367 return isXXBRShuffleMaskHelper(N, 8);
2368}
2369
2370bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
2371 return isXXBRShuffleMaskHelper(N, 16);
2372}
2373
2374/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2375/// if the inputs to the instruction should be swapped and set \p DM to the
2376/// value for the immediate.
2377/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2378/// AND element 0 of the result comes from the first input (LE) or second input
2379/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2380/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2381/// mask.
2382bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
2383 bool &Swap, bool IsLE) {
2384  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2385
2386 // Ensure each byte index of the double word is consecutive.
2387 if (!isNByteElemShuffleMask(N, 8, 1))
2388 return false;
2389
2390 unsigned M0 = N->getMaskElt(0) / 8;
2391 unsigned M1 = N->getMaskElt(8) / 8;
2392  assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2393
2394 // If both vector operands for the shuffle are the same vector, the mask will
2395 // contain only elements from the first one and the second one will be undef.
2396 if (N->getOperand(1).isUndef()) {
2397 if ((M0 | M1) < 2) {
2398 DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2399 Swap = false;
2400 return true;
2401 } else
2402 return false;
2403 }
2404
2405 if (IsLE) {
2406 if (M0 > 1 && M1 < 2) {
2407 Swap = false;
2408 } else if (M0 < 2 && M1 > 1) {
2409 M0 = (M0 + 2) % 4;
2410 M1 = (M1 + 2) % 4;
2411 Swap = true;
2412 } else
2413 return false;
2414
2415 // Note: if control flow comes here that means Swap is already set above
2416 DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2417 return true;
2418 } else { // BE
2419 if (M0 < 2 && M1 > 1) {
2420 Swap = false;
2421 } else if (M0 > 1 && M1 < 2) {
2422 M0 = (M0 + 2) % 4;
2423 M1 = (M1 + 2) % 4;
2424 Swap = true;
2425 } else
2426 return false;
2427
2428 // Note: if control flow reaches this point, Swap has already been set above.
2429 DM = (M0 << 1) + (M1 & 1);
2430 return true;
2431 }
2432}
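// Worked example (illustrative, little-endian): a mask whose bytes are
// {16..23, 0..7} gives M0 = 16/8 = 2 and M1 = 0/8 = 0. That takes the
// "M0 > 1 && M1 < 2" path above, so Swap = false and
// DM = (((~M1) & 1) << 1) + ((~M0) & 1) = (1 << 1) + 1 = 3.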
2433
2434
2435/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2436/// appropriate for PPC mnemonics (which have a big endian bias - namely
2437/// elements are counted from the left of the vector register).
2438unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2439 SelectionDAG &DAG) {
2440 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2441 assert(isSplatShuffleMask(SVOp, EltSize));
2442 if (DAG.getDataLayout().isLittleEndian())
2443 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2444 else
2445 return SVOp->getMaskElt(0) / EltSize;
2446}
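// Worked example (illustrative): with EltSize == 4 on a little-endian
// target, a splat mask whose first element is 8 selects word 2 in LE
// numbering, and the routine returns (16 / 4) - 1 - (8 / 4) = 1, the same
// word counted from the left as the PPC mnemonics expect.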
2447
2448/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2449/// by using a vspltis[bhw] instruction of the specified element size, return
2450/// the constant being splatted. The ByteSize field indicates the number of
2451/// bytes of each element [124] -> [bhw].
2452SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2453 SDValue OpVal;
2454
2455 // If ByteSize of the splat is bigger than the element size of the
2456 // build_vector, then we have a case where we are checking for a splat where
2457 // multiple elements of the buildvector are folded together into a single
2458 // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
2459 unsigned EltSize = 16/N->getNumOperands();
2460 if (EltSize < ByteSize) {
2461 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2462 SDValue UniquedVals[4];
2463 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2464
2465 // See if all of the elements in the buildvector agree across.
2466 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2467 if (N->getOperand(i).isUndef()) continue;
2468 // If the element isn't a constant, bail fully out.
2469 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2470
2471 if (!UniquedVals[i&(Multiple-1)].getNode())
2472 UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2473 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2474 return SDValue(); // no match.
2475 }
2476
2477 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2478 // either constant or undef values that are identical for each chunk. See
2479 // if these chunks can form into a larger vspltis*.
2480
2481 // Check to see if all of the leading entries are either 0 or -1. If
2482 // neither, then this won't fit into the immediate field.
2483 bool LeadingZero = true;
2484 bool LeadingOnes = true;
2485 for (unsigned i = 0; i != Multiple-1; ++i) {
2486 if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2487
2488 LeadingZero &= isNullConstant(UniquedVals[i]);
2489 LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2490 }
2491 // Finally, check the least significant entry.
2492 if (LeadingZero) {
2493 if (!UniquedVals[Multiple-1].getNode())
2494 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
2495 int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
2496 if (Val < 16) // 0,0,0,4 -> vspltisw(4)
2497 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2498 }
2499 if (LeadingOnes) {
2500 if (!UniquedVals[Multiple-1].getNode())
2501 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2502 int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2503 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2504 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2505 }
2506
2507 return SDValue();
2508 }
2509
2510 // Check to see if this buildvec has a single non-undef value in its elements.
2511 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2512 if (N->getOperand(i).isUndef()) continue;
2513 if (!OpVal.getNode())
2514 OpVal = N->getOperand(i);
2515 else if (OpVal != N->getOperand(i))
2516 return SDValue();
2517 }
2518
2519 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
2520
2521 unsigned ValSizeInBytes = EltSize;
2522 uint64_t Value = 0;
2523 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2524 Value = CN->getZExtValue();
2525 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2526 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2527 Value = FloatToBits(CN->getValueAPF().convertToFloat());
2528 }
2529
2530 // If the splat value is larger than the element value, then we can never do
2531 // this splat. The only case that we could fit the replicated bits into our
2532 // immediate field for would be zero, and we prefer to use vxor for it.
2533 if (ValSizeInBytes < ByteSize) return SDValue();
2534
2535 // If the element value is larger than the splat value, check if it consists
2536 // of a repeated bit pattern of size ByteSize.
2537 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2538 return SDValue();
2539
2540 // Properly sign extend the value.
2541 int MaskVal = SignExtend32(Value, ByteSize * 8);
2542
2543 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2544 if (MaskVal == 0) return SDValue();
2545
2546 // Finally, if this value fits in a 5 bit sext field, return it
2547 if (SignExtend32<5>(MaskVal) == MaskVal)
2548 return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2549 return SDValue();
2550}
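// Worked example (illustrative): a v8i16 build_vector splatting the
// constant 0x0303, queried with ByteSize == 1, reaches the repeated-pattern
// check above: APInt(16, 0x0303).isSplat(8) holds, SignExtend32(0x0303, 8)
// gives 3, and 3 fits the 5-bit signed field, so 3 is returned (suitable as
// the immediate of a byte splat such as vspltisb 3).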
2551
2552//===----------------------------------------------------------------------===//
2553// Addressing Mode Selection
2554//===----------------------------------------------------------------------===//
2555
2556/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2557/// or 64-bit immediate, and if the value can be accurately represented as a
2558/// sign extension from a 16-bit value. If so, this returns true and the
2559/// immediate.
2560bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2561 if (!isa<ConstantSDNode>(N))
2562 return false;
2563
2564 Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue();
2565 if (N->getValueType(0) == MVT::i32)
2566 return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
2567 else
2568 return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2569}
2570bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2571 return isIntS16Immediate(Op.getNode(), Imm);
2572}
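// Worked example (illustrative): an i32 constant whose zero-extended value
// is 0xFFFF8000 gives Imm = -32768 and matches, whereas 0x00008000 also
// gives Imm = -32768 but the original value is +32768, so it is rejected.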
2573
2574/// Used when computing address flags for selecting loads and stores.
2575/// If we have an OR, check if the LHS and RHS are provably disjoint.
2576/// An OR of two provably disjoint values is equivalent to an ADD.
2577/// Most PPC load/store instructions compute the effective address as a sum,
2578/// so doing this conversion is useful.
2579static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
2580 if (N.getOpcode() != ISD::OR)
2581 return false;
2582 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2583 if (!LHSKnown.Zero.getBoolValue())
2584 return false;
2585 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2586 return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);
2587}
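// Worked example (illustrative): if the LHS is known to have its low four
// bits clear (say, a 16-byte-aligned pointer) and the RHS is a constant in
// [0, 15], every bit position is known zero in at least one operand, so the
// OR can never carry and behaves exactly like an ADD.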
2588
2589/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2590/// be represented as an indexed [r+r] operation.
2591bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2592 SDValue &Index,
2593 SelectionDAG &DAG) const {
2594 for (SDNode *U : N->uses()) {
2595 if (MemSDNode *Memop = dyn_cast<MemSDNode>(U)) {
2596 if (Memop->getMemoryVT() == MVT::f64) {
2597 Base = N.getOperand(0);
2598 Index = N.getOperand(1);
2599 return true;
2600 }
2601 }
2602 }
2603 return false;
2604}
2605
2606 /// isIntS34Immediate - This method tests whether the given node's value can be
2607/// accurately represented as a sign extension from a 34-bit value. If so,
2608/// this returns true and the immediate.
2609bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2610 if (!isa<ConstantSDNode>(N))
2611 return false;
2612
2613 Imm = (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2614 return isInt<34>(Imm);
2615}
2616bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
2617 return isIntS34Immediate(Op.getNode(), Imm);
2618}
2619
2620 /// SelectAddressRegReg - Given the specified address, check to see if it
2621/// can be represented as an indexed [r+r] operation. Returns false if it
2622/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2623/// non-zero and N can be represented by a base register plus a signed 16-bit
2624/// displacement, make a more precise judgement by checking (displacement % \p
2625/// EncodingAlignment).
2626bool PPCTargetLowering::SelectAddressRegReg(
2627 SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
2628 MaybeAlign EncodingAlignment) const {
2629 // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2630 // a [pc+imm].
2631 if (SelectAddressPCRel(N, Base))
2632 return false;
2633
2634 int16_t Imm = 0;
2635 if (N.getOpcode() == ISD::ADD) {
2636 // Is this an SPE load/store (f64), which can't handle a 16-bit offset?
2637 // SPE load/store can only handle 8-bit offsets.
2638 if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2639 return true;
2640 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2641 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2642 return false; // r+i
2643 if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2644 return false; // r+i
2645
2646 Base = N.getOperand(0);
2647 Index = N.getOperand(1);
2648 return true;
2649 } else if (N.getOpcode() == ISD::OR) {
2650 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2651 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2652 return false; // r+i can fold it if we can.
2653
2654 // If this is an or of disjoint bitfields, we can codegen this as an add
2655 // (for better address arithmetic) if the LHS and RHS of the OR are provably
2656 // disjoint.
2657 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2658
2659 if (LHSKnown.Zero.getBoolValue()) {
2660 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2661 // If all of the bits are known zero on the LHS or RHS, the add won't
2662 // carry.
2663 if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2664 Base = N.getOperand(0);
2665 Index = N.getOperand(1);
2666 return true;
2667 }
2668 }
2669 }
2670
2671 return false;
2672}
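// Worked example (illustrative, assuming no SPE and no PC-relative flag):
// with EncodingAlignment = 4, (add %r, 12) is rejected here because 12 fits
// a signed 16-bit field and is 4-byte aligned (the [r+imm] form is better),
// while (add %r, 13) is accepted, returning Base = %r and Index = the
// constant-13 operand for the indexed [r+r] form.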
2673
2674// If we happen to be doing an i64 load or store into a stack slot that has
2675// less than a 4-byte alignment, then the frame-index elimination may need to
2676// use an indexed load or store instruction (because the offset may not be a
2677// multiple of 4). The extra register needed to hold the offset comes from the
2678// register scavenger, and it is possible that the scavenger will need to use
2679// an emergency spill slot. As a result, we need to make sure that a spill slot
2680// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2681// stack slot.
2682static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2683 // FIXME: This does not handle the LWA case.
2684 if (VT != MVT::i64)
2685 return;
2686
2687 // NOTE: We'll exclude negative FIs here, which come from argument
2688 // lowering, because there are no known test cases triggering this problem
2689 // using packed structures (or similar). We can remove this exclusion if
2690 // we find such a test case. The reason why this is so test-case driven is
2691 // because this entire 'fixup' is only to prevent crashes (from the
2692 // register scavenger) on not-really-valid inputs. For example, if we have:
2693 // %a = alloca i1
2694 // %b = bitcast i1* %a to i64*
2695 // store i64 0, i64* %b
2696 // then the store should really be marked as 'align 1', but is not. If it
2697 // were marked as 'align 1' then the indexed form would have been
2698 // instruction-selected initially, and the problem this 'fixup' is preventing
2699 // won't happen regardless.
2700 if (FrameIdx < 0)
2701 return;
2702
2703 MachineFunction &MF = DAG.getMachineFunction();
2704 MachineFrameInfo &MFI = MF.getFrameInfo();
2705
2706 if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2707 return;
2708
2709 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2710 FuncInfo->setHasNonRISpills();
2711}
2712
2713/// Returns true if the address N can be represented by a base register plus
2714/// a signed 16-bit displacement [r+imm], and if it is not better
2715/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2716/// displacements that are multiples of that value.
2717bool PPCTargetLowering::SelectAddressRegImm(
2718 SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2719 MaybeAlign EncodingAlignment) const {
2720 // FIXME dl should come from parent load or store, not from address
2721 SDLoc dl(N);
2722
2723 // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2724 // a [pc+imm].
2725 if (SelectAddressPCRel(N, Base))
2726 return false;
2727
2728 // If this can be more profitably realized as r+r, fail.
2729 if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2730 return false;
2731
2732 if (N.getOpcode() == ISD::ADD) {
2733 int16_t imm = 0;
2734 if (isIntS16Immediate(N.getOperand(1), imm) &&
2735 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2736 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2737 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2738 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2739 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2740 } else {
2741 Base = N.getOperand(0);
2742 }
2743 return true; // [r+i]
2744 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2745 // Match LOAD (ADD (X, Lo(G))).
2746 assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
2747 && "Cannot handle constant offsets yet!");
2748 Disp = N.getOperand(1).getOperand(0); // The global address.
2749 assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2750 Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2751 Disp.getOpcode() == ISD::TargetConstantPool ||
2752 Disp.getOpcode() == ISD::TargetJumpTable);
2753 Base = N.getOperand(0);
2754 return true; // [&g+r]
2755 }
2756 } else if (N.getOpcode() == ISD::OR) {
2757 int16_t imm = 0;
2758 if (isIntS16Immediate(N.getOperand(1), imm) &&
2759 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2760 // If this is an or of disjoint bitfields, we can codegen this as an add
2761 // (for better address arithmetic) if the LHS and RHS of the OR are
2762 // provably disjoint.
2763 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2764
2765 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2766 // If all of the bits are known zero on the LHS or RHS, the add won't
2767 // carry.
2768 if (FrameIndexSDNode *FI =
2769 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2770 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2771 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2772 } else {
2773 Base = N.getOperand(0);
2774 }
2775 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2776 return true;
2777 }
2778 }
2779 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2780 // Loading from a constant address.
2781
2782 // If this address fits entirely in a 16-bit sext immediate field, codegen
2783 // this as "d, 0"
2784 int16_t Imm;
2785 if (isIntS16Immediate(CN, Imm) &&
2786 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
2787 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2788 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2789 CN->getValueType(0));
2790 return true;
2791 }
2792
2793 // Handle 32-bit sext immediates with LIS + addr mode.
2794 if ((CN->getValueType(0) == MVT::i32 ||
2795 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2796 (!EncodingAlignment ||
2797 isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2798 int Addr = (int)CN->getZExtValue();
2799
2800 // Otherwise, break this down into an LIS + disp.
2801 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2802
2803 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2804 MVT::i32);
2805 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2806 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2807 return true;
2808 }
2809 }
2810
2811 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2812 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2813 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2814 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2815 } else
2816 Base = N;
2817 return true; // [r+0]
2818}
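// Worked example (illustrative): for (add FrameIndex, 8) with
// EncodingAlignment = 4, the routine returns Disp = 8 and Base = the target
// frame index (after fixupFuncForFI), i.e. the D-form [r+imm] addressing
// used by loads/stores such as lwz or ld.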
2819
2820/// Similar to the 16-bit case but for instructions that take a 34-bit
2821/// displacement field (prefixed loads/stores).
2822bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp,
2823 SDValue &Base,
2824 SelectionDAG &DAG) const {
2825 // Only on 64-bit targets.
2826 if (N.getValueType() != MVT::i64)
2827 return false;
2828
2829 SDLoc dl(N);
2830 int64_t Imm = 0;
2831
2832 if (N.getOpcode() == ISD::ADD) {
2833 if (!isIntS34Immediate(N.getOperand(1), Imm))
2834 return false;
2835 Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2836 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2837 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2838 else
2839 Base = N.getOperand(0);
2840 return true;
2841 }
2842
2843 if (N.getOpcode() == ISD::OR) {
2844 if (!isIntS34Immediate(N.getOperand(1), Imm))
2845 return false;
2846 // If this is an or of disjoint bitfields, we can codegen this as an add
2847 // (for better address arithmetic) if the LHS and RHS of the OR are
2848 // provably disjoint.
2849 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2850 if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
2851 return false;
2852 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2853 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2854 else
2855 Base = N.getOperand(0);
2856 Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2857 return true;
2858 }
2859
2860 if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
2861 Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2862 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
2863 return true;
2864 }
2865
2866 return false;
2867}
2868
2869 /// SelectAddressRegRegOnly - Given the specified address, force it to be
2870/// represented as an indexed [r+r] operation.
2871bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2872 SDValue &Index,
2873 SelectionDAG &DAG) const {
2874 // Check to see if we can easily represent this as an [r+r] address. This
2875 // will fail if it thinks that the address is more profitably represented as
2876 // reg+imm, e.g. where imm = 0.
2877 if (SelectAddressRegReg(N, Base, Index, DAG))
2878 return true;
2879
2880 // If the address is the result of an add, we will utilize the fact that the
2881 // address calculation includes an implicit add. However, we can reduce
2882 // register pressure if we do not materialize a constant just for use as the
2883 // index register. We only get rid of the add if it is not an add of a
2884 // value and a 16-bit signed constant and both have a single use.
2885 int16_t imm = 0;
2886 if (N.getOpcode() == ISD::ADD &&
2887 (!isIntS16Immediate(N.getOperand(1), imm) ||
2888 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2889 Base = N.getOperand(0);
2890 Index = N.getOperand(1);
2891 return true;
2892 }
2893
2894 // Otherwise, do it the hard way, using R0 as the base register.
2895 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2896 N.getValueType());
2897 Index = N;
2898 return true;
2899}
2900
2901template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2902 Ty *PCRelCand = dyn_cast<Ty>(N);
2903 return PCRelCand && (PCRelCand->getTargetFlags() & PPCII::MO_PCREL_FLAG);
2904}
2905
2906/// Returns true if this address is a PC Relative address.
2907/// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
2908/// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
2909bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
2910 // This is a materialize PC Relative node. Always select this as PC Relative.
2911 Base = N;
2912 if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
2913 return true;
2914 if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
2915 isValidPCRelNode<GlobalAddressSDNode>(N) ||
2916 isValidPCRelNode<JumpTableSDNode>(N) ||
2917 isValidPCRelNode<BlockAddressSDNode>(N))
2918 return true;
2919 return false;
2920}
2921
2922/// Returns true if we should use a direct load into vector instruction
2923/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
2924static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
2925
2926 // If there are any uses other than scalar to vector, then we should
2927 // keep it as a scalar load -> direct move pattern to prevent multiple
2928 // loads.
2929 LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
2930 if (!LD)
2931 return false;
2932
2933 EVT MemVT = LD->getMemoryVT();
2934 if (!MemVT.isSimple())
2935 return false;
2936 switch(MemVT.getSimpleVT().SimpleTy) {
2937 case MVT::i64:
2938 break;
2939 case MVT::i32:
2940 if (!ST.hasP8Vector())
2941 return false;
2942 break;
2943 case MVT::i16:
2944 case MVT::i8:
2945 if (!ST.hasP9Vector())
2946 return false;
2947 break;
2948 default:
2949 return false;
2950 }
2951
2952 SDValue LoadedVal(N, 0);
2953 if (!LoadedVal.hasOneUse())
2954 return false;
2955
2956 for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
2957 UI != UE; ++UI)
2958 if (UI.getUse().get().getResNo() == 0 &&
2959 UI->getOpcode() != ISD::SCALAR_TO_VECTOR &&
2960 UI->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
2961 return false;
2962
2963 return true;
2964}
2965
2966/// getPreIndexedAddressParts - returns true by value, base pointer and
2967/// offset pointer and addressing mode by reference if the node's address
2968/// can be legally represented as pre-indexed load / store address.
2969bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
2970 SDValue &Offset,
2971 ISD::MemIndexedMode &AM,
2972 SelectionDAG &DAG) const {
2973 if (DisablePPCPreinc) return false;
2974
2975 bool isLoad = true;
2976 SDValue Ptr;
2977 EVT VT;
2978 unsigned Alignment;
2979 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2980 Ptr = LD->getBasePtr();
2981 VT = LD->getMemoryVT();
2982 Alignment = LD->getAlignment();
2983 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
2984 Ptr = ST->getBasePtr();
2985 VT = ST->getMemoryVT();
2986 Alignment = ST->getAlignment();
2987 isLoad = false;
2988 } else
2989 return false;
2990
2991 // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
2992 // instructions because we can fold these into a more efficient instruction
2993 // instead (such as LXSD).
2994 if (isLoad && usePartialVectorLoads(N, Subtarget)) {
2995 return false;
2996 }
2997
2998 // PowerPC doesn't have preinc load/store instructions for vectors
2999 if (VT.isVector())
3000 return false;
3001
3002 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
3003 // Common code will reject creating a pre-inc form if the base pointer
3004 // is a frame index, or if N is a store and the base pointer is either
3005 // the same as or a predecessor of the value being stored. Check for
3006 // those situations here, and try with swapped Base/Offset instead.
3007 bool Swap = false;
3008
3009 if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
3010 Swap = true;
3011 else if (!isLoad) {
3012 SDValue Val = cast<StoreSDNode>(N)->getValue();
3013 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
3014 Swap = true;
3015 }
3016
3017 if (Swap)
3018 std::swap(Base, Offset);
3019
3020 AM = ISD::PRE_INC;
3021 return true;
3022 }
3023
3024 // LDU/STU can only handle immediates that are a multiple of 4.
3025 if (VT != MVT::i64) {
3026 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, None))
3027 return false;
3028 } else {
3029 // LDU/STU need an address with at least 4-byte alignment.
3030 if (Alignment < 4)
3031 return false;
3032
3033 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
3034 return false;
3035 }
3036
3037 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3038 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
3039 // sext i32 to i64 when addr mode is r+i.
3040 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
3041 LD->getExtensionType() == ISD::SEXTLOAD &&
3042 isa<ConstantSDNode>(Offset))
3043 return false;
3044 }
3045
3046 AM = ISD::PRE_INC;
3047 return true;
3048}
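// Worked example (illustrative): an i64 store to (add %r, 16) with at least
// 4-byte alignment falls through to the [r+imm] path, yielding Base = %r,
// Offset = 16 and AM = ISD::PRE_INC, which lets the update form (e.g. stdu)
// be selected later.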
3049
3050//===----------------------------------------------------------------------===//
3051// LowerOperation implementation
3052//===----------------------------------------------------------------------===//
3053
3054/// Return true if we should reference labels using a PICBase, set the HiOpFlags
3055/// and LoOpFlags to the target MO flags.
3056static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
3057 unsigned &HiOpFlags, unsigned &LoOpFlags,
3058 const GlobalValue *GV = nullptr) {
3059 HiOpFlags = PPCII::MO_HA;
3060 LoOpFlags = PPCII::MO_LO;
3061
3062 // Don't use the pic base if not in PIC relocation model.
3063 if (IsPIC) {
3064 HiOpFlags |= PPCII::MO_PIC_FLAG;
3065 LoOpFlags |= PPCII::MO_PIC_FLAG;
3066 }
3067}
3068
3069static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
3070 SelectionDAG &DAG) {
3071 SDLoc DL(HiPart);
3072 EVT PtrVT = HiPart.getValueType();
3073 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
3074
3075 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
3076 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
3077
3078 // With PIC, the first instruction is actually "GR+hi(&G)".
3079 if (isPIC)
3080 Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
3081 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
3082
3083 // Generate non-pic code that has direct accesses to the constant pool.
3084 // The address of the global is just (hi(&g)+lo(&g)).
3085 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
3086}
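// Rough sketch (assuming the usual @ha/@l relocations): in the non-PIC case
// this lowers to a two-instruction sequence along the lines of
//   lis  rD, sym@ha
//   addi rD, rD, sym@l
// while the PIC case first adds the PIC base register into the high part.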
3087
3088static void setUsesTOCBasePtr(MachineFunction &MF) {
3089 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3090 FuncInfo->setUsesTOCBasePtr();
3091}
3092
3093static void setUsesTOCBasePtr(SelectionDAG &DAG) {
3094 setUsesTOCBasePtr(DAG.getMachineFunction());
3095}
3096
3097SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
3098 SDValue GA) const {
3099 const bool Is64Bit = Subtarget.isPPC64();
3100 EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
3101 SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT)
3102 : Subtarget.isAIXABI()
3103 ? DAG.getRegister(PPC::R2, VT)
3104 : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
3105 SDValue Ops[] = { GA, Reg };
3106 return DAG.getMemIntrinsicNode(
3107 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
3108 MachinePointerInfo::getGOT(DAG.getMachineFunction()), None,
3109 MachineMemOperand::MOLoad);
3110}
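// Rough sketch (assumption about later selection): on 64-bit ELF the
// TOC_ENTRY node becomes a load through the TOC pointer in r2, e.g.
// ld rD, sym@toc(r2) (or an addis/ld pair under the medium code model);
// 32-bit SVR4 PIC and AIX use their GOT/TOC base register analogously.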
3111
3112SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
3113 SelectionDAG &DAG) const {
3114 EVT PtrVT = Op.getValueType();
3115 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3116 const Constant *C = CP->getConstVal();
3117
3118 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3119 // The actual address of the GlobalValue is stored in the TOC.
3120 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3121 if (Subtarget.isUsingPCRelativeCalls()) {
3122 SDLoc DL(CP);
3123 EVT Ty = getPointerTy(DAG.getDataLayout());
3124 SDValue ConstPool = DAG.getTargetConstantPool(
3125 C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
3126 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
3127 }
3128 setUsesTOCBasePtr(DAG);
3129 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
3130 return getTOCEntry(DAG, SDLoc(CP), GA);
3131 }
3132
3133 unsigned MOHiFlag, MOLoFlag;
3134 bool IsPIC = isPositionIndependent();
3135 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3136
3137 if (IsPIC && Subtarget.isSVR4ABI()) {
3138 SDValue GA =
3139 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
3140 return getTOCEntry(DAG, SDLoc(CP), GA);
3141 }
3142
3143 SDValue CPIHi =
3144 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
3145 SDValue CPILo =
3146 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
3147 return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
3148}
3149
3150// For 64-bit PowerPC, prefer the more compact relative encodings.
3151// This trades 32 bits per jump table entry for one or two instructions
3152// on the jump site.
3153unsigned PPCTargetLowering::getJumpTableEncoding() const {
3154 if (isJumpTableRelative())
3155 return MachineJumpTableInfo::EK_LabelDifference32;
3156
3157 return TargetLowering::getJumpTableEncoding();
3158}
3159
3160bool PPCTargetLowering::isJumpTableRelative() const {
3161 if (UseAbsoluteJumpTables)
3162 return false;
3163 if (Subtarget.isPPC64() || Subtarget.isAIXABI())
3164 return true;
3165 return TargetLowering::isJumpTableRelative();
3166}
3167
3168SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
3169 SelectionDAG &DAG) const {
3170 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3171 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3172
3173 switch (getTargetMachine().getCodeModel()) {
3174 case CodeModel::Small:
3175 case CodeModel::Medium:
3176 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3177 default:
3178 return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
3179 getPointerTy(DAG.getDataLayout()));
3180 }
3181}
3182
3183const MCExpr *
3184PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
3185 unsigned JTI,
3186 MCContext &Ctx) const {
3187 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3188 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3189
3190 switch (getTargetMachine().getCodeModel()) {
3191 case CodeModel::Small:
3192 case CodeModel::Medium:
3193 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3194 default:
3195 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
3196 }
3197}
3198
3199SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
3200 EVT PtrVT = Op.getValueType();
3201 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
3202
3203 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3204 if (Subtarget.isUsingPCRelativeCalls()) {
3205 SDLoc DL(JT);
3206 EVT Ty = getPointerTy(DAG.getDataLayout());
3207 SDValue GA =
3208 DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
3209 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3210 return MatAddr;
3211 }
3212
3213 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3214 // The actual address of the GlobalValue is stored in the TOC.
3215 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3216 setUsesTOCBasePtr(DAG);
3217 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3218 return getTOCEntry(DAG, SDLoc(JT), GA);
3219 }
3220
3221 unsigned MOHiFlag, MOLoFlag;
3222 bool IsPIC = isPositionIndependent();
3223 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3224
3225 if (IsPIC && Subtarget.isSVR4ABI()) {
3226 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
3227 PPCII::MO_PIC_FLAG);
3228 return getTOCEntry(DAG, SDLoc(GA), GA);
3229 }
3230
3231 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
3232 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
3233 return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
3234}
3235
3236SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
3237 SelectionDAG &DAG) const {
3238 EVT PtrVT = Op.getValueType();
3239 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
3240 const BlockAddress *BA = BASDN->getBlockAddress();
3241
3242 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3243 if (Subtarget.isUsingPCRelativeCalls()) {
3244 SDLoc DL(BASDN);
3245 EVT Ty = getPointerTy(DAG.getDataLayout());
3246 SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
3247 PPCII::MO_PCREL_FLAG);
3248 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3249 return MatAddr;
3250 }
3251
3252 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3253 // The actual BlockAddress is stored in the TOC.
3254 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3255 setUsesTOCBasePtr(DAG);
3256 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
3257 return getTOCEntry(DAG, SDLoc(BASDN), GA);
3258 }
3259
3260 // 32-bit position-independent ELF stores the BlockAddress in the .got.
3261 if (Subtarget.is32BitELFABI() && isPositionIndependent())
3262 return getTOCEntry(
3263 DAG, SDLoc(BASDN),
3264 DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
3265
3266 unsigned MOHiFlag, MOLoFlag;
3267 bool IsPIC = isPositionIndependent();
3268 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3269 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
3270 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
3271 return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3272}
3273
3274SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3275 SelectionDAG &DAG) const {
3276 if (Subtarget.isAIXABI())
3277 return LowerGlobalTLSAddressAIX(Op, DAG);
3278
3279 return LowerGlobalTLSAddressLinux(Op, DAG);
3280}
3281
3282SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
3283 SelectionDAG &DAG) const {
3284 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3285
3286 if (DAG.getTarget().useEmulatedTLS())
3287 report_fatal_error("Emulated TLS is not yet supported on AIX");
3288
3289 SDLoc dl(GA);
3290 const GlobalValue *GV = GA->getGlobal();
3291 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3292
3293 // The general-dynamic model is the only access model supported for now, so
3294 // all the GlobalTLSAddress nodes are lowered with this model.
3295 // We need to generate two TOC entries, one for the variable offset, one for
3296 // the region handle. The global address for the TOC entry of the region
3297 // handle is created with the MO_TLSGDM_FLAG flag and the global address
3298 // for the TOC entry of the variable offset is created with MO_TLSGD_FLAG.
3299 SDValue VariableOffsetTGA =
3300 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
3301 SDValue RegionHandleTGA =
3302 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGDM_FLAG);
3303 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3304 SDValue RegionHandle = getTOCEntry(DAG, dl, RegionHandleTGA);
3305 return DAG.getNode(PPCISD::TLSGD_AIX, dl, PtrVT, VariableOffset,
3306 RegionHandle);
3307}
3308
3309SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op,
3310 SelectionDAG &DAG) const {
3311 // FIXME: TLS addresses currently use medium model code sequences,
3312 // which is the most useful form. Eventually support for small and
3313 // large models could be added if users need it, at the cost of
3314 // additional complexity.
3315 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3316 if (DAG.getTarget().useEmulatedTLS())
3317 return LowerToTLSEmulatedModel(GA, DAG);
3318
3319 SDLoc dl(GA);
3320 const GlobalValue *GV = GA->getGlobal();
3321 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3322 bool is64bit = Subtarget.isPPC64();
3323 const Module *M = DAG.getMachineFunction().getFunction().getParent();
3324 PICLevel::Level picLevel = M->getPICLevel();
3325
3326 const TargetMachine &TM = getTargetMachine();
3327 TLSModel::Model Model = TM.getTLSModel(GV);
3328
3329 if (Model == TLSModel::LocalExec) {
3330 if (Subtarget.isUsingPCRelativeCalls()) {
3331 SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3332 SDValue TGA = DAG.getTargetGlobalAddress(
3333 GV, dl, PtrVT, 0, (PPCII::MO_PCREL_FLAG | PPCII::MO_TPREL_FLAG));
3334 SDValue MatAddr =
3335 DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
3336 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3337 }
3338
3339 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3340 PPCII::MO_TPREL_HA);
3341 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3342 PPCII::MO_TPREL_LO);
3343 SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3344 : DAG.getRegister(PPC::R2, MVT::i32);
3345
3346 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3347 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3348 }
3349
3350 if (Model == TLSModel::InitialExec) {
3351 bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3352 SDValue TGA = DAG.getTargetGlobalAddress(
3353 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3354 SDValue TGATLS = DAG.getTargetGlobalAddress(
3355 GV, dl, PtrVT, 0,
3356 IsPCRel ? (PPCII::MO_TLS | PPCII::MO_PCREL_FLAG) : PPCII::MO_TLS);
3357 SDValue TPOffset;
3358 if (IsPCRel) {
3359 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
3360 TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3361 MachinePointerInfo());
3362 } else {
3363 SDValue GOTPtr;
3364 if (is64bit) {
3365 setUsesTOCBasePtr(DAG);
3366 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3367 GOTPtr =
3368 DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
3369 } else {
3370 if (!TM.isPositionIndependent())
3371 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3372 else if (picLevel == PICLevel::SmallPIC)
3373 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3374 else
3375 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3376 }
3377 TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3378 }
3379 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3380 }
3381
3382 if (Model == TLSModel::GeneralDynamic) {
3383 if (Subtarget.isUsingPCRelativeCalls()) {
3384 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3385 PPCII::MO_GOT_TLSGD_PCREL_FLAG);
3386 return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3387 }
3388
3389 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3390 SDValue GOTPtr;
3391 if (is64bit) {
3392 setUsesTOCBasePtr(DAG);
3393 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3394 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3395 GOTReg, TGA);
3396 } else {
3397 if (picLevel == PICLevel::SmallPIC)
3398 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3399 else
3400 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3401 }
3402 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3403 GOTPtr, TGA, TGA);
3404 }
3405
3406 if (Model == TLSModel::LocalDynamic) {
3407 if (Subtarget.isUsingPCRelativeCalls()) {
3408 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3409 PPCII::MO_GOT_TLSLD_PCREL_FLAG);
3410 SDValue MatPCRel =
3411 DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3412 return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3413 }
3414
3415 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3416 SDValue GOTPtr;
3417 if (is64bit) {
3418 setUsesTOCBasePtr(DAG);
3419 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3420 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3421 GOTReg, TGA);
3422 } else {
3423 if (picLevel == PICLevel::SmallPIC)
3424 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3425 else
3426 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3427 }
3428 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3429 PtrVT, GOTPtr, TGA, TGA);
3430 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3431 PtrVT, TLSAddr, TGA);
3432 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3433 }
3434
3435 llvm_unreachable("Unknown TLS model!")::llvm::llvm_unreachable_internal("Unknown TLS model!", "llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 3435)
;
3436}
3437
3438SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3439 SelectionDAG &DAG) const {
3440 EVT PtrVT = Op.getValueType();
3441 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3442 SDLoc DL(GSDN);
3443 const GlobalValue *GV = GSDN->getGlobal();
3444
3445 // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3446 // The actual address of the GlobalValue is stored in the TOC.
3447 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3448 if (Subtarget.isUsingPCRelativeCalls()) {
3449 EVT Ty = getPointerTy(DAG.getDataLayout());
3450 if (isAccessedAsGotIndirect(Op)) {
3451 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3452 PPCII::MO_PCREL_FLAG |
3453 PPCII::MO_GOT_FLAG);
3454 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3455 SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3456 MachinePointerInfo());
3457 return Load;
3458 } else {
3459 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3460 PPCII::MO_PCREL_FLAG);
3461 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3462 }
3463 }
3464 setUsesTOCBasePtr(DAG);
3465 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3466 return getTOCEntry(DAG, DL, GA);
3467 }
3468
3469 unsigned MOHiFlag, MOLoFlag;
3470 bool IsPIC = isPositionIndependent();
3471 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3472
3473 if (IsPIC && Subtarget.isSVR4ABI()) {
3474 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3475 GSDN->getOffset(),
3476 PPCII::MO_PIC_FLAG);
3477 return getTOCEntry(DAG, DL, GA);
3478 }
3479
3480 SDValue GAHi =
3481 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3482 SDValue GALo =
3483 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3484
3485 return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3486}
3487
3488SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3489 bool IsStrict = Op->isStrictFPOpcode();
3490 ISD::CondCode CC =
3491 cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();
3492 SDValue LHS = Op.getOperand(IsStrict ? 1 : 0);
3493 SDValue RHS = Op.getOperand(IsStrict ? 2 : 1);
3494 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
3495 EVT LHSVT = LHS.getValueType();
3496 SDLoc dl(Op);
3497
3498 // Soften the setcc with libcall if it is fp128.
3499 if (LHSVT == MVT::f128) {
3500 assert(!Subtarget.hasP9Vector() &&
3501 "SETCC for f128 is already legal under Power9!");
3502 softenSetCCOperands(DAG, LHSVT, LHS, RHS, CC, dl, LHS, RHS, Chain,
3503 Op->getOpcode() == ISD::STRICT_FSETCCS);
3504 if (RHS.getNode())
3505 LHS = DAG.getNode(ISD::SETCC, dl, Op.getValueType(), LHS, RHS,
3506 DAG.getCondCode(CC));
3507 if (IsStrict)
3508 return DAG.getMergeValues({LHS, Chain}, dl);
3509 return LHS;
3510 }
3511
3512 assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");
3513
3514 if (Op.getValueType() == MVT::v2i64) {
3515 // When the operands themselves are v2i64 values, we need to do something
3516 // special because VSX has no underlying comparison operations for these.
3517 if (LHS.getValueType() == MVT::v2i64) {
3518 // Equality can be handled by casting to the legal type for Altivec
3519 // comparisons, everything else needs to be expanded.
3520 if (CC != ISD::SETEQ && CC != ISD::SETNE)
3521 return SDValue();
3522 SDValue SetCC32 = DAG.getSetCC(
3523 dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
3524 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC);
3525 int ShuffV[] = {1, 0, 3, 2};
3526 SDValue Shuff =
3527 DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV);
3528 return DAG.getBitcast(MVT::v2i64,
3529 DAG.getNode(CC == ISD::SETEQ ? ISD::AND : ISD::OR,
3530 dl, MVT::v4i32, Shuff, SetCC32));
3531 }
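// Why the swap-and-combine above works (illustrative note): each 64-bit
// lane is equal iff both of its 32-bit halves compare equal, so the
// {1,0,3,2} shuffle swaps the halves of the v4i32 comparison result and the
// AND (or OR for SETNE, where a lane differs iff either half differs)
// recombines them into an all-ones/all-zeros v2i64 lane.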
3532
3533 // We handle most of these in the usual way.
3534 return Op;
3535 }
3536
3537 // If we're comparing for equality to zero, expose the fact that this is
3538 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3539 // fold the new nodes.
3540 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3541 return V;
3542
3543 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
3544 // Leave comparisons against 0 and -1 alone for now, since they're usually
3545 // optimized. FIXME: revisit this when we can custom lower all setcc
3546 // optimizations.
3547 if (C->isAllOnes() || C->isZero())
3548 return SDValue();
3549 }
3550
3551 // If we have an integer seteq/setne, turn it into a compare against zero
3552 // by xor'ing the rhs with the lhs, which is faster than setting a
3553 // condition register, reading it back out, and masking the correct bit. The
3554 // normal approach here uses sub to do this instead of xor. Using xor exposes
3555 // the result to other bit-twiddling opportunities.
3556 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3557 EVT VT = Op.getValueType();
3558 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, LHS, RHS);
3559 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3560 }
3561 return SDValue();
3562}
3563
3564SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3565 SDNode *Node = Op.getNode();
3566 EVT VT = Node->getValueType(0);
3567 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3568 SDValue InChain = Node->getOperand(0);
3569 SDValue VAListPtr = Node->getOperand(1);
3570 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3571 SDLoc dl(Node);
3572
3573 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3574
3575 // gpr_index
3576 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3577 VAListPtr, MachinePointerInfo(SV), MVT::i8);
3578 InChain = GprIndex.getValue(1);
3579
3580 if (VT == MVT::i64) {
3581 // Check if GprIndex is even
3582 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3583 DAG.getConstant(1, dl, MVT::i32));
3584 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3585 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3586 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3587 DAG.getConstant(1, dl, MVT::i32));
3588 // Align GprIndex to be even if it isn't
3589 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3590 GprIndex);
3591 }
3592
3593 // fpr index is 1 byte after gpr
3594 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3595 DAG.getConstant(1, dl, MVT::i32));
3596
3597 // fpr
3598 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3599 FprPtr, MachinePointerInfo(SV), MVT::i8);
3600 InChain = FprIndex.getValue(1);
3601
3602 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3603 DAG.getConstant(8, dl, MVT::i32));
3604
3605 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3606 DAG.getConstant(4, dl, MVT::i32));
3607
3608 // areas
3609 SDValue OverflowArea =
3610 DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3611 InChain = OverflowArea.getValue(1);
3612
3613 SDValue RegSaveArea =
3614 DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3615 InChain = RegSaveArea.getValue(1);
3616
3617 // select overflow_area if index > 8
3618 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3619 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3620
3621 // adjustment constant gpr_index * 4/8
3622 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3623 VT.isInteger() ? GprIndex : FprIndex,
3624 DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3625 MVT::i32));
3626
3627 // OurReg = RegSaveArea + RegConstant
3628 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3629 RegConstant);
3630
3631 // Floating types are 32 bytes into RegSaveArea
3632 if (VT.isFloatingPoint())
3633 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3634 DAG.getConstant(32, dl, MVT::i32));
3635
3636 // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3637 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3638 VT.isInteger() ? GprIndex : FprIndex,
3639 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3640 MVT::i32));
3641
3642 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3643 VT.isInteger() ? VAListPtr : FprPtr,
3644 MachinePointerInfo(SV), MVT::i8);
3645
3646 // determine if we should load from reg_save_area or overflow_area
3647 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3648
3649 // increase overflow_area by 4/8 if gpr/fpr > 8
3650 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3651 DAG.getConstant(VT.isInteger() ? 4 : 8,
3652 dl, MVT::i32));
3653
3654 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3655 OverflowAreaPlusN);
3656
3657 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3658 MachinePointerInfo(), MVT::i32);
3659
3660 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3661}
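// Layout assumed by the code above (32-bit SVR4 va_list, summarized for
// illustration): byte 0 holds gpr_index, byte 1 holds fpr_index, offset 4
// the overflow-area pointer, offset 8 the register-save-area pointer; GPR
// arguments sit at RegSaveArea + 4*gpr_index and FP arguments 32 bytes
// further in, at RegSaveArea + 32 + 8*fpr_index.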
3662
3663SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3664 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3665
3666 // We have to copy the entire va_list struct:
3667 // 2*sizeof(char) + 2 bytes alignment padding + 2*sizeof(char*) = 12 bytes
3668 return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3669 DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3670 false, true, false, MachinePointerInfo(),
3671 MachinePointerInfo());
3672}
3673
3674SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3675 SelectionDAG &DAG) const {
3676 if (Subtarget.isAIXABI())
3677 report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");
3678
3679 return Op.getOperand(0);
3680}
3681
3682SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
3683 MachineFunction &MF = DAG.getMachineFunction();
3684 PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();
3685
3686 assert((Op.getOpcode() == ISD::INLINEASM ||
3687 Op.getOpcode() == ISD::INLINEASM_BR) &&
3688 "Expecting Inline ASM node.");
3689
3690 // If an LR store is already known to be required then there is no point in
3691 // checking this ASM as well.
3692 if (MFI.isLRStoreRequired())
3693 return Op;
3694
3695 // Inline ASM nodes have an optional last operand that is an incoming Flag of
3696 // type MVT::Glue. We want to ignore this last operand if that is the case.
3697 unsigned NumOps = Op.getNumOperands();
3698 if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
3699 --NumOps;
3700
3701 // Check all operands that may contain the LR.
3702 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
3703 unsigned Flags = cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue();
3704 unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
3705 ++i; // Skip the ID value.
3706
3707 switch (InlineAsm::getKind(Flags)) {
3708 default:
3709 llvm_unreachable("Bad flags!");
3710 case InlineAsm::Kind_RegUse:
3711 case InlineAsm::Kind_Imm:
3712 case InlineAsm::Kind_Mem:
3713 i += NumVals;
3714 break;
3715 case InlineAsm::Kind_Clobber:
3716 case InlineAsm::Kind_RegDef:
3717 case InlineAsm::Kind_RegDefEarlyClobber: {
3718 for (; NumVals; --NumVals, ++i) {
3719 Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
3720 if (Reg != PPC::LR && Reg != PPC::LR8)
3721 continue;
3722 MFI.setLRStoreRequired();
3723 return Op;
3724 }
3725 break;
3726 }
3727 }
3728 }
3729
3730 return Op;
3731}
3732
3733SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3734 SelectionDAG &DAG) const {
3735 if (Subtarget.isAIXABI())
3736 report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");
3737
3738 SDValue Chain = Op.getOperand(0);
3739 SDValue Trmp = Op.getOperand(1); // trampoline
3740 SDValue FPtr = Op.getOperand(2); // nested function
3741 SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3742 SDLoc dl(Op);
3743
3744 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3745 bool isPPC64 = (PtrVT == MVT::i64);
3746 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3747
3748 TargetLowering::ArgListTy Args;
3749 TargetLowering::ArgListEntry Entry;
3750
3751 Entry.Ty = IntPtrTy;
3752 Entry.Node = Trmp; Args.push_back(Entry);
3753
3754 // TrampSize == (isPPC64 ? 48 : 40);
3755 Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
3756 isPPC64 ? MVT::i64 : MVT::i32);
3757 Args.push_back(Entry);
3758
3759 Entry.Node = FPtr; Args.push_back(Entry);
3760 Entry.Node = Nest; Args.push_back(Entry);
3761
3762 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
3763 TargetLowering::CallLoweringInfo CLI(DAG);
3764 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3765 CallingConv::C, Type::getVoidTy(*DAG.getContext()),
3766 DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
3767
3768 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3769 return CallResult.second;
3770}
3771
3772SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3773 MachineFunction &MF = DAG.getMachineFunction();
3774 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3775 EVT PtrVT = getPointerTy(MF.getDataLayout());
3776
3777 SDLoc dl(Op);
3778
3779 if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
3780 // vastart just stores the address of the VarArgsFrameIndex slot into the
3781 // memory location argument.
3782 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3783 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3784 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3785 MachinePointerInfo(SV));
3786 }
3787
3788 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
3789 // We suppose the given va_list is already allocated.
3790 //
3791 // typedef struct {
3792 // char gpr; /* index into the array of 8 GPRs
3793 // * stored in the register save area
3794 // * gpr=0 corresponds to r3,
3795 // * gpr=1 to r4, etc.
3796 // */
3797 // char fpr; /* index into the array of 8 FPRs
3798 // * stored in the register save area
3799 // * fpr=0 corresponds to f1,
3800 // * fpr=1 to f2, etc.
3801 // */
3802 // char *overflow_arg_area;
3803 // /* location on stack that holds
3804 // * the next overflow argument
3805 // */
3806 // char *reg_save_area;
3807 // /* where r3:r10 and f1:f8 (if saved)
3808 // * are stored
3809 // */
3810 // } va_list[1];
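// Following the offsets computed below (pointers are 4 bytes on this path),
// the stores fill the struct in order: gpr at byte offset 0, fpr at offset 1,
// overflow_arg_area at offset 4 (after two bytes of padding), and
// reg_save_area at offset 8.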
3811
3812 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
3813 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
3814 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
3815 PtrVT);
3816 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3817 PtrVT);
3818
3819 uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
3820 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
3821
3822 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
3823 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
3824
3825 uint64_t FPROffset = 1;
3826 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
3827
3828 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3829
3830 // Store first byte : number of int regs
3831 SDValue firstStore =
3832 DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
3833 MachinePointerInfo(SV), MVT::i8);
3834 uint64_t nextOffset = FPROffset;
3835 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
3836 ConstFPROffset);
3837
3838 // Store second byte : number of float regs
3839 SDValue secondStore =
3840 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
3841 MachinePointerInfo(SV, nextOffset), MVT::i8);
3842 nextOffset += StackOffset;
3843 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
3844
3845 // Store second word : arguments given on stack
3846 SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
3847 MachinePointerInfo(SV, nextOffset));
3848 nextOffset += FrameOffset;
3849 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
3850
3851 // Store third word : arguments given in registers
3852 return DAG.getStore(thirdStore, dl, FR, nextPtr,
3853 MachinePointerInfo(SV, nextOffset));
3854}
3855
3856/// FPR - The set of FP registers that should be allocated for arguments
3857/// on Darwin and AIX.
3858static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
3859 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
3860 PPC::F11, PPC::F12, PPC::F13};
3861
3862/// CalculateStackSlotSize - Calculates the size reserved for this argument on
3863/// the stack.
3864static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
3865 unsigned PtrByteSize) {
3866 unsigned ArgSize = ArgVT.getStoreSize();
3867 if (Flags.isByVal())
3868 ArgSize = Flags.getByValSize();
3869
3870 // Round up to multiples of the pointer size, except for array members,
3871 // which are always packed.
3872 if (!Flags.isInConsecutiveRegs())
3873 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
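// e.g. with PtrByteSize == 8, a 12-byte argument reserves 16 bytes.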
3874
3875 return ArgSize;
3876}
3877
3878/// CalculateStackSlotAlignment - Calculates the alignment of this argument
3879/// on the stack.
3880static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
3881 ISD::ArgFlagsTy Flags,
3882 unsigned PtrByteSize) {
3883 Align Alignment(PtrByteSize);
3884
3885 // Altivec parameters are padded to a 16 byte boundary.
3886 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3887 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3888 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3889 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3890 Alignment = Align(16);
3891
3892 // ByVal parameters are aligned as requested.
3893 if (Flags.isByVal()) {
3894 auto BVAlign = Flags.getNonZeroByValAlign();
3895 if (BVAlign > PtrByteSize) {
3896 if (BVAlign.value() % PtrByteSize != 0)
3897 llvm_unreachable(
3898 "ByVal alignment is not a multiple of the pointer size");
3899
3900 Alignment = BVAlign;
3901 }
3902 }
3903
3904 // Array members are always packed to their original alignment.
3905 if (Flags.isInConsecutiveRegs()) {
3906 // If the array member was split into multiple registers, the first
3907 // needs to be aligned to the size of the full type. (Except for
3908 // ppcf128, which is only aligned as its f64 components.)
3909 if (Flags.isSplit() && OrigVT != MVT::ppcf128)
3910 Alignment = Align(OrigVT.getStoreSize());
3911 else
3912 Alignment = Align(ArgVT.getStoreSize());
3913 }
3914
3915 return Alignment;
3916}
3917
3918/// CalculateStackSlotUsed - Return whether this argument will use its
3919/// stack slot (instead of being passed in registers). ArgOffset,
3920/// AvailableFPRs, and AvailableVRs must hold the current argument
3921/// position, and will be updated to account for this argument.
3922static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
3923 unsigned PtrByteSize, unsigned LinkageSize,
3924 unsigned ParamAreaSize, unsigned &ArgOffset,
3925 unsigned &AvailableFPRs,
3926 unsigned &AvailableVRs) {
3927 bool UseMemory = false;
3928
3929 // Respect alignment of argument on the stack.
3930 Align Alignment =
3931 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
3932 ArgOffset = alignTo(ArgOffset, Alignment);
3933 // If there's no space left in the argument save area, we must
3934 // use memory (this check also catches zero-sized arguments).
3935 if (ArgOffset >= LinkageSize + ParamAreaSize)
3936 UseMemory = true;
3937
3938 // Allocate argument on the stack.
3939 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
3940 if (Flags.isInConsecutiveRegsLast())
3941 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3942 // If we overran the argument save area, we must use memory
3943 // (this check catches arguments passed partially in memory)
3944 if (ArgOffset > LinkageSize + ParamAreaSize)
3945 UseMemory = true;
3946
3947 // However, if the argument is actually passed in an FPR or a VR,
3948 // we don't use memory after all.
3949 if (!Flags.isByVal()) {
3950 if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
3951 if (AvailableFPRs > 0) {
3952 --AvailableFPRs;
3953 return false;
3954 }
3955 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3956 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3957 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3958 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3959 if (AvailableVRs > 0) {
3960 --AvailableVRs;
3961 return false;
3962 }
3963 }
3964
3965 return UseMemory;
3966}
3967
3968/// EnsureStackAlignment - Round stack frame size up from NumBytes to
3969/// ensure minimum alignment required for target.
3970static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
3971 unsigned NumBytes) {
3972 return alignTo(NumBytes, Lowering->getStackAlign());
3973}
3974
3975SDValue PPCTargetLowering::LowerFormalArguments(
3976 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3977 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3978 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3979 if (Subtarget.isAIXABI())
3980 return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
3981 InVals);
3982 if (Subtarget.is64BitELFABI())
3983 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
3984 InVals);
3985 assert(Subtarget.is32BitELFABI());
3986 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
3987 InVals);
3988}
3989
3990SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
3991 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3992 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3993 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3994
3995 // 32-bit SVR4 ABI Stack Frame Layout:
3996 // +-----------------------------------+
3997 // +--> | Back chain |
3998 // | +-----------------------------------+
3999 // | | Floating-point register save area |
4000 // | +-----------------------------------+
4001 // | | General register save area |
4002 // | +-----------------------------------+
4003 // | | CR save word |
4004 // | +-----------------------------------+
4005 // | | VRSAVE save word |
4006 // | +-----------------------------------+
4007 // | | Alignment padding |
4008 // | +-----------------------------------+
4009 // | | Vector register save area |
4010 // | +-----------------------------------+
4011 // | | Local variable space |
4012 // | +-----------------------------------+
4013 // | | Parameter list area |
4014 // | +-----------------------------------+
4015 // | | LR save word |
4016 // | +-----------------------------------+
4017 // SP--> +--- | Back chain |
4018 // +-----------------------------------+
4019 //
4020 // Specifications:
4021 // System V Application Binary Interface PowerPC Processor Supplement
4022 // AltiVec Technology Programming Interface Manual
4023
4024 MachineFunction &MF = DAG.getMachineFunction();
4025 MachineFrameInfo &MFI = MF.getFrameInfo();
4026 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4027
4028 EVT PtrVT = getPointerTy(MF.getDataLayout());
4029 // Potential tail calls could cause overwriting of argument stack slots.
4030 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4031 (CallConv == CallingConv::Fast));
4032 const Align PtrAlign(4);
4033
4034 // Assign locations to all of the incoming arguments.
4035 SmallVector<CCValAssign, 16> ArgLocs;
4036 PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4037 *DAG.getContext());
4038
4039 // Reserve space for the linkage area on the stack.
4040 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4041 CCInfo.AllocateStack(LinkageSize, PtrAlign);
4042 if (useSoftFloat())
4043 CCInfo.PreAnalyzeFormalArguments(Ins);
4044
4045 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
4046 CCInfo.clearWasPPCF128();
4047
4048 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4049 CCValAssign &VA = ArgLocs[i];
4050
4051 // Arguments stored in registers.
4052 if (VA.isRegLoc()) {
4053 const TargetRegisterClass *RC;
4054 EVT ValVT = VA.getValVT();
4055
4056 switch (ValVT.getSimpleVT().SimpleTy) {
4057 default:
4058 llvm_unreachable("ValVT not supported by formal arguments Lowering")::llvm::llvm_unreachable_internal("ValVT not supported by formal arguments Lowering"
, "llvm/lib/Target/PowerPC/PPCISelLowering.cpp", 4058)
;
4059 case MVT::i1:
4060 case MVT::i32:
4061 RC = &PPC::GPRCRegClass;
4062 break;
4063 case MVT::f32:
4064 if (Subtarget.hasP8Vector())
4065 RC = &PPC::VSSRCRegClass;
4066 else if (Subtarget.hasSPE())
4067 RC = &PPC::GPRCRegClass;
4068 else
4069 RC = &PPC::F4RCRegClass;
4070 break;
4071 case MVT::f64:
4072 if (Subtarget.hasVSX())
4073 RC = &PPC::VSFRCRegClass;
4074 else if (Subtarget.hasSPE())
4075 // SPE passes doubles in GPR pairs.
4076 RC = &PPC::GPRCRegClass;
4077 else
4078 RC = &PPC::F8RCRegClass;
4079 break;
4080 case MVT::v16i8:
4081 case MVT::v8i16:
4082 case MVT::v4i32:
4083 RC = &PPC::VRRCRegClass;
4084 break;
4085 case MVT::v4f32:
4086 RC = &PPC::VRRCRegClass;
4087 break;
4088 case MVT::v2f64:
4089 case MVT::v2i64:
4090 RC = &PPC::VRRCRegClass;
4091 break;
4092 }
4093
4094 SDValue ArgValue;
4095 // Transform the arguments stored in physical registers into
4096 // virtual ones.
4097 if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
4098 assert(i + 1 < e && "No second half of double precision argument");
4099 Register RegLo = MF.addLiveIn(VA.getLocReg(), RC);
4100 Register RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
4101 SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
4102 SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
4103 if (!Subtarget.isLittleEndian())
4104 std::swap (ArgValueLo, ArgValueHi);
4105 ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
4106 ArgValueHi);
4107 } else {
4108 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4109 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
4110 ValVT == MVT::i1 ? MVT::i32 : ValVT);
4111 if (ValVT == MVT::i1)
4112 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
4113 }
4114
4115 InVals.push_back(ArgValue);
4116 } else {
4117 // Argument stored in memory.
4118 assert(VA.isMemLoc());
4119
4120 // Get the extended size of the argument type on the stack
4121 unsigned ArgSize = VA.getLocVT().getStoreSize();
4122 // Get the actual size of the argument type
4123 unsigned ObjSize = VA.getValVT().getStoreSize();
4124 unsigned ArgOffset = VA.getLocMemOffset();
4125 // Stack objects in PPC32 are right justified.
4126 ArgOffset += ArgSize - ObjSize;
4127 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
4128
4129 // Create load nodes to retrieve arguments from the stack.
4130 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4131 InVals.push_back(
4132 DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
4133 }
4134 }
4135
4136 // Assign locations to all of the incoming aggregate by value arguments.
4137 // Aggregates passed by value are stored in the local variable space of the
4138 // caller's stack frame, right above the parameter list area.
4139 SmallVector<CCValAssign, 16> ByValArgLocs;
4140 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4141 ByValArgLocs, *DAG.getContext());
4142
4143 // Reserve stack space for the allocations in CCInfo.
4144 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);
4145
4146 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
4147
4148 // Area that is at least reserved in the caller of this function.
4149 unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
4150 MinReservedArea = std::max(MinReservedArea, LinkageSize);
4151
4152 // Set the size that is at least reserved in caller of this function. Tail
4153 // call optimized function's reserved stack space needs to be aligned so that
4154 // taking the difference between two stack areas will result in an aligned
4155 // stack.
4156 MinReservedArea =
4157 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4158 FuncInfo->setMinReservedArea(MinReservedArea);
4159
4160 SmallVector<SDValue, 8> MemOps;
4161
4162 // If the function takes variable number of arguments, make a frame index for
4163 // the start of the first vararg value... for expansion of llvm.va_start.
4164 if (isVarArg) {
4165 static const MCPhysReg GPArgRegs[] = {
4166 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4167 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4168 };
4169 const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
4170
4171 static const MCPhysReg FPArgRegs[] = {
4172 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
4173 PPC::F8
4174 };
4175 unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
4176
4177 if (useSoftFloat() || hasSPE())
4178 NumFPArgRegs = 0;
4179
4180 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
4181 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
4182
4183 // Make room for NumGPArgRegs and NumFPArgRegs.
4184 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
4185 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
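// With all 8 GPRs and 8 FPRs saved this is 8*4 + 8*8 = 96 bytes, or just
// 32 bytes when NumFPArgRegs has been forced to 0 above.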
4186
4187 FuncInfo->setVarArgsStackOffset(
4188 MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
4189 CCInfo.getNextStackOffset(), true));
4190
4191 FuncInfo->setVarArgsFrameIndex(
4192 MFI.CreateStackObject(Depth, Align(8), false));
4193 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4194
4195 // The fixed integer arguments of a variadic function are stored to the
4196 // VarArgsFrameIndex on the stack so that they may be loaded by
4197 // dereferencing the result of va_next.
4198 for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
4199 // Get an existing live-in vreg, or add a new one.
4200 Register VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
4201 if (!VReg)
4202 VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
4203
4204 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4205 SDValue Store =
4206 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4207 MemOps.push_back(Store);
4208 // Increment the address by four for the next argument to store
4209 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4210 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4211 }
4212
4213 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
4214 // is set.
4215 // The double arguments are stored to the VarArgsFrameIndex
4216 // on the stack.
4217 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
4218 // Get an existing live-in vreg, or add a new one.
4219 Register VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
4220 if (!VReg)
4221 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
4222
4223 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
4224 SDValue Store =
4225 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4226 MemOps.push_back(Store);
4227 // Increment the address by eight for the next argument to store
4228 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
4229 PtrVT);
4230 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4231 }
4232 }
4233
4234 if (!MemOps.empty())
4235 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4236
4237 return Chain;
4238}
4239
4240// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4241// value to MVT::i64 and then truncate to the correct register size.
4242SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
4243 EVT ObjectVT, SelectionDAG &DAG,
4244 SDValue ArgVal,
4245 const SDLoc &dl) const {
4246 if (Flags.isSExt())
4247 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
4248 DAG.getValueType(ObjectVT));
4249 else if (Flags.isZExt())
4250 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
4251 DAG.getValueType(ObjectVT));
4252
4253 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
4254}
4255
4256SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
4257 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4258 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4259 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4260 // TODO: add description of PPC stack frame format, or at least some docs.
4261 //
4262 bool isELFv2ABI = Subtarget.isELFv2ABI();
4263 bool isLittleEndian = Subtarget.isLittleEndian();
4264 MachineFunction &MF = DAG.getMachineFunction();
4265 MachineFrameInfo &MFI = MF.getFrameInfo();
4266 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4267
4268 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
4269 "fastcc not supported on varargs functions");
4270
4271 EVT PtrVT = getPointerTy(MF.getDataLayout());
4272 // Potential tail calls could cause overwriting of argument stack slots.
4273 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4274 (CallConv == CallingConv::Fast));
4275 unsigned PtrByteSize = 8;
4276 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4277
4278 static const MCPhysReg GPR[] = {
4279 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4280 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4281 };
4282 static const MCPhysReg VR[] = {
4283 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4284 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4285 };
4286
4287 const unsigned Num_GPR_Regs = array_lengthof(GPR);
4288 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4289 const unsigned Num_VR_Regs = array_lengthof(VR);
4290
4291 // Do a first pass over the arguments to determine whether the ABI
4292 // guarantees that our caller has allocated the parameter save area
4293 // on its stack frame. In the ELFv1 ABI, this is always the case;
4294 // in the ELFv2 ABI, it is true if this is a vararg function or if
4295 // any parameter is located in a stack slot.
4296
4297 bool HasParameterArea = !isELFv2ABI || isVarArg;
4298 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
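// 8 GPRs * 8 bytes gives the 64-byte parameter save area.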
4299 unsigned NumBytes = LinkageSize;
4300 unsigned AvailableFPRs = Num_FPR_Regs;
4301 unsigned AvailableVRs = Num_VR_Regs;
4302 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4303 if (Ins[i].Flags.isNest())
4304 continue;
4305
4306 if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
4307 PtrByteSize, LinkageSize, ParamAreaSize,
4308 NumBytes, AvailableFPRs, AvailableVRs))
4309 HasParameterArea = true;
4310 }
4311
4312 // Add DAG nodes to load the arguments or copy them out of registers. On
4313 // entry to a function on PPC, the arguments start after the linkage area,
4314 // although the first ones are often in registers.
4315
4316 unsigned ArgOffset = LinkageSize;
4317 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4318 SmallVector<SDValue, 8> MemOps;
4319 Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4320 unsigned CurArgIdx = 0;
4321 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4322 SDValue ArgVal;
4323 bool needsLoad = false;
4324 EVT ObjectVT = Ins[ArgNo].VT;
4325 EVT OrigVT = Ins[ArgNo].ArgVT;
4326 unsigned ObjSize = ObjectVT.getStoreSize();
4327 unsigned ArgSize = ObjSize;
4328 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4329 if (Ins[ArgNo].isOrigArg()) {
4330 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4331 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4332 }
4333 // We re-align the argument offset for each argument, except when using the
4334 // fast calling convention, when we need to make sure we do that only when
4335 // we'll actually use a stack slot.
4336 unsigned CurArgOffset;
4337 Align Alignment;
4338 auto ComputeArgOffset = [&]() {
4339 /* Respect alignment of argument on the stack. */
4340 Alignment =
4341 CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
4342 ArgOffset = alignTo(ArgOffset, Alignment);
4343 CurArgOffset = ArgOffset;
4344 };
4345
4346 if (CallConv != CallingConv::Fast) {
4347 ComputeArgOffset();
4348
4349 /* Compute GPR index associated with argument offset. */
4350 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4351 GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
4352 }
4353
4354 // FIXME the codegen can be much improved in some cases.
4355 // We do not have to keep everything in memory.
4356 if (Flags.isByVal()) {
4357 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4358
4359 if (CallConv == CallingConv::Fast)
4360 ComputeArgOffset();
4361
4362 // ObjSize is the true size, ArgSize rounded up to multiple of registers.
4363 ObjSize = Flags.getByValSize();
4364 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4365 // Empty aggregate parameters do not take up registers. Examples:
4366 // struct { } a;
4367 // union { } b;
4368 // int c[0];
4369 // etc. However, we have to provide a place-holder in InVals, so
4370 // pretend we have an 8-byte item at the current address for that
4371 // purpose.
4372 if (!ObjSize) {
4373 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4374 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4375 InVals.push_back(FIN);
4376 continue;
4377 }
4378
4379 // Create a stack object covering all stack doublewords occupied
4380 // by the argument. If the argument is (fully or partially) on
4381 // the stack, or if the argument is fully in registers but the
4382 // caller has allocated the parameter save anyway, we can refer
4383 // directly to the caller's stack frame. Otherwise, create a
4384 // local copy in our own frame.
4385 int FI;
4386 if (HasParameterArea ||
4387 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4388 FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4389 else
4390 FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4391 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4392
4393 // Handle aggregates smaller than 8 bytes.
4394 if (ObjSize < PtrByteSize) {
4395 // The value of the object is its address, which differs from the
4396 // address of the enclosing doubleword on big-endian systems.
4397 SDValue Arg = FIN;
4398 if (!isLittleEndian) {
4399 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4400 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4401 }
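// e.g. a 3-byte aggregate on big-endian occupies the last 3 bytes of its
// doubleword, so its address is the slot address plus 5.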
4402 InVals.push_back(Arg);
4403
4404 if (GPR_idx != Num_GPR_Regs) {
4405 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4406 FuncInfo->addLiveInAttr(VReg, Flags);
4407 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4408 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), ObjSize * 8);
4409 SDValue Store =
4410 DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4411 MachinePointerInfo(&*FuncArg), ObjType);
4412 MemOps.push_back(Store);
4413 }
4414 // Whether we copied from a register or not, advance the offset
4415 // into the parameter save area by a full doubleword.
4416 ArgOffset += PtrByteSize;
4417 continue;
4418 }
4419
4420 // The value of the object is its address, which is the address of
4421 // its first stack doubleword.
4422 InVals.push_back(FIN);
4423
4424 // Store whatever pieces of the object are in registers to memory.
4425 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4426 if (GPR_idx == Num_GPR_Regs)
4427 break;
4428
4429 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4430 FuncInfo->addLiveInAttr(VReg, Flags);
4431 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4432 SDValue Addr = FIN;
4433 if (j) {
4434 SDValue Off = DAG.getConstant(j, dl, PtrVT);
4435 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4436 }
4437 unsigned StoreSizeInBits = std::min(PtrByteSize, (ObjSize - j)) * 8;
4438 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), StoreSizeInBits);
4439 SDValue Store =
4440 DAG.getTruncStore(Val.getValue(1), dl, Val, Addr,
4441 MachinePointerInfo(&*FuncArg, j), ObjType);
4442 MemOps.push_back(Store);
4443 ++GPR_idx;
4444 }
4445 ArgOffset += ArgSize;
4446 continue;
4447 }
4448
4449 switch (ObjectVT.getSimpleVT().SimpleTy) {
4450 default: llvm_unreachable("Unhandled argument type!");
4451 case MVT::i1:
4452 case MVT::i32:
4453 case MVT::i64:
4454 if (Flags.isNest()) {
4455 // The 'nest' parameter, if any, is passed in R11.
4456 Register VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4457 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4458
4459 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4460 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4461
4462 break;
4463 }
4464
4465 // These can be scalar arguments or elements of an integer array type
4466 // passed directly. Clang may use those instead of "byval" aggregate
4467 // types to avoid forcing arguments to memory unnecessarily.
4468 if (GPR_idx != Num_GPR_Regs) {
4469 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4470 FuncInfo->addLiveInAttr(VReg, Flags);
4471 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4472
4473 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4474 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4475 // value to MVT::i64 and then truncate to the correct register size.
4476 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4477 } else {
4478 if (CallConv == CallingConv::Fast)
4479 ComputeArgOffset();
4480
4481 needsLoad = true;
4482 ArgSize = PtrByteSize;
4483 }
4484 if (CallConv != CallingConv::Fast || needsLoad)
4485 ArgOffset += 8;
4486 break;
4487
4488 case MVT::f32:
4489 case MVT::f64:
4490 // These can be scalar arguments or elements of a float array type
4491 // passed directly. The latter are used to implement ELFv2 homogeneous
4492 // float aggregates.
4493 if (FPR_idx != Num_FPR_Regs) {
4494 unsigned VReg;
4495
4496 if (ObjectVT == MVT::f32)
4497 VReg = MF.addLiveIn(FPR[FPR_idx],
4498 Subtarget.hasP8Vector()
4499 ? &PPC::VSSRCRegClass
4500 : &PPC::F4RCRegClass);
4501 else
4502 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4503 ? &PPC::VSFRCRegClass
4504 : &PPC::F8RCRegClass);
4505
4506 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4507 ++FPR_idx;
4508 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4509 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4510 // once we support fp <-> gpr moves.
4511
4512 // This can only ever happen in the presence of f32 array types,
4513 // since otherwise we never run out of FPRs before running out
4514 // of GPRs.
4515 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4516 FuncInfo->addLiveInAttr(VReg, Flags);
4517 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4518
4519 if (ObjectVT == MVT::f32) {
4520 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4521 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4522 DAG.getConstant(32, dl, MVT::i32));
4523 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4524 }
4525
4526 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4527 } else {
4528 if (CallConv == CallingConv::Fast)
4529 ComputeArgOffset();
4530
4531 needsLoad = true;
4532 }
4533
4534 // When passing an array of floats, the array occupies consecutive
4535 // space in the argument area; only round up to the next doubleword
4536 // at the end of the array. Otherwise, each float takes 8 bytes.
4537 if (CallConv != CallingConv::Fast || needsLoad) {
4538 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4539 ArgOffset += ArgSize;
4540 if (Flags.isInConsecutiveRegsLast())
4541 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4542 }
4543 break;
4544 case MVT::v4f32:
4545 case MVT::v4i32:
4546 case MVT::v8i16:
4547 case MVT::v16i8:
4548 case MVT::v2f64:
4549 case MVT::v2i64:
4550 case MVT::v1i128:
4551 case MVT::f128:
4552 // These can be scalar arguments or elements of a vector array type
4553 // passed directly. The latter are used to implement ELFv2 homogeneous
4554 // vector aggregates.
4555 if (VR_idx != Num_VR_Regs) {
4556 Register VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4557 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4558 ++VR_idx;
4559 } else {
4560 if (CallConv == CallingConv::Fast)
4561 ComputeArgOffset();
4562 needsLoad = true;
4563 }
4564 if (CallConv != CallingConv::Fast || needsLoad)
4565 ArgOffset += 16;
4566 break;
4567 }
4568
4569 // We need to load the argument to a virtual register if we determined
4570 // above that we ran out of physical registers of the appropriate type.
4571 if (needsLoad) {
4572 if (ObjSize < ArgSize && !isLittleEndian)
4573 CurArgOffset += ArgSize - ObjSize;
4574 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4575 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4576 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4577 }
4578
4579 InVals.push_back(ArgVal);
4580 }
4581
4582 // Area that is at least reserved in the caller of this function.
4583 unsigned MinReservedArea;
4584 if (HasParameterArea)
4585 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4586 else
4587 MinReservedArea = LinkageSize;
4588
4589 // Set the size that is at least reserved in caller of this function. Tail
4590 // call optimized functions' reserved stack space needs to be aligned so that
4591 // taking the difference between two stack areas will result in an aligned
4592 // stack.
4593 MinReservedArea =
4594 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4595 FuncInfo->setMinReservedArea(MinReservedArea);
4596
4597 // If the function takes variable number of arguments, make a frame index for
4598 // the start of the first vararg value... for expansion of llvm.va_start.
4599 // The ELFv2 ABI specification states:
4600 // C programs that are intended to be *portable* across different compilers
4601 // and architectures must use the header file <stdarg.h> to deal with variable
4602 // argument lists.
4603 if (isVarArg && MFI.hasVAStart()) {
4604 int Depth = ArgOffset;
4605
4606 FuncInfo->setVarArgsFrameIndex(
4607 MFI.CreateFixedObject(PtrByteSize, Depth, true));
4608 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4609
4610 // If this function is vararg, store any remaining integer argument regs
4611 // to their spots on the stack so that they may be loaded by dereferencing
4612 // the result of va_next.
4613 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4614 GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4615 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4616 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4617 SDValue Store =
4618 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4619 MemOps.push_back(Store);
4620 // Increment the address by four for the next argument to store
4621 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4622 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4623 }
4624 }
4625
4626 if (!MemOps.empty())
4627 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4628
4629 return Chain;
4630}
4631
4632/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4633/// adjusted to accommodate the arguments for the tailcall.
4634static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4635 unsigned ParamSize) {
4636
4637 if (!isTailCall) return 0;
4638
4639 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4640 unsigned CallerMinReservedArea = FI->getMinReservedArea();
4641 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
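// A negative SPDiff means the callee needs a larger parameter area than the
// caller has reserved, so the stack must be grown before the tail call.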
4642 // Remember only if the new adjustment is bigger.
4643 if (SPDiff < FI->getTailCallSPDelta())
4644 FI->setTailCallSPDelta(SPDiff);
4645
4646 return SPDiff;
4647}
4648
4649static bool isFunctionGlobalAddress(SDValue Callee);
4650
4651static bool callsShareTOCBase(const Function *Caller, SDValue Callee,
4652 const TargetMachine &TM) {
4653 // It does not make sense to call callsShareTOCBase() with a caller that
4654 // is PC Relative since PC Relative callers do not have a TOC.
4655#ifndef NDEBUG
4656 const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4657 assert(!STICaller->isUsingPCRelativeCalls() &&
4658 "PC Relative callers do not have a TOC and cannot share a TOC Base");
4659#endif
4660
4661 // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4662 // don't have enough information to determine if the caller and callee share
4663 // the same TOC base, so we have to pessimistically assume they don't for
4664 // correctness.
4665 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4666 if (!G)
4667 return false;
4668
4669 const GlobalValue *GV = G->getGlobal();
4670
4671 // If the callee is preemptable, then the static linker will use a plt-stub
4672 // which saves the toc to the stack, and needs a nop after the call
4673 // instruction to convert to a toc-restore.
4674 if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
4675 return false;
4676
4677 // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4678 // We may need a TOC restore in the situation where the caller requires a
4679 // valid TOC but the callee is PC Relative and does not.
4680 const Function *F = dyn_cast<Function>(GV);
4681 const GlobalAlias *Alias = dyn_cast<GlobalAlias>(GV);
4682
4683 // If we have an Alias we can try to get the function from there.
4684 if (Alias) {
4685 const GlobalObject *GlobalObj = Alias->getAliaseeObject();
4686 F = dyn_cast<Function>(GlobalObj);
4687 }
4688
4689 // If we still have no valid function pointer we do not have enough
4690 // information to determine if the callee uses PC Relative calls so we must
4691 // assume that it does.
4692 if (!F)
4693 return false;
4694
4695 // If the callee uses PC Relative we cannot guarantee that the callee won't
4696 // clobber the TOC of the caller and so we must assume that the two
4697 // functions do not share a TOC base.
4698 const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4699 if (STICallee->isUsingPCRelativeCalls())
4700 return false;
4701
4702 // If the GV is not a strong definition then we need to assume it can be
4703 // replaced by another function at link time. The function that replaces
4704 // it may not share the same TOC as the caller since the callee may be
4705 // replaced by a PC Relative version of the same function.
4706 if (!GV->isStrongDefinitionForLinker())
4707 return false;
4708
4709 // The medium and large code models are expected to provide a sufficiently
4710 // large TOC to provide all data addressing needs of a module with a
4711 // single TOC.
4712 if (CodeModel::Medium == TM.getCodeModel() ||
4713 CodeModel::Large == TM.getCodeModel())
4714 return true;
4715
4716 // Any explicitly-specified sections and section prefixes must also match.
4717 // Also, if we're using -ffunction-sections, then each function is always in
4718 // a different section (the same is true for COMDAT functions).
4719 if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
4720 GV->getSection() != Caller->getSection())
4721 return false;
4722 if (const auto *F = dyn_cast<Function>(GV)) {
4723 if (F->getSectionPrefix() != Caller->getSectionPrefix())
4724 return false;
4725 }
4726
4727 return true;
4728}
4729
4730static bool
4731needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4732 const SmallVectorImpl<ISD::OutputArg> &Outs) {
4733 assert(Subtarget.is64BitELFABI());
4734
4735 const unsigned PtrByteSize = 8;
4736 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4737
4738 static const MCPhysReg GPR[] = {
4739 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4740 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4741 };
4742 static const MCPhysReg VR[] = {
4743 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4744 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4745 };
4746
4747 const unsigned NumGPRs = array_lengthof(GPR);
4748 const unsigned NumFPRs = 13;
4749 const unsigned NumVRs = array_lengthof(VR);
4750 const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4751
4752 unsigned NumBytes = LinkageSize;
4753 unsigned AvailableFPRs = NumFPRs;
4754 unsigned AvailableVRs = NumVRs;
4755
4756 for (const ISD::OutputArg& Param : Outs) {
4757 if (Param.Flags.isNest()) continue;
4758
4759 if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
4760 LinkageSize, ParamAreaSize, NumBytes,
4761 AvailableFPRs, AvailableVRs))
4762 return true;
4763 }
4764 return false;
4765}
4766
4767static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
4768 if (CB.arg_size() != CallerFn->arg_size())
4769 return false;
4770
4771 auto CalleeArgIter = CB.arg_begin();
4772 auto CalleeArgEnd = CB.arg_end();
4773 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4774
4775 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4776 const Value* CalleeArg = *CalleeArgIter;
4777 const Value* CallerArg = &(*CallerArgIter);
4778 if (CalleeArg == CallerArg)
4779 continue;
4780
4781 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4782 // tail call @callee([4 x i64] undef, [4 x i64] %b)
4783 // }
4784 // 1st argument of callee is undef and has the same type as caller.
4785 if (CalleeArg->getType() == CallerArg->getType() &&
4786 isa<UndefValue>(CalleeArg))
4787 continue;
4788
4789 return false;
4790 }
4791
4792 return true;
4793}
4794
4795// Returns true if TCO is possible between the callers and callees
4796// calling conventions.
4797static bool
4798areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
4799 CallingConv::ID CalleeCC) {
4800 // Tail calls are possible with fastcc and ccc.
4801 auto isTailCallableCC = [] (CallingConv::ID CC){
4802 return CC == CallingConv::C || CC == CallingConv::Fast;
4803 };
4804 if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
4805 return false;
4806
4807 // We can safely tail call both fastcc and ccc callees from a c calling
4808 // convention caller. If the caller is fastcc, we may have less stack space
4809 // than a non-fastcc caller with the same signature so disable tail-calls in
4810 // that case.
4811 return CallerCC == CallingConv::C || CallerCC == CalleeCC;
4812}
4813
4814bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
4815 SDValue Callee, CallingConv::ID CalleeCC, const CallBase *CB, bool isVarArg,
4816 const SmallVectorImpl<ISD::OutputArg> &Outs,
4817 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
4818 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
4819
4820 if (DisableSCO && !TailCallOpt) return false;
4821
4822 // Variadic argument functions are not supported.
4823 if (isVarArg) return false;
4824
4825 auto &Caller = DAG.getMachineFunction().getFunction();
4826 // Check that the calling conventions are compatible for tco.
4827 if (!areCallingConvEligibleForTCO_64SVR4(Caller.getCallingConv(), CalleeCC))
4828 return false;
4829
4830 // A caller that contains any byval parameter is not supported.
4831 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
4832 return false;
4833
4834 // A callee that contains any byval parameter is not supported either.
4835 // Note: This is a quick workaround, because in some cases, e.g.
4836 // caller's stack size > callee's stack size, we are still able to apply
4837 // sibling call optimization. For example, gcc is able to do SCO for caller1
4838 // in the following example, but not for caller2.
4839 // struct test {
4840 // long int a;
4841 // char ary[56];
4842 // } gTest;
4843 // __attribute__((noinline)) int callee(struct test v, struct test *b) {
4844 // b->a = v.a;
4845 // return 0;
4846 // }
4847 // void caller1(struct test a, struct test c, struct test *b) {
4848 // callee(gTest, b); }
4849 // void caller2(struct test *b) { callee(gTest, b); }
4850 if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
4851 return false;
4852
4853 // If callee and caller use different calling conventions, we cannot pass
4854 // parameters on stack since offsets for the parameter area may be different.
4855 if (Caller.getCallingConv() != CalleeCC &&
4856 needStackSlotPassParameters(Subtarget, Outs))
4857 return false;
4858
4859 // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
4860 // the caller and callee share the same TOC for TCO/SCO. If the caller and
4861 // callee potentially have different TOC bases then we cannot tail call since
4862 // we need to restore the TOC pointer after the call.
4863 // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
4864 // We cannot guarantee this for indirect calls or calls to external functions.
4865 // When PC-Relative addressing is used, the concept of the TOC is no longer
4866 // applicable so this check is not required.
4867 // Check first for indirect calls.
4868 if (!Subtarget.isUsingPCRelativeCalls() &&
4869 !isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee))
4870 return false;
4871
4872 // Check if we share the TOC base.
4873 if (!Subtarget.isUsingPCRelativeCalls() &&
4874 !callsShareTOCBase(&Caller, Callee, getTargetMachine()))
4875 return false;
4876
4877 // TCO allows altering callee ABI, so we don't have to check further.
4878 if (CalleeCC == CallingConv::Fast && TailCallOpt)
4879 return true;
4880
4881 if (DisableSCO) return false;
4882
4883 // If the callee uses the same argument list as the caller, then we can
4884 // apply SCO in this case. If it does not, then we need to check whether the
4885 // callee needs stack for passing arguments.
4886 // PC Relative tail calls may not have a CallBase.
4887 // If there is no CallBase we cannot verify if we have the same argument
4888 // list so assume that we don't have the same argument list.
4889 if (CB && !hasSameArgumentList(&Caller, *CB) &&
4890 needStackSlotPassParameters(Subtarget, Outs))
4891 return false;
4892 else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
4893 return false;
4894
4895 return true;
4896}
4897
4898/// IsEligibleForTailCallOptimization - Check whether the call is eligible
4899/// for tail call optimization. Targets which want to do tail call
4900/// optimization should implement this function.
4901bool
4902PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
4903 CallingConv::ID CalleeCC,
4904 bool isVarArg,
4905 const SmallVectorImpl<ISD::InputArg> &Ins,
4906 SelectionDAG& DAG) const {
4907 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
4908 return false;
4909
4910 // Variable argument functions are not supported.
4911 if (isVarArg)
4912 return false;
4913
4914 MachineFunction &MF = DAG.getMachineFunction();
4915 CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
4916 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
4917 // Functions containing by val parameters are not supported.
4918 for (unsigned i = 0; i != Ins.size(); i++) {
4919 ISD::ArgFlagsTy Flags = Ins[i].Flags;
4920 if (Flags.isByVal()) return false;
4921 }
4922
4923 // Non-PIC/GOT tail calls are supported.
4924 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
4925 return true;
4926
4927 // At the moment we can only do local tail calls (in same module, hidden
4928 // or protected) if we are generating PIC.
4929 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
4930 return G->getGlobal()->hasHiddenVisibility()
4931 || G->getGlobal()->hasProtectedVisibility();
4932 }
4933
4934 return false;
4935}
4936
4937 /// isBLACompatibleAddress - Return the immediate to use if the specified
4938/// 32-bit value is representable in the immediate field of a BxA instruction.
4939static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
4940 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
4941 if (!C) return nullptr;
4942
4943 int Addr = C->getZExtValue();
4944 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
4945 SignExtend32<26>(Addr) != Addr)
4946 return nullptr; // Top 6 bits have to be sext of immediate.
4947
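// The returned constant is the word-aligned target, i.e. Addr >> 2 (the low
// two bits are implied zero in the branch encoding); e.g. an address of
// 0x1000 yields the immediate 0x400.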
4948 return DAG
4949 .getConstant(
4950 (int)C->getZExtValue() >> 2, SDLoc(Op),
4951 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
4952 .getNode();
4953}
4954
4955namespace {
4956
4957struct TailCallArgumentInfo {
4958 SDValue Arg;
4959 SDValue FrameIdxOp;
4960 int FrameIdx = 0;
4961
4962 TailCallArgumentInfo() = default;
4963};
4964
4965} // end anonymous namespace
4966
4967/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
4968static void StoreTailCallArgumentsToStackSlot(
4969 SelectionDAG &DAG, SDValue Chain,
4970 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
4971 SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
4972 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
4973 SDValue Arg = TailCallArgs[i].Arg;
4974 SDValue FIN = TailCallArgs[i].FrameIdxOp;
4975 int FI = TailCallArgs[i].FrameIdx;
4976 // Store relative to framepointer.
4977 MemOpChains.push_back(DAG.getStore(
4978 Chain, dl, Arg, FIN,
4979 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
4980 }
4981}
4982
4983/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
4984/// the appropriate stack slot for the tail call optimized function call.
4985static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
4986 SDValue OldRetAddr, SDValue OldFP,
4987 int SPDiff, const SDLoc &dl) {
4988 if (SPDiff) {
4989 // Calculate the new stack slot for the return address.
4990 MachineFunction &MF = DAG.getMachineFunction();
4991 const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
4992 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
4993 bool isPPC64 = Subtarget.isPPC64();
4994 int SlotSize = isPPC64 ? 8 : 4;
4995 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
4996 int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
4997 NewRetAddrLoc, true);
4998 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4999 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
5000 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
5001 MachinePointerInfo::getFixedStack(MF, NewRetAddr));
5002 }
5003 return Chain;
5004}
5005
5006/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
5007/// the position of the argument.
5008static void
5009CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
5010 SDValue Arg, int SPDiff, unsigned ArgOffset,
5011 SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
5012 int Offset = ArgOffset + SPDiff;
5013 uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
5014 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
5015 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5016 SDValue FIN = DAG.getFrameIndex(FI, VT);
5017 TailCallArgumentInfo Info;
5018 Info.Arg = Arg;
5019 Info.FrameIdxOp = FIN;
5020 Info.FrameIdx = FI;
5021 TailCallArguments.push_back(Info);
5022}
5023
5024 /// EmitTailCallLoadFPAndRetAddr - Emit load from frame pointer and return
5025 /// address stack slot. Returns the chain as result and the loaded frame
5026 /// pointers in LROpOut/FPOpOut. Used when tail calling.
5027SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5028 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5029 SDValue &FPOpOut, const SDLoc &dl) const {
5030 if (SPDiff) {
5031 // Load the LR and FP stack slot for later adjusting.
5032 EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5033 LROpOut = getReturnAddrFrameIndex(DAG);
5034 LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
5035 Chain = SDValue(LROpOut.getNode(), 1);
5036 }
5037 return Chain;
5038}
5039
5040/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
5041/// by "Src" to address "Dst" of size "Size". Alignment information is
5042/// specified by the specific parameter attribute. The copy will be passed as
5043/// a byval function parameter.
5044/// Sometimes what we are copying is the end of a larger object, the part that
5045/// does not fit in registers.
5046static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
5047 SDValue Chain, ISD::ArgFlagsTy Flags,
5048 SelectionDAG &DAG, const SDLoc &dl) {
5049 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
5050 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
5051 Flags.getNonZeroByValAlign(), false, false, false,
5052 MachinePointerInfo(), MachinePointerInfo());
5053}
5054
5055/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5056/// tail calls.
5057static void LowerMemOpCallTo(
5058 SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5059 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5060 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5061 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5062 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5063 if (!isTailCall) {
5064 if (isVector) {
5065 SDValue StackPtr;
5066 if (isPPC64)
5067 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5068 else
5069 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5070 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5071 DAG.getConstant(ArgOffset, dl, PtrVT));
5072 }
5073 MemOpChains.push_back(
5074 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5075 // Calculate and remember argument location.
5076 } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
5077 TailCallArguments);
5078}
5079
5080static void
5081PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
5082 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
5083 SDValue FPOp,
5084 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5085 // Emit a sequence of copyto/copyfrom virtual registers for arguments that
5086 // might overwrite each other in case of tail call optimization.
5087 SmallVector<SDValue, 8> MemOpChains2;
5088 // Do not flag preceding copytoreg stuff together with the following stuff.
5089 InFlag = SDValue();
5090 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
5091 MemOpChains2, dl);
5092 if (!MemOpChains2.empty())
5093 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
5094
5095 // Store the return address to the appropriate stack slot.
5096 Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
5097
5098 // Emit callseq_end just before tailcall node.
5099 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
5100 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
5101 InFlag = Chain.getValue(1);
5102}
5103
5104// Is this global address that of a function that can be called by name? (as
5105// opposed to something that must hold a descriptor for an indirect call).
5106static bool isFunctionGlobalAddress(SDValue Callee) {
5107 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
5108 if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
5109 Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
5110 return false;
5111
5112 return G->getGlobal()->getValueType()->isFunctionTy();
5113 }
5114
5115 return false;
5116}
5117
5118SDValue PPCTargetLowering::LowerCallResult(
5119 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
5120 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5121 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5122 SmallVector<CCValAssign, 16> RVLocs;
5123 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5124 *DAG.getContext());
5125
5126 CCRetInfo.AnalyzeCallResult(
5127 Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5128 ? RetCC_PPC_Cold
5129 : RetCC_PPC);
5130
5131 // Copy all of the result registers out of their specified physreg.
5132 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5133 CCValAssign &VA = RVLocs[i];
5134 assert(VA.isRegLoc() && "Can only return in registers!");
5135
5136 SDValue Val;
5137
5138 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5139 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5140 InFlag);
5141 Chain = Lo.getValue(1);
5142 InFlag = Lo.getValue(2);
5143 VA = RVLocs[++i]; // skip ahead to next loc
5144 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5145 InFlag);
5146 Chain = Hi.getValue(1);
5147 InFlag = Hi.getValue(2);
5148 if (!Subtarget.isLittleEndian())
5149 std::swap (Lo, Hi);
5150 Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
5151 } else {
5152 Val = DAG.getCopyFromReg(Chain, dl,
5153 VA.getLocReg(), VA.getLocVT(), InFlag);
5154 Chain = Val.getValue(1);
5155 InFlag = Val.getValue(2);
5156 }
5157
5158 switch (VA.getLocInfo()) {
5159 default: llvm_unreachable("Unknown loc info!");
5160 case CCValAssign::Full: break;
5161 case CCValAssign::AExt:
5162 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5163 break;
5164 case CCValAssign::ZExt:
5165 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5166 DAG.getValueType(VA.getValVT()));
5167 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5168 break;
5169 case CCValAssign::SExt:
5170 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5171 DAG.getValueType(VA.getValVT()));
5172 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5173 break;
5174 }
5175
5176 InVals.push_back(Val);
5177 }
5178
5179 return Chain;
5180}
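// A minimal sketch (illustrative only, not part of this file) of how the two
// 32-bit halves copied out of the SPE return registers above are conceptually
// recombined into the 64 bits of an f64 value. Treating Hi as the most
// significant word is an assumption based on the endianness swap performed in
// the lowering code; the helper name is made up.
static unsigned long long recombineSPE64Halves(unsigned Lo, unsigned Hi) {
  return ((unsigned long long)Hi << 32) | (unsigned long long)Lo;
}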
5181
5182static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5183 const PPCSubtarget &Subtarget, bool isPatchPoint) {
5184 // PatchPoint calls are not indirect.
5185 if (isPatchPoint)
5186 return false;
5187
5188 if (isFunctionGlobalAddress(Callee) || isa<ExternalSymbolSDNode>(Callee))
5189 return false;
5190
5191 // Darwin and 32-bit ELF can use a BLA. The descriptor-based ABIs cannot,
5192 // because the immediate function pointer points to a descriptor instead of
5193 // a function entry point. The ELFv2 ABI cannot use a BLA because the function
5194 // pointer immediate points to the global entry point, while the BLA would
5195 // need to jump to the local entry point (see rL211174).
5196 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5197 isBLACompatibleAddress(Callee, DAG))
5198 return false;
5199
5200 return true;
5201}
5202
5203// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5204static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5205 return Subtarget.isAIXABI() ||
5206 (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5207}
5208
5209static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
5210 const Function &Caller, const SDValue &Callee,
5211 const PPCSubtarget &Subtarget,
5212 const TargetMachine &TM,
5213 bool IsStrictFPCall = false) {
5214 if (CFlags.IsTailCall)
5215 return PPCISD::TC_RETURN;
5216
5217 unsigned RetOpc = 0;
5218 // This is a call through a function pointer.
5219 if (CFlags.IsIndirect) {
5220 // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
5221 // indirect calls. The save of the caller's TOC pointer to the stack will be
5222 // inserted into the DAG as part of call lowering. The restore of the TOC
5223 // pointer is modeled by using a pseudo instruction for the call opcode that
5224 // represents the two-instruction sequence of an indirect branch and link,
5225 // immediately followed by a load of the TOC pointer from the stack save
5226 // slot into gpr2. For the 64-bit ELFv2 ABI with PCRel, do not restore the
5227 // TOC as it is not saved or used.
5228 RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
5229 : PPCISD::BCTRL;
5230 } else if (Subtarget.isUsingPCRelativeCalls()) {
5231 assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5232 RetOpc = PPCISD::CALL_NOTOC;
5233 } else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())
5234 // The ABIs that maintain a TOC pointer across calls need to have a nop
5235 // immediately following the call instruction if the caller and callee may
5236 // have different TOC bases. At link time, if the linker determines the calls
5237 // may not share a TOC base, the call is redirected to a trampoline inserted
5238 // by the linker. The trampoline will (among other things) save the caller's
5239 // TOC pointer at an ABI-designated offset in the linkage area, and the
5240 // linker will rewrite the nop to be a load of the TOC pointer from the
5241 // linkage area into gpr2.
5242 RetOpc = callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL
5243 : PPCISD::CALL_NOP;
5244 else
5245 RetOpc = PPCISD::CALL;
5246 if (IsStrictFPCall) {
5247 switch (RetOpc) {
5248 default:
5249 llvm_unreachable("Unknown call opcode")::llvm::llvm_unreachable_internal("Unknown call opcode", "llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 5249)
;
5250 case PPCISD::BCTRL_LOAD_TOC:
5251 RetOpc = PPCISD::BCTRL_LOAD_TOC_RM;
5252 break;
5253 case PPCISD::BCTRL:
5254 RetOpc = PPCISD::BCTRL_RM;
5255 break;
5256 case PPCISD::CALL_NOTOC:
5257 RetOpc = PPCISD::CALL_NOTOC_RM;
5258 break;
5259 case PPCISD::CALL:
5260 RetOpc = PPCISD::CALL_RM;
5261 break;
5262 case PPCISD::CALL_NOP:
5263 RetOpc = PPCISD::CALL_NOP_RM;
5264 break;
5265 }
5266 }
5267 return RetOpc;
5268}
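// Spelled out, the CALL_NOP pattern the comments above describe looks like
// the following at the call site (assembly shown as an illustration only,
// not taken from this file):
//   bl   callee
//   nop
// The nop is left untouched when the caller and callee share a TOC base;
// otherwise the linker routes the call through a trampoline that saves the
// caller's TOC pointer in the linkage area and rewrites the nop into a load
// of the TOC pointer from that slot into gpr2.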
5269
5270static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
5271 const SDLoc &dl, const PPCSubtarget &Subtarget) {
5272 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
5273 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
5274 return SDValue(Dest, 0);
5275
5276 // Returns true if the callee is local, and false otherwise.
5277 auto isLocalCallee = [&]() {
5278 const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
5279 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5280 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5281
5282 return DAG.getTarget().shouldAssumeDSOLocal(*Mod, GV) &&
5283 !isa_and_nonnull<GlobalIFunc>(GV);
5284 };
5285
5286 // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
5287 // a static relocation model causes some versions of GNU LD (2.17.50, at
5288 // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5289 // built with secure-PLT.
5290 bool UsePlt =
5291 Subtarget.is32BitELFABI() && !isLocalCallee() &&
5292 Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;
5293
5294 const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5295 const TargetMachine &TM = Subtarget.getTargetMachine();
5296 const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
5297 MCSymbolXCOFF *S =
5298 cast<MCSymbolXCOFF>(TLOF->getFunctionEntryPointSymbol(GV, TM));
5299
5300 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5301 return DAG.getMCSymbol(S, PtrVT);
5302 };
5303
5304 if (isFunctionGlobalAddress(Callee)) {
5305 const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5306
5307 if (Subtarget.isAIXABI()) {
5308 assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.")(static_cast <bool> (!isa<GlobalIFunc>(GV) &&
"IFunc is not supported on AIX.") ? void (0) : __assert_fail
("!isa<GlobalIFunc>(GV) && \"IFunc is not supported on AIX.\""
, "llvm/lib/Target/PowerPC/PPCISelLowering.cpp", 5308, __extension__
__PRETTY_FUNCTION__))
;
5309 return getAIXFuncEntryPointSymbolSDNode(GV);
5310 }
5311 return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
5312 UsePlt ? PPCII::MO_PLT : 0);
5313 }
5314
5315 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5316 const char *SymName = S->getSymbol();
5317 if (Subtarget.isAIXABI()) {
5318 // If there exists a user-declared function whose name is the same as the
5319 // ExternalSymbol's, then we pick up the user-declared version.
5320 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5321 if (const Function *F =
5322 dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
5323 return getAIXFuncEntryPointSymbolSDNode(F);
5324
5325 // On AIX, direct function calls reference the symbol for the function's
5326 // entry point, which is named by prepending a "." before the function's
5327 // C-linkage name. A Qualname is returned here because an external
5328 // function entry point is a csect with XTY_ER property.
5329 const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5330 auto &Context = DAG.getMachineFunction().getMMI().getContext();
5331 MCSectionXCOFF *Sec = Context.getXCOFFSection(
5332 (Twine(".") + Twine(SymName)).str(), SectionKind::getMetadata(),
5333 XCOFF::CsectProperties(XCOFF::XMC_PR, XCOFF::XTY_ER));
5334 return Sec->getQualNameSymbol();
5335 };
5336
5337 SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
5338 }
5339 return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
5340 UsePlt ? PPCII::MO_PLT : 0);
5341 }
5342
5343 // No transformation needed.
5344 assert(Callee.getNode() && "What no callee?");
5345 return Callee;
5346}
5347
5348static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
5349 assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5350        "Expected a CALLSEQ_STARTSDNode.");
5351
5352 // The last value is the chain, except when the node has glue. If the node
5353 // has glue, then the last value is the glue, and the chain is the
5354 // second-to-last value.
5355 SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5356 if (LastValue.getValueType() != MVT::Glue)
5357 return LastValue;
5358
5359 return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5360}
5361
5362 // Creates the node that moves a function's address into the count register
5363 // to prepare for an indirect call instruction.
5364static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5365 SDValue &Glue, SDValue &Chain,
5366 const SDLoc &dl) {
5367 SDValue MTCTROps[] = {Chain, Callee, Glue};
5368 EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5369 Chain = DAG.getNode(PPCISD::MTCTR, dl, makeArrayRef(ReturnTypes, 2),
5370 makeArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5371 // The glue is the second value produced.
5372 Glue = Chain.getValue(1);
5373}
5374
5375static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5376 SDValue &Glue, SDValue &Chain,
5377 SDValue CallSeqStart,
5378 const CallBase *CB, const SDLoc &dl,
5379 bool hasNest,
5380 const PPCSubtarget &Subtarget) {
5381 // Function pointers in the 64-bit SVR4 ABI do not point to the function
5382 // entry point, but to the function descriptor (the function entry point
5383 // address is part of the function descriptor though).
5384 // The function descriptor is a three doubleword structure with the
5385 // following fields: function entry point, TOC base address and
5386 // environment pointer.
5387 // Thus for a call through a function pointer, the following actions need
5388 // to be performed:
5389 // 1. Save the TOC of the caller in the TOC save area of its stack
5390 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
5391 // 2. Load the address of the function entry point from the function
5392 // descriptor.
5393 // 3. Load the TOC of the callee from the function descriptor into r2.
5394 // 4. Load the environment pointer from the function descriptor into
5395 // r11.
5396 // 5. Branch to the function entry point address.
5397 // 6. On return of the callee, the TOC of the caller needs to be
5398 // restored (this is done in FinishCall()).
5399 //
5400 // The loads are scheduled at the beginning of the call sequence, and the
5401 // register copies are flagged together to ensure that no other
5402 // operations can be scheduled in between. E.g. without flagging the
5403 // copies together, a TOC access in the caller could be scheduled between
5404 // the assignment of the callee TOC and the branch to the callee, which leads
5405 // to incorrect code.
5406
5407 // Start by loading the function address from the descriptor.
5408 SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
5409 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5410 ? (MachineMemOperand::MODereferenceable |
5411 MachineMemOperand::MOInvariant)
5412 : MachineMemOperand::MONone;
5413
5414 MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5415
5416 // Registers used in building the DAG.
5417 const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5418 const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5419
5420 // Offsets of descriptor members.
5421 const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5422 const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5423
5424 const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5425 const unsigned Alignment = Subtarget.isPPC64() ? 8 : 4;
5426
5427 // One load for the function's entry point address.
5428 SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
5429 Alignment, MMOFlags);
5430
5431 // One for loading the TOC anchor for the module that contains the called
5432 // function.
5433 SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
5434 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
5435 SDValue TOCPtr =
5436 DAG.getLoad(RegVT, dl, LDChain, AddTOC,
5437 MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
5438
5439 // One for loading the environment pointer.
5440 SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
5441 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
5442 SDValue LoadEnvPtr =
5443 DAG.getLoad(RegVT, dl, LDChain, AddPtr,
5444 MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
5445
5446
5447 // Then copy the newly loaded TOC anchor to the TOC pointer.
5448 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
5449 Chain = TOCVal.getValue(0);
5450 Glue = TOCVal.getValue(1);
5451
5452 // If the function call has an explicit 'nest' parameter, it takes the
5453 // place of the environment pointer.
5454 assert((!hasNest || !Subtarget.isAIXABI()) &&
5455        "Nest parameter is not supported on AIX.");
5456 if (!hasNest) {
5457 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
5458 Chain = EnvVal.getValue(0);
5459 Glue = EnvVal.getValue(1);
5460 }
5461
5462 // The rest of the indirect call sequence is the same as the non-descriptor
5463 // DAG.
5464 prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
5465}
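// A minimal sketch of the three-doubleword function descriptor layout that
// the comment at the top of prepareDescriptorIndirectCall describes. The
// struct and field names are illustrative and are not part of this file.
struct PPC64FunctionDescriptorSketch {
  unsigned long long EntryPoint; // loaded and moved into CTR for the branch
  unsigned long long TOCBase;    // copied into the TOC pointer register (r2)
  unsigned long long EnvPointer; // copied into r11 unless 'nest' is used
};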
5466
5467static void
5468buildCallOperands(SmallVectorImpl<SDValue> &Ops,
5469 PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5470 SelectionDAG &DAG,
5471 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5472 SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5473 const PPCSubtarget &Subtarget) {
5474 const bool IsPPC64 = Subtarget.isPPC64();
5475 // MVT for a general purpose register.
5476 const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
5477
5478 // First operand is always the chain.
5479 Ops.push_back(Chain);
5480
5481 // If it's a direct call, pass the callee as the second operand.
5482 if (!CFlags.IsIndirect)
5483 Ops.push_back(Callee);
5484 else {
5485 assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5486
5487 // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5488 // on the stack (this would have been done in `LowerCall_64SVR4` or
5489 // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5490 // represents both the indirect branch and a load that restores the TOC
5491 // pointer from the linkage area. The operand for the TOC restore is an add
5492 // of the TOC save offset to the stack pointer. This must be the second
5493 // operand: after the chain input but before any other variadic arguments.
5494 // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5495 // saved or used.
5496 if (isTOCSaveRestoreRequired(Subtarget)) {
5497 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5498
5499 SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
5500 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5501 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5502 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
5503 Ops.push_back(AddTOC);
5504 }
5505
5506 // Add the register used for the environment pointer.
5507 if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5508 Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
5509 RegVT));
5510
5511
5512 // Add CTR register as callee so a bctr can be emitted later.
5513 if (CFlags.IsTailCall)
5514 Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5515 }
5516
5517 // If this is a tail call, add the stack pointer delta.
5518 if (CFlags.IsTailCall)
5519 Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5520
5521 // Add argument registers to the end of the list so that they are known live
5522 // into the call.
5523 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
5524 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
5525 RegsToPass[i].second.getValueType()));
5526
5527 // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5528 // no way to mark dependencies as implicit here.
5529 // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5530 if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5531 !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5532 Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
5533
5534 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5535 if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5536 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5537
5538 // Add a register mask operand representing the call-preserved registers.
5539 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5540 const uint32_t *Mask =
5541 TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
5542 assert(Mask && "Missing call preserved mask for calling convention")(static_cast <bool> (Mask && "Missing call preserved mask for calling convention"
) ? void (0) : __assert_fail ("Mask && \"Missing call preserved mask for calling convention\""
, "llvm/lib/Target/PowerPC/PPCISelLowering.cpp", 5542, __extension__
__PRETTY_FUNCTION__))
;
5543 Ops.push_back(DAG.getRegisterMask(Mask));
5544
5545 // If the glue is valid, it is the last operand.
5546 if (Glue.getNode())
5547 Ops.push_back(Glue);
5548}
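// For reference, the operand order assembled above for an indirect call on a
// TOC-based ABI is:
//   Chain, [TOC-restore address], [environment-pointer register],
//   [CTR register if tail call], [SPDiff if tail call], argument registers...,
//   [TOC pointer register], register mask, [Glue if present]
// Bracketed entries are only added when the corresponding condition checked
// above holds.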
5549
5550SDValue PPCTargetLowering::FinishCall(
5551 CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5552 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5553 SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5554 unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5555 SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5556
5557 if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5558 Subtarget.isAIXABI())
5559 setUsesTOCBasePtr(DAG);
5560
5561 unsigned CallOpc =
5562 getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
5563 Subtarget, DAG.getTarget(), CB ? CB->isStrictFP() : false);
5564
5565 if (!CFlags.IsIndirect)
5566 Callee = transformCallee(Callee, DAG, dl, Subtarget);
5567 else if (Subtarget.usesFunctionDescriptors())
5568 prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5569 dl, CFlags.HasNest, Subtarget);
5570 else
5571 prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5572
5573 // Build the operand list for the call instruction.
5574 SmallVector<SDValue, 8> Ops;
5575 buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5576 SPDiff, Subtarget);
5577
5578 // Emit tail call.
5579 if (CFlags.IsTailCall) {
5580 // Indirect tail calls when using PC Relative calls do not have the same
5581 // constraints.
5582 assert(((Callee.getOpcode() == ISD::Register &&
5583          cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5584         Callee.getOpcode() == ISD::TargetExternalSymbol ||
5585         Callee.getOpcode() == ISD::TargetGlobalAddress ||
5586         isa<ConstantSDNode>(Callee) ||
5587         (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5588        "Expecting a global address, external symbol, absolute value, "
5589        "register or an indirect tail call when PC Relative calls are "
5590        "used.");
5591 // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5592 assert(CallOpc == PPCISD::TC_RETURN &&
5593        "Unexpected call opcode for a tail call.");
5594 DAG.getMachineFunction().getFrameInfo().setHasTailCall();
5595 return DAG.getNode(CallOpc, dl, MVT::Other, Ops);
5596 }
5597
5598 std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5599 Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
5600 DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
5601 Glue = Chain.getValue(1);
5602
5603 // When performing tail call optimization the callee pops its arguments off
5604 // the stack. Account for this here so these bytes can be pushed back on in
5605 // PPCFrameLowering::eliminateCallFramePseudoInstr.
5606 int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5607 getTargetMachine().Options.GuaranteedTailCallOpt)
5608 ? NumBytes
5609 : 0;
5610
5611 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
5612 DAG.getIntPtrConstant(BytesCalleePops, dl, true),
5613 Glue, dl);
5614 Glue = Chain.getValue(1);
5615
5616 return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
5617 DAG, InVals);
5618}
5619
5620SDValue
5621PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5622 SmallVectorImpl<SDValue> &InVals) const {
5623 SelectionDAG &DAG = CLI.DAG;
5624 SDLoc &dl = CLI.DL;
5625 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5626 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5627 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
5628 SDValue Chain = CLI.Chain;
5629 SDValue Callee = CLI.Callee;
5630 bool &isTailCall = CLI.IsTailCall;
5631 CallingConv::ID CallConv = CLI.CallConv;
5632 bool isVarArg = CLI.IsVarArg;
5633 bool isPatchPoint = CLI.IsPatchPoint;
5634 const CallBase *CB = CLI.CB;
5635
5636 if (isTailCall) {
5637 if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5638 isTailCall = false;
5639 else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5640 isTailCall = IsEligibleForTailCallOptimization_64SVR4(
5641 Callee, CallConv, CB, isVarArg, Outs, Ins, DAG);
5642 else
5643 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
5644 Ins, DAG);
5645 if (isTailCall) {
5646 ++NumTailCalls;
5647 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5648 ++NumSiblingCalls;
5649
5650 // PC Relative calls no longer guarantee that the callee is a Global
5651 // Address Node. The callee could be an indirect tail call in which
5652 // case the SDValue for the callee could be a load (to load the address
5653 // of a function pointer) or it may be a register copy (to move the
5654 // address of the callee from a function parameter into a virtual
5655 // register). It may also be an ExternalSymbolSDNode (e.g. memcpy).
5656 assert((Subtarget.isUsingPCRelativeCalls() ||
5657         isa<GlobalAddressSDNode>(Callee)) &&
5658        "Callee should be an llvm::Function object.");
5659
5660 LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5661                   << "\nTCO callee: ");
5662 LLVM_DEBUG(Callee.dump());
5663 }
5664 }
5665
5666 if (!isTailCall && CB && CB->isMustTailCall())
5667 report_fatal_error("failed to perform tail call elimination on a call "
5668 "site marked musttail");
5669
5670 // When long calls (i.e. indirect calls) are always used, calls are always
5671 // made via function pointer. If we have a function name, first translate it
5672 // into a pointer.
5673 if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5674 !isTailCall)
5675 Callee = LowerGlobalAddress(Callee, DAG);
5676
5677 CallFlags CFlags(
5678 CallConv, isTailCall, isVarArg, isPatchPoint,
5679 isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
5680 // hasNest
5681 Subtarget.is64BitELFABI() &&
5682 any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
5683 CLI.NoMerge);
5684
5685 if (Subtarget.isAIXABI())
5686 return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5687 InVals, CB);
5688
5689 assert(Subtarget.isSVR4ABI());
5690 if (Subtarget.isPPC64())
5691 return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5692 InVals, CB);
5693 return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5694 InVals, CB);
5695}
5696
5697SDValue PPCTargetLowering::LowerCall_32SVR4(
5698 SDValue Chain, SDValue Callee, CallFlags CFlags,
5699 const SmallVectorImpl<ISD::OutputArg> &Outs,
5700 const SmallVectorImpl<SDValue> &OutVals,
5701 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5702 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5703 const CallBase *CB) const {
5704 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5705 // of the 32-bit SVR4 ABI stack frame layout.
5706
5707 const CallingConv::ID CallConv = CFlags.CallConv;
5708 const bool IsVarArg = CFlags.IsVarArg;
5709 const bool IsTailCall = CFlags.IsTailCall;
5710
5711 assert((CallConv == CallingConv::C ||
5712         CallConv == CallingConv::Cold ||
5713         CallConv == CallingConv::Fast) && "Unknown calling convention!");
5714
5715 const Align PtrAlign(4);
5716
5717 MachineFunction &MF = DAG.getMachineFunction();
5718
5719 // Mark this function as potentially containing a tail call. As a
5720 // consequence, the frame pointer will be used for dynamic allocation and for
5721 // restoring the caller's stack pointer in this function's epilogue. This is
5722 // done because the tail-called function might overwrite the value in this
5723 // function's (MF) stack pointer stack slot 0(SP).
5724 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5725 CallConv == CallingConv::Fast)
5726 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5727
5728 // Count how many bytes are to be pushed on the stack, including the linkage
5729 // area, parameter list area and the part of the local variable space which
5730 // contains copies of aggregates which are passed by value.
5731
5732 // Assign locations to all of the outgoing arguments.
5733 SmallVector<CCValAssign, 16> ArgLocs;
5734 PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5735
5736 // Reserve space for the linkage area on the stack.
5737 CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
5738 PtrAlign);
5739 if (useSoftFloat())
5740 CCInfo.PreAnalyzeCallOperands(Outs);
5741
5742 if (IsVarArg) {
5743 // Handle fixed and variable vector arguments differently.
5744 // Fixed vector arguments go into registers as long as registers are
5745 // available. Variable vector arguments always go into memory.
5746 unsigned NumArgs = Outs.size();
5747
5748 for (unsigned i = 0; i != NumArgs; ++i) {
5749 MVT ArgVT = Outs[i].VT;
5750 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5751 bool Result;
5752
5753 if (Outs[i].IsFixed) {
5754 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
5755 CCInfo);
5756 } else {
5757 Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
5758 ArgFlags, CCInfo);
5759 }
5760
5761 if (Result) {
5762#ifndef NDEBUG
5763 errs() << "Call operand #" << i << " has unhandled type "
5764 << EVT(ArgVT).getEVTString() << "\n";
5765#endif
5766 llvm_unreachable(nullptr);
5767 }
5768 }
5769 } else {
5770 // All arguments are treated the same.
5771 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
5772 }
5773 CCInfo.clearWasPPCF128();
5774
5775 // Assign locations to all of the outgoing aggregate by value arguments.
5776 SmallVector<CCValAssign, 16> ByValArgLocs;
5777 CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
5778
5779 // Reserve stack space for the allocations in CCInfo.
5780 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);
5781
5782 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
5783
5784 // Size of the linkage area, parameter list area and the part of the local
5785 // variable space where copies of aggregates which are passed by value are
5786 // stored.
5787 unsigned NumBytes = CCByValInfo.getNextStackOffset();
5788
5789 // Calculate by how many bytes the stack has to be adjusted in case of tail
5790 // call optimization.
5791 int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
5792
5793 // Adjust the stack pointer for the new arguments...
5794 // These operations are automatically eliminated by the prolog/epilog pass
5795 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5796 SDValue CallSeqStart = Chain;
5797
5798 // Load the return address and frame pointer so they can be moved somewhere
5799 // else later.
5800 SDValue LROp, FPOp;
5801 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5802
5803 // Set up a copy of the stack pointer for use loading and storing any
5804 // arguments that may not fit in the registers available for argument
5805 // passing.
5806 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5807
5808 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5809 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5810 SmallVector<SDValue, 8> MemOpChains;
5811
5812 bool seenFloatArg = false;
5813 // Walk the register/memloc assignments, inserting copies/loads.
5814 // i - Tracks the index into the list of registers allocated for the call
5815 // RealArgIdx - Tracks the index into the list of actual function arguments
5816 // j - Tracks the index into the list of byval arguments
5817 for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
5818 i != e;
5819 ++i, ++RealArgIdx) {
5820 CCValAssign &VA = ArgLocs[i];
5821 SDValue Arg = OutVals[RealArgIdx];
5822 ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
5823
5824 if (Flags.isByVal()) {
5825 // Argument is an aggregate which is passed by value, thus we need to
5826 // create a copy of it in the local variable space of the current stack
5827 // frame (which is the stack frame of the caller) and pass the address of
5828 // this copy to the callee.
5829 assert((j < ByValArgLocs.size()) && "Index out of bounds!")(static_cast <bool> ((j < ByValArgLocs.size()) &&
"Index out of bounds!") ? void (0) : __assert_fail ("(j < ByValArgLocs.size()) && \"Index out of bounds!\""
, "llvm/lib/Target/PowerPC/PPCISelLowering.cpp", 5829, __extension__
__PRETTY_FUNCTION__))
;
5830 CCValAssign &ByValVA = ByValArgLocs[j++];
5831 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
5832
5833 // Memory reserved in the local variable space of the caller's stack frame.
5834 unsigned LocMemOffset = ByValVA.getLocMemOffset();
5835
5836 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5837 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5838 StackPtr, PtrOff);
5839
5840 // Create a copy of the argument in the local area of the current
5841 // stack frame.
5842 SDValue MemcpyCall =
5843 CreateCopyOfByValArgument(Arg, PtrOff,
5844 CallSeqStart.getNode()->getOperand(0),
5845 Flags, DAG, dl);
5846
5847 // This must go outside the CALLSEQ_START..END.
5848 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
5849 SDLoc(MemcpyCall));
5850 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5851 NewCallSeqStart.getNode());
5852 Chain = CallSeqStart = NewCallSeqStart;
5853
5854 // Pass the address of the aggregate copy on the stack either in a
5855 // physical register or in the parameter list area of the current stack
5856 // frame to the callee.
5857 Arg = PtrOff;
5858 }
5859
5860 // When useCRBits() is true, there can be i1 arguments.
5861 // This is because getRegisterType(MVT::i1) => MVT::i1,
5862 // while for other integer types getRegisterType() => MVT::i32.
5863 // Extend i1 and ensure the callee will get i32.
5864 if (Arg.getValueType() == MVT::i1)
5865 Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
5866 dl, MVT::i32, Arg);
5867
5868 if (VA.isRegLoc()) {
5869 seenFloatArg |= VA.getLocVT().isFloatingPoint();
5870 // Put argument in a physical register.
5871 if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
5872 bool IsLE = Subtarget.isLittleEndian();
5873 SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5874 DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
5875 RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
5876 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5877 DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
5878 RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
5879 SVal.getValue(0)));
5880 } else
5881 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
5882 } else {
5883 // Put argument in the parameter list area of the current stack frame.
5884 assert(VA.isMemLoc());
5885 unsigned LocMemOffset = VA.getLocMemOffset();
5886
5887 if (!IsTailCall) {
5888 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5889 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5890 StackPtr, PtrOff);
5891
5892 MemOpChains.push_back(
5893 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5894 } else {
5895 // Calculate and remember argument location.
5896 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
5897 TailCallArguments);
5898 }
5899 }
5900 }
5901
5902 if (!MemOpChains.empty())
5903 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
5904
5905 // Build a sequence of copy-to-reg nodes chained together with token chain
5906 // and flag operands which copy the outgoing args into the appropriate regs.
5907 SDValue InFlag;
5908 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5909 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5910 RegsToPass[i].second, InFlag);
5911 InFlag = Chain.getValue(1);
5912 }
5913
5914 // Set CR bit 6 to true if this is a vararg call with floating args passed in
5915 // registers.
5916 if (IsVarArg) {
5917 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
5918 SDValue Ops[] = { Chain, InFlag };
5919
5920 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
5921 dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));
5922
5923 InFlag = Chain.getValue(1);
5924 }
5925
5926 if (IsTailCall)
5927 PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
5928 TailCallArguments);
5929
5930 return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
5931 Callee, SPDiff, NumBytes, Ins, InVals, CB);
5932}
5933
5934// Copy an argument into memory, being careful to do this outside the
5935// call sequence for the call to which the argument belongs.
5936SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
5937 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
5938 SelectionDAG &DAG, const SDLoc &dl) const {
5939 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
5940 CallSeqStart.getNode()->getOperand(0),
5941 Flags, DAG, dl);
5942 // The MEMCPY must go outside the CALLSEQ_START..END.
5943 int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
5944 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
5945 SDLoc(MemcpyCall));
5946 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5947 NewCallSeqStart.getNode());
5948 return NewCallSeqStart;
5949}
5950
5951SDValue PPCTargetLowering::LowerCall_64SVR4(
5952 SDValue Chain, SDValue Callee, CallFlags CFlags,
5953 const SmallVectorImpl<ISD::OutputArg> &Outs,
5954 const SmallVectorImpl<SDValue> &OutVals,
5955 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5956 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5957 const CallBase *CB) const {
5958 bool isELFv2ABI = Subtarget.isELFv2ABI();
5959 bool isLittleEndian = Subtarget.isLittleEndian();
5960 unsigned NumOps = Outs.size();
5961 bool IsSibCall = false;
5962 bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
5963
5964 EVT PtrVT = getPointerTy(DAG.getDataLayout());
5965 unsigned PtrByteSize = 8;
5966
5967 MachineFunction &MF = DAG.getMachineFunction();
5968
5969 if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
5970 IsSibCall = true;
5971
5972 // Mark this function as potentially containing a tail call. As a
5973 // consequence, the frame pointer will be used for dynamic allocation and for
5974 // restoring the caller's stack pointer in this function's epilogue. This is
5975 // done because the tail-called function might overwrite the value in this
5976 // function's (MF) stack pointer stack slot 0(SP).
5977 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
5978 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5979
5980 assert(!(IsFastCall && CFlags.IsVarArg) &&
5981        "fastcc not supported on varargs functions");
5982
5983 // Count how many bytes are to be pushed on the stack, including the linkage
5984 // area and the parameter passing area. On ELFv1, the linkage area is 48
5985 // bytes of reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the
5986 // linkage area is 32 bytes of reserved space for [SP][CR][LR][TOC].
5987 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
5988 unsigned NumBytes = LinkageSize;
5989 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
5990
5991 static const MCPhysReg GPR[] = {
5992 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
5993 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
5994 };
5995 static const MCPhysReg VR[] = {
5996 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
5997 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
5998 };
5999
6000 const unsigned NumGPRs = array_lengthof(GPR);
6001 const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
6002 const unsigned NumVRs = array_lengthof(VR);
6003
6004 // On ELFv2, we can avoid allocating the parameter area if all the arguments
6005 // can be passed to the callee in registers.
6006 // For the fast calling convention, there is another check below.
6007 // Note: We should keep this consistent with LowerFormalArguments_64SVR4().
6008 bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
6009 if (!HasParameterArea) {
6010 unsigned ParamAreaSize = NumGPRs * PtrByteSize;
6011 unsigned AvailableFPRs = NumFPRs;
6012 unsigned AvailableVRs = NumVRs;
6013 unsigned NumBytesTmp = NumBytes;
6014 for (unsigned i = 0; i != NumOps; ++i) {
6015 if (Outs[i].Flags.isNest()) continue;
6016 if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
6017 PtrByteSize, LinkageSize, ParamAreaSize,
6018 NumBytesTmp, AvailableFPRs, AvailableVRs))
6019 HasParameterArea = true;
6020 }
6021 }
6022
6023 // When using the fast calling convention, we don't provide backing for
6024 // arguments that will be in registers.
6025 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6026
6027 // Avoid allocating parameter area for fastcc functions if all the arguments
6028 // can be passed in the registers.
6029 if (IsFastCall)
6030 HasParameterArea = false;
6031
6032 // Add up all the space actually used.
6033 for (unsigned i = 0; i != NumOps; ++i) {
6034 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6035 EVT ArgVT = Outs[i].VT;
6036 EVT OrigVT = Outs[i].ArgVT;
6037
6038 if (Flags.isNest())
6039 continue;
6040
6041 if (IsFastCall) {
6042 if (Flags.isByVal()) {
6043 NumGPRsUsed += (Flags.getByValSize()+7)/8;
6044 if (NumGPRsUsed > NumGPRs)
6045 HasParameterArea = true;
6046 } else {
6047 switch (ArgVT.getSimpleVT().SimpleTy) {
6048 default: llvm_unreachable("Unexpected ValueType for argument!");
6049 case MVT::i1:
6050 case MVT::i32:
6051 case MVT::i64:
6052 if (++NumGPRsUsed <= NumGPRs)
6053 continue;
6054 break;
6055 case MVT::v4i32:
6056 case MVT::v8i16:
6057 case MVT::v16i8:
6058 case MVT::v2f64:
6059 case MVT::v2i64:
6060 case MVT::v1i128:
6061 case MVT::f128:
6062 if (++NumVRsUsed <= NumVRs)
6063 continue;
6064 break;
6065 case MVT::v4f32:
6066 if (++NumVRsUsed <= NumVRs)
6067 continue;
6068 break;
6069 case MVT::f32:
6070 case MVT::f64:
6071 if (++NumFPRsUsed <= NumFPRs)
6072 continue;
6073 break;
6074 }
6075 HasParameterArea = true;
6076 }
6077 }
6078
6079 /* Respect alignment of argument on the stack. */
6080 auto Alignment =
6081 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6082 NumBytes = alignTo(NumBytes, Alignment);
6083
6084 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6085 if (Flags.isInConsecutiveRegsLast())
6086 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6087 }
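// Worked example of this sizing loop (illustrative, assuming ELFv2 with
// LinkageSize = 32 and a non-fastcc call taking (i32, f64, <4 x i32>)):
// NumBytes starts at 32; the i32 occupies one 8-byte slot (40), the f64
// another 8 bytes (48), and the vector is aligned to 16 (already satisfied)
// and adds 16 bytes, leaving NumBytes = 64 here. The code below may still
// raise this to LinkageSize + 8 * PtrByteSize, or reset it to LinkageSize,
// depending on HasParameterArea.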
6088
6089 unsigned NumBytesActuallyUsed = NumBytes;
6090
6091 // In the old ELFv1 ABI,
6092 // the prolog code of the callee may store up to 8 GPR argument registers to
6093 // the stack, allowing va_start to index over them in memory if it is varargs.
6094 // Because we cannot tell if this is needed on the caller side, we have to
6095 // conservatively assume that it is needed. As such, make sure we have at
6096 // least enough stack space for the caller to store the 8 GPRs.
6097 // In the ELFv2 ABI, we allocate the parameter area iff a callee
6098 // really requires memory operands, e.g. a vararg function.
6099 if (HasParameterArea)
6100 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6101 else
6102 NumBytes = LinkageSize;
6103
6104 // Tail call needs the stack to be aligned.
6105 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6106 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6107
6108 int SPDiff = 0;
6109
6110 // Calculate by how many bytes the stack has to be adjusted in case of tail
6111 // call optimization.
6112 if (!IsSibCall)
6113 SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6114
6115 // To protect arguments on the stack from being clobbered in a tail call,
6116 // force all the loads to happen before doing any other lowering.
6117 if (CFlags.IsTailCall)
6118 Chain = DAG.getStackArgumentTokenFactor(Chain);
6119
6120 // Adjust the stack pointer for the new arguments...
6121 // These operations are automatically eliminated by the prolog/epilog pass
6122 if (!IsSibCall)
6123 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6124 SDValue CallSeqStart = Chain;
6125
6126 // Load the return address and frame pointer so they can be moved
6127 // somewhere else later.
6128 SDValue LROp, FPOp;
6129 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6130
6131 // Set up a copy of the stack pointer for use loading and storing any
6132 // arguments that may not fit in the registers available for argument
6133 // passing.
6134 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6135
6136 // Figure out which arguments are going to go in registers, and which in
6137 // memory. Also, if this is a vararg function, floating point operations
6138 // must be stored to our stack, and loaded into integer regs as well, if
6139 // any integer regs are available for argument passing.
6140 unsigned ArgOffset = LinkageSize;
6141
6142 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6143 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6144
6145 SmallVector<SDValue, 8> MemOpChains;
6146 for (unsigned i = 0; i != NumOps; ++i) {
6147 SDValue Arg = OutVals[i];
6148 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6149 EVT ArgVT = Outs[i].VT;
6150 EVT OrigVT = Outs[i].ArgVT;
6151
6152 // PtrOff will be used to store the current argument to the stack if a
6153 // register cannot be found for it.
6154 SDValue PtrOff;
6155
6156 // We re-align the argument offset for each argument, except when using the
6157 // fast calling convention, when we need to make sure we do that only when
6158 // we'll actually use a stack slot.
6159 auto ComputePtrOff = [&]() {
6160 /* Respect alignment of argument on the stack. */
6161 auto Alignment =
6162 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6163 ArgOffset = alignTo(ArgOffset, Alignment);
6164
6165 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6166
6167 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6168 };
6169
6170 if (!IsFastCall) {
6171 ComputePtrOff();
6172
6173 /* Compute GPR index associated with argument offset. */
6174 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6175 GPR_idx = std::min(GPR_idx, NumGPRs);
6176 }
6177
6178 // Promote integers to 64-bit values.
6179 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6180 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6181 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6182 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6183 }
6184
6185 // FIXME memcpy is used way more than necessary. Correctness first.
6186 // Note: "by value" is code for passing a structure by value, not
6187 // basic types.
6188 if (Flags.isByVal()) {
6189 // Note: Size includes alignment padding, so
6190 // struct x { short a; char b; }
6191 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
6192 // These are the proper values we need for right-justifying the
6193 // aggregate in a parameter register.
6194 unsigned Size = Flags.getByValSize();
6195
6196 // An empty aggregate parameter takes up no storage and no
6197 // registers.
6198 if (Size == 0)
6199 continue;
6200
6201 if (IsFastCall)
6202 ComputePtrOff();
6203
6204 // All aggregates smaller than 8 bytes must be passed right-justified.
6205 if (Size==1 || Size==2 || Size==4) {
6206 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6207 if (GPR_idx != NumGPRs) {
6208 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6209 MachinePointerInfo(), VT);
6210 MemOpChains.push_back(Load.getValue(1));
6211 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6212
6213 ArgOffset += PtrByteSize;
6214 continue;
6215 }
6216 }
6217
6218 if (GPR_idx == NumGPRs && Size < 8) {
6219 SDValue AddPtr = PtrOff;
6220 if (!isLittleEndian) {
6221 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6222 PtrOff.getValueType());
6223 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6224 }
6225 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6226 CallSeqStart,
6227 Flags, DAG, dl);
6228 ArgOffset += PtrByteSize;
6229 continue;
6230 }
6231 // Copy the object to the parameter save area if it cannot be entirely
6232 // passed in registers.
6233 // FIXME: we only need to copy the parts which need to be passed in
6234 // parameter save area. For the parts passed by registers, we don't need
6235 // to copy them to the stack although we need to allocate space for them
6236 // in parameter save area.
6237 if ((NumGPRs - GPR_idx) * PtrByteSize < Size)
6238 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6239 CallSeqStart,
6240 Flags, DAG, dl);
6241
6242 // When a register is available, pass a small aggregate right-justified.
6243 if (Size < 8 && GPR_idx != NumGPRs) {
6244 // The easiest way to get this right-justified in a register
6245 // is to copy the structure into the rightmost portion of a
6246 // local variable slot, then load the whole slot into the
6247 // register.
6248 // FIXME: The memcpy seems to produce pretty awful code for
6249 // small aggregates, particularly for packed ones.
6250 // FIXME: It would be preferable to use the slot in the
6251 // parameter save area instead of a new local variable.
6252 SDValue AddPtr = PtrOff;
6253 if (!isLittleEndian) {
6254 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6255 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6256 }
6257 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6258 CallSeqStart,
6259 Flags, DAG, dl);
6260
6261 // Load the slot into the register.
6262 SDValue Load =
6263 DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6264 MemOpChains.push_back(Load.getValue(1));
6265 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6266
6267 // Done with this argument.
6268 ArgOffset += PtrByteSize;
6269 continue;
6270 }
6271
6272 // For aggregates larger than PtrByteSize, copy the pieces of the
6273 // object that fit into registers from the parameter save area.
6274 for (unsigned j=0; j<Size; j+=PtrByteSize) {
6275 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6276 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6277 if (GPR_idx != NumGPRs) {
6278 unsigned LoadSizeInBits = std::min(PtrByteSize, (Size - j)) * 8;
6279 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), LoadSizeInBits);
6280 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, AddArg,
6281 MachinePointerInfo(), ObjType);
6282
6283 MemOpChains.push_back(Load.getValue(1));
6284 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6285 ArgOffset += PtrByteSize;
6286 } else {
6287 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6288 break;
6289 }
6290 }
6291 continue;
6292 }
6293
6294 switch (Arg.getSimpleValueType().SimpleTy) {
6295 default: llvm_unreachable("Unexpected ValueType for argument!");
6296 case MVT::i1:
6297 case MVT::i32:
6298 case MVT::i64:
6299 if (Flags.isNest()) {
6300 // The 'nest' parameter, if any, is passed in R11.
6301 RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6302 break;
6303 }
6304
6305 // These can be scalar arguments or elements of an integer array type
6306 // passed directly. Clang may use those instead of "byval" aggregate
6307 // types to avoid forcing arguments to memory unnecessarily.
6308 if (GPR_idx != NumGPRs) {
6309 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6310 } else {
6311 if (IsFastCall)
6312 ComputePtrOff();
6313
6314 assert(HasParameterArea &&
6315 "Parameter area must exist to pass an argument in memory.");
6316 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6317 true, CFlags.IsTailCall, false, MemOpChains,
6318 TailCallArguments, dl);
6319 if (IsFastCall)
6320 ArgOffset += PtrByteSize;
6321 }
6322 if (!IsFastCall)
6323 ArgOffset += PtrByteSize;
6324 break;
6325 case MVT::f32:
6326 case MVT::f64: {
6327 // These can be scalar arguments or elements of a float array type
6328 // passed directly. The latter are used to implement ELFv2 homogenous
6329 // float aggregates.
6330
6331 // Named arguments go into FPRs first, and once they overflow, the
6332 // remaining arguments go into GPRs and then the parameter save area.
6333 // Unnamed arguments for vararg functions always go to GPRs and
6334 // then the parameter save area. For now, put all arguments to vararg
6335 // routines always in both locations (FPR *and* GPR or stack slot).
6336 bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6337 bool NeededLoad = false;
6338
6339 // First load the argument into the next available FPR.
6340 if (FPR_idx != NumFPRs)
6341 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6342
6343 // Next, load the argument into GPR or stack slot if needed.
6344 if (!NeedGPROrStack)
6345 ;
6346 else if (GPR_idx != NumGPRs && !IsFastCall) {
6347 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6348 // once we support fp <-> gpr moves.
6349
6350 // In the non-vararg case, this can only ever happen in the
6351 // presence of f32 array types, since otherwise we never run
6352 // out of FPRs before running out of GPRs.
6353 SDValue ArgVal;
6354
6355 // Double values are always passed in a single GPR.
6356 if (Arg.getValueType() != MVT::f32) {
6357 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6358
6359 // Non-array float values are extended and passed in a GPR.
6360 } else if (!Flags.isInConsecutiveRegs()) {
6361 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6362 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6363
6364 // If we have an array of floats, we collect every odd element
6365 // together with its predecessor into one GPR.
6366 } else if (ArgOffset % PtrByteSize != 0) {
6367 SDValue Lo, Hi;
6368 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6369 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6370 if (!isLittleEndian)
6371 std::swap(Lo, Hi);
6372 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6373
6374 // The final element, if even, goes into the first half of a GPR.
6375 } else if (Flags.isInConsecutiveRegsLast()) {
6376 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6377 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6378 if (!isLittleEndian)
6379 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6380 DAG.getConstant(32, dl, MVT::i32));
6381
6382 // Non-final even elements are skipped; they will be handled
6383 // together with the subsequent argument on the next go-around.
6384 } else
6385 ArgVal = SDValue();
6386
6387 if (ArgVal.getNode())
6388 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6389 } else {
6390 if (IsFastCall)
6391 ComputePtrOff();
6392
6393 // Single-precision floating-point values are mapped to the
6394 // second (rightmost) word of the stack doubleword.
6395 if (Arg.getValueType() == MVT::f32 &&
6396 !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6397 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6398 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6399 }
6400
6401 assert(HasParameterArea &&
6402 "Parameter area must exist to pass an argument in memory.");
6403 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6404 true, CFlags.IsTailCall, false, MemOpChains,
6405 TailCallArguments, dl);
6406
6407 NeededLoad = true;
6408 }
6409 // When passing an array of floats, the array occupies consecutive
6410 // space in the argument area; only round up to the next doubleword
6411 // at the end of the array. Otherwise, each float takes 8 bytes.
6412 if (!IsFastCall || NeededLoad) {
6413 ArgOffset += (Arg.getValueType() == MVT::f32 &&
6414 Flags.isInConsecutiveRegs()) ? 4 : 8;
6415 if (Flags.isInConsecutiveRegsLast())
6416 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6417 }
6418 break;
6419 }
6420 case MVT::v4f32:
6421 case MVT::v4i32:
6422 case MVT::v8i16:
6423 case MVT::v16i8:
6424 case MVT::v2f64:
6425 case MVT::v2i64:
6426 case MVT::v1i128:
6427 case MVT::f128:
6428 // These can be scalar arguments or elements of a vector array type
6429 // passed directly. The latter are used to implement ELFv2 homogenous
6430 // vector aggregates.
6431
6432 // For a varargs call, named arguments go into VRs or on the stack as
6433 // usual; unnamed arguments always go to the stack or the corresponding
6434 // GPRs when within range. For now, we always put the value in both
6435 // locations (or even all three).
6436 if (CFlags.IsVarArg) {
6437 assert(HasParameterArea &&
6438 "Parameter area must exist if we have a varargs call.");
6439 // We could elide this store in the case where the object fits
6440 // entirely in R registers. Maybe later.
6441 SDValue Store =
6442 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6443 MemOpChains.push_back(Store);
6444 if (VR_idx != NumVRs) {
6445 SDValue Load =
6446 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6447 MemOpChains.push_back(Load.getValue(1));
6448 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6449 }
6450 ArgOffset += 16;
6451 for (unsigned i=0; i<16; i+=PtrByteSize) {
6452 if (GPR_idx == NumGPRs)
6453 break;
6454 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6455 DAG.getConstant(i, dl, PtrVT));
6456 SDValue Load =
6457 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6458 MemOpChains.push_back(Load.getValue(1));
6459 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6460 }
6461 break;
6462 }
6463
6464 // Non-varargs Altivec params go into VRs or on the stack.
6465 if (VR_idx != NumVRs) {
6466 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6467 } else {
6468 if (IsFastCall)
6469 ComputePtrOff();
6470
6471 assert(HasParameterArea &&
6472 "Parameter area must exist to pass an argument in memory.");
6473 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6474 true, CFlags.IsTailCall, true, MemOpChains,
6475 TailCallArguments, dl);
6476 if (IsFastCall)
6477 ArgOffset += 16;
6478 }
6479
6480 if (!IsFastCall)
6481 ArgOffset += 16;
6482 break;
6483 }
6484 }
6485
6486 assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6487 "mismatch in size of parameter area");
6488 (void)NumBytesActuallyUsed;
6489
6490 if (!MemOpChains.empty())
6491 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6492
6493 // Check if this is an indirect call (MTCTR/BCTRL).
6494 // See prepareDescriptorIndirectCall and buildCallOperands for more
6495 // information about calls through function pointers in the 64-bit SVR4 ABI.
6496 if (CFlags.IsIndirect) {
6497 // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6498 // caller in the TOC save area.
6499 if (isTOCSaveRestoreRequired(Subtarget)) {
6500 assert(!CFlags.IsTailCall && "Indirect tails calls not supported");
6501 // Load r2 into a virtual register and store it to the TOC save area.
6502 setUsesTOCBasePtr(DAG);
6503 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6504 // TOC save area offset.
6505 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6506 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6507 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6508 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6509 MachinePointerInfo::getStack(
6510 DAG.getMachineFunction(), TOCSaveOffset));
6511 }
6512 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6513 // This does not mean the MTCTR instruction must use R12; it's easier
6514 // to model this as an extra parameter, so do that.
6515 if (isELFv2ABI && !CFlags.IsPatchPoint)
6516 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6517 }
6518
6519 // Build a sequence of copy-to-reg nodes chained together with token chain
6520 // and flag operands which copy the outgoing args into the appropriate regs.
6521 SDValue InFlag;
6522 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6523 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6524 RegsToPass[i].second, InFlag);
6525 InFlag = Chain.getValue(1);
6526 }
6527
6528 if (CFlags.IsTailCall && !IsSibCall)
6529 PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6530 TailCallArguments);
6531
6532 return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
6533 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6534}
6535
6536// Returns true when the shadow of a general purpose argument register
6537// in the parameter save area is aligned to at least 'RequiredAlign'.
6538static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
6539 assert(RequiredAlign.value() <= 16 &&
6540 "Required alignment greater than stack alignment.");
6541 switch (Reg) {
6542 default:
6543 report_fatal_error("called on invalid register.");
6544 case PPC::R5:
6545 case PPC::R9:
6546 case PPC::X3:
6547 case PPC::X5:
6548 case PPC::X7:
6549 case PPC::X9:
6550 // These registers are 16-byte aligned, which is the strictest alignment
6551 // we can support.
6552 return true;
6553 case PPC::R3:
6554 case PPC::R7:
6555 case PPC::X4:
6556 case PPC::X6:
6557 case PPC::X8:
6558 case PPC::X10:
6559 // The shadow of these registers in the PSA is 8 byte aligned.
6560 return RequiredAlign <= 8;
6561 case PPC::R4:
6562 case PPC::R6:
6563 case PPC::R8:
6564 case PPC::R10:
6565 return RequiredAlign <= 4;
6566 }
6567}
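// For example, under the parameter save area layout assumed by the switch
// above, X3's shadow starts at the beginning of the PSA and is treated as
// 16-byte aligned, while X4's shadow starts 8 bytes in:
//   isGPRShadowAligned(PPC::X3, Align(16)) -> true
//   isGPRShadowAligned(PPC::X4, Align(16)) -> false
//   isGPRShadowAligned(PPC::X4, Align(8))  -> true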
6568
6569static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6570 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6571 CCState &S) {
6572 AIXCCState &State = static_cast<AIXCCState &>(S);
6573 const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6574 State.getMachineFunction().getSubtarget());
6575 const bool IsPPC64 = Subtarget.isPPC64();
6576 const Align PtrAlign = IsPPC64 ? Align(8) : Align(4);
6577 const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
6578
6579 if (ValVT == MVT::f128)
6580 report_fatal_error("f128 is unimplemented on AIX.");
6581
6582 if (ArgFlags.isNest())
6583 report_fatal_error("Nest arguments are unimplemented.");
6584
6585 static const MCPhysReg GPR_32[] = {// 32-bit registers.
6586 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6587 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6588 static const MCPhysReg GPR_64[] = {// 64-bit registers.
6589 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6590 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6591
6592 static const MCPhysReg VR[] = {// Vector registers.
6593 PPC::V2, PPC::V3, PPC::V4, PPC::V5,
6594 PPC::V6, PPC::V7, PPC::V8, PPC::V9,
6595 PPC::V10, PPC::V11, PPC::V12, PPC::V13};
6596
6597 if (ArgFlags.isByVal()) {
6598 if (ArgFlags.getNonZeroByValAlign() > PtrAlign)
6599 report_fatal_error("Pass-by-value arguments with alignment greater than "
6600 "register width are not supported.");
6601
6602 const unsigned ByValSize = ArgFlags.getByValSize();
6603
6604 // An empty aggregate parameter takes up no storage and no registers,
6605 // but needs a MemLoc for a stack slot for the formal arguments side.
6606 if (ByValSize == 0) {
6607 State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6608 State.getNextStackOffset(), RegVT,
6609 LocInfo));
6610 return false;
6611 }
6612
6613 const unsigned StackSize = alignTo(ByValSize, PtrAlign);
6614 unsigned Offset = State.AllocateStack(StackSize, PtrAlign);
6615 for (const unsigned E = Offset + StackSize; Offset < E;
6616 Offset += PtrAlign.value()) {
6617 if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
6618 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6619 else {
6620 State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6621 Offset, MVT::INVALID_SIMPLE_VALUE_TYPE,
6622 LocInfo));
6623 break;
6624 }
6625 }
6626 return false;
6627 }
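// For illustration (assuming 64-bit AIX): a 12-byte byval argument is
// rounded up to StackSize = 16 and walked in 8-byte steps above, so it
// consumes two GPRs when both are available; once GPRs run out, a single
// MemLoc covers the remaining bytes and the break stops the walk.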
6628
6629 // Arguments always reserve parameter save area.
6630 switch (ValVT.SimpleTy) {
6631 default:
6632 report_fatal_error("Unhandled value type for argument.");
6633 case MVT::i64:
6634 // i64 arguments should have been split to i32 for PPC32.
6635 assert(IsPPC64 && "PPC32 should have split i64 values.");
6636 LLVM_FALLTHROUGH;
6637 case MVT::i1:
6638 case MVT::i32: {
6639 const unsigned Offset = State.AllocateStack(PtrAlign.value(), PtrAlign);
6640 // AIX integer arguments are always passed in register width.
6641 if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
6642 LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
6643 : CCValAssign::LocInfo::ZExt;
6644 if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
6645 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6646 else
6647 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
6648
6649 return false;
6650 }
6651 case MVT::f32:
6652 case MVT::f64: {
6653 // Parameter save area (PSA) is reserved even if the float passes in fpr.
6654 const unsigned StoreSize = LocVT.getStoreSize();
6655 // Floats are always 4-byte aligned in the PSA on AIX.
6656 // This includes f64 in 64-bit mode for ABI compatibility.
6657 const unsigned Offset =
6658 State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
6659 unsigned FReg = State.AllocateReg(FPR);
6660 if (FReg)
6661 State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
6662
6663 // Reserve and initialize GPRs or initialize the PSA as required.
6664 for (unsigned I = 0; I < StoreSize; I += PtrAlign.value()) {
6665 if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
6666 assert(FReg && "An FPR should be available when a GPR is reserved.");
6667 if (State.isVarArg()) {
6668 // Successfully reserved GPRs are only initialized for vararg calls.
6669 // Custom handling is required for:
6670 // f64 in PPC32 needs to be split into 2 GPRs.
6671 // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
6672 State.addLoc(
6673 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6674 }
6675 } else {
6676 // If there are insufficient GPRs, the PSA needs to be initialized.
6677 // Initialization occurs even if an FPR was initialized for
6678 // compatibility with the AIX XL compiler. The full memory for the
6679 // argument will be initialized even if a prior word is saved in GPR.
6680 // A custom memLoc is used when the argument also passes in FPR so
6681 // that the callee handling can skip over it easily.
6682 State.addLoc(
6683 FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
6684 LocInfo)
6685 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6686 break;
6687 }
6688 }
6689
6690 return false;
6691 }
6692 case MVT::v4f32:
6693 case MVT::v4i32:
6694 case MVT::v8i16:
6695 case MVT::v16i8:
6696 case MVT::v2i64:
6697 case MVT::v2f64:
6698 case MVT::v1i128: {
6699 const unsigned VecSize = 16;
6700 const Align VecAlign(VecSize);
6701
6702 if (!State.isVarArg()) {
6703 // If there are vector registers remaining we don't consume any stack
6704 // space.
6705 if (unsigned VReg = State.AllocateReg(VR)) {
6706 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6707 return false;
6708 }
6709 // Vectors passed on the stack do not shadow GPRs or FPRs even though they
6710 // might be allocated in the portion of the PSA that is shadowed by the
6711 // GPRs.
6712 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6713 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6714 return false;
6715 }
6716
6717 const unsigned PtrSize = IsPPC64 ? 8 : 4;
6718 ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
6719
6720 unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
6721 // Burn any underaligned registers and their shadowed stack space until
6722 // we reach the required alignment.
6723 while (NextRegIndex != GPRs.size() &&
6724 !isGPRShadowAligned(GPRs[NextRegIndex], VecAlign)) {
6725 // Shadow allocate register and its stack shadow.
6726 unsigned Reg = State.AllocateReg(GPRs);
6727 State.AllocateStack(PtrSize, PtrAlign);
6728 assert(Reg && "Allocating register unexpectedly failed.");
6729 (void)Reg;
6730 NextRegIndex = State.getFirstUnallocated(GPRs);
6731 }
6732
6733 // Vectors that are passed as fixed arguments are handled differently.
6734 // They are passed in VRs if any are available (unlike arguments passed
6735 // through the ellipsis) and shadow GPRs (unlike arguments to non-vararg
6736 // functions).
6737 if (State.isFixed(ValNo)) {
6738 if (unsigned VReg = State.AllocateReg(VR)) {
6739 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6740 // Shadow allocate GPRs and stack space even though we pass in a VR.
6741 for (unsigned I = 0; I != VecSize; I += PtrSize)
6742 State.AllocateReg(GPRs);
6743 State.AllocateStack(VecSize, VecAlign);
6744 return false;
6745 }
6746 // No vector registers remain so pass on the stack.
6747 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6748 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6749 return false;
6750 }
6751
6752 // If all GPRs are consumed then we pass the argument fully on the stack.
6753 if (NextRegIndex == GPRs.size()) {
6754 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6755 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6756 return false;
6757 }
6758
6759 // Corner case for 32-bit codegen. We have 2 registers to pass the first
6760 // half of the argument, and then need to pass the remaining half on the
6761 // stack.
6762 if (GPRs[NextRegIndex] == PPC::R9) {
6763 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6764 State.addLoc(
6765 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6766
6767 const unsigned FirstReg = State.AllocateReg(PPC::R9);
6768 const unsigned SecondReg = State.AllocateReg(PPC::R10);
6769 assert(FirstReg && SecondReg &&
6770 "Allocating R9 or R10 unexpectedly failed.");
6771 State.addLoc(
6772 CCValAssign::getCustomReg(ValNo, ValVT, FirstReg, RegVT, LocInfo));
6773 State.addLoc(
6774 CCValAssign::getCustomReg(ValNo, ValVT, SecondReg, RegVT, LocInfo));
6775 return false;
6776 }
6777
6778 // We have enough GPRs to fully pass the vector argument, and we have
6779 // already consumed any underaligned registers. Start with the custom
6780 // MemLoc and then the custom RegLocs.
6781 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6782 State.addLoc(
6783 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6784 for (unsigned I = 0; I != VecSize; I += PtrSize) {
6785 const unsigned Reg = State.AllocateReg(GPRs);
6786 assert(Reg && "Failed to allocated register for vararg vector argument");
6787 State.addLoc(
6788 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6789 }
6790 return false;
6791 }
6792 }
6793 return true;
6794}
6795
6796// So far, this function is only used by LowerFormalArguments_AIX()
6797static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
6798 bool IsPPC64,
6799 bool HasP8Vector,
6800 bool HasVSX) {
6801 assert((IsPPC64 || SVT != MVT::i64) &&
6802 "i64 should have been split for 32-bit codegen.");
6803
6804 switch (SVT) {
6805 default:
6806 report_fatal_error("Unexpected value type for formal argument");
6807 case MVT::i1:
6808 case MVT::i32:
6809 case MVT::i64:
6810 return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
6811 case MVT::f32:
6812 return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass;
6813 case MVT::f64:
6814 return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass;
6815 case MVT::v4f32:
6816 case MVT::v4i32:
6817 case MVT::v8i16:
6818 case MVT::v16i8:
6819 case MVT::v2i64:
6820 case MVT::v2f64:
6821 case MVT::v1i128:
6822 return &PPC::VRRCRegClass;
6823 }
6824}
6825
6826static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
6827 SelectionDAG &DAG, SDValue ArgValue,
6828 MVT LocVT, const SDLoc &dl) {
6829 assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
6830 assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
6831
6832 if (Flags.isSExt())
6833 ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
6834 DAG.getValueType(ValVT));
6835 else if (Flags.isZExt())
6836 ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
6837 DAG.getValueType(ValVT));
6838
6839 return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
6840}
6841
6842static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
6843 const unsigned LASize = FL->getLinkageSize();
6844
6845 if (PPC::GPRCRegClass.contains(Reg)) {
6846 assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
6847 "Reg must be a valid argument register!");
6848 return LASize + 4 * (Reg - PPC::R3);
6849 }
6850
6851 if (PPC::G8RCRegClass.contains(Reg)) {
6852 assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
6853 "Reg must be a valid argument register!");
6854 return LASize + 8 * (Reg - PPC::X3);
6855 }
6856
6857 llvm_unreachable("Only general purpose registers expected.")::llvm::llvm_unreachable_internal("Only general purpose registers expected."
, "llvm/lib/Target/PowerPC/PPCISelLowering.cpp", 6857)
;
6858}
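// For example, R5 maps to LASize + 8 (the third word of the parameter save
// area) and X7 maps to LASize + 32 (the fifth doubleword), since argument
// GPRs shadow consecutive slots starting at the end of the linkage area.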
6859
6860// AIX ABI Stack Frame Layout:
6861//
6862// Low Memory +--------------------------------------------+
6863// SP +---> | Back chain | ---+
6864// | +--------------------------------------------+ |
6865// | | Saved Condition Register | |
6866// | +--------------------------------------------+ |
6867// | | Saved Linkage Register | |
6868// | +--------------------------------------------+ | Linkage Area
6869// | | Reserved for compilers | |
6870// | +--------------------------------------------+ |
6871// | | Reserved for binders | |
6872// | +--------------------------------------------+ |
6873// | | Saved TOC pointer | ---+
6874// | +--------------------------------------------+
6875// | | Parameter save area |
6876// | +--------------------------------------------+
6877// | | Alloca space |
6878// | +--------------------------------------------+
6879// | | Local variable space |
6880// | +--------------------------------------------+
6881// | | Float/int conversion temporary |
6882// | +--------------------------------------------+
6883// | | Save area for AltiVec registers |
6884// | +--------------------------------------------+
6885// | | AltiVec alignment padding |
6886// | +--------------------------------------------+
6887// | | Save area for VRSAVE register |
6888// | +--------------------------------------------+
6889// | | Save area for General Purpose registers |
6890// | +--------------------------------------------+
6891// | | Save area for Floating Point registers |
6892// | +--------------------------------------------+
6893// +---- | Back chain |
6894// High Memory +--------------------------------------------+
6895//
6896// Specifications:
6897// AIX 7.2 Assembler Language Reference
6898// Subroutine linkage convention
6899
6900SDValue PPCTargetLowering::LowerFormalArguments_AIX(
6901 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
6902 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6903 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
6904
6905 assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
6906 CallConv == CallingConv::Fast) &&
6907 "Unexpected calling convention!");
6908
6909 if (getTargetMachine().Options.GuaranteedTailCallOpt)
6910 report_fatal_error("Tail call support is unimplemented on AIX.");
6911
6912 if (useSoftFloat())
6913 report_fatal_error("Soft float support is unimplemented on AIX.");
6914
6915 const PPCSubtarget &Subtarget =
6916 static_cast<const PPCSubtarget &>(DAG.getSubtarget());
6917
6918 const bool IsPPC64 = Subtarget.isPPC64();
6919 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
6920
6921 // Assign locations to all of the incoming arguments.
6922 SmallVector<CCValAssign, 16> ArgLocs;
6923 MachineFunction &MF = DAG.getMachineFunction();
6924 MachineFrameInfo &MFI = MF.getFrameInfo();
6925 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
6926 AIXCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
6927
6928 const EVT PtrVT = getPointerTy(MF.getDataLayout());
6929 // Reserve space for the linkage area on the stack.
6930 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6931 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
6932 CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
6933
6934 SmallVector<SDValue, 8> MemOps;
6935
6936 for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
6937 CCValAssign &VA = ArgLocs[I++];
6938 MVT LocVT = VA.getLocVT();
6939 MVT ValVT = VA.getValVT();
6940 ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
6941 // For compatibility with the AIX XL compiler, the float args in the
6942 // parameter save area are initialized even if the argument is available
6943 // in a register. The caller is required to initialize both the register
6944 // and memory; however, the callee can choose to expect it in either.
6945 // The memloc is dismissed here because the argument is retrieved from
6946 // the register.
6947 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
6948 continue;
6949
6950 auto HandleMemLoc = [&]() {
6951 const unsigned LocSize = LocVT.getStoreSize();
6952 const unsigned ValSize = ValVT.getStoreSize();
6953 assert((ValSize <= LocSize) &&
6954 "Object size is larger than size of MemLoc");
6955 int CurArgOffset = VA.getLocMemOffset();
6956 // Objects are right-justified because AIX is big-endian.
6957 if (LocSize > ValSize)
6958 CurArgOffset += LocSize - ValSize;
6959 // Potential tail calls could cause overwriting of argument stack slots.
6960 const bool IsImmutable =
6961 !(getTargetMachine().Options.GuaranteedTailCallOpt &&
6962 (CallConv == CallingConv::Fast));
6963 int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
6964 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
6965 SDValue ArgValue =
6966 DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
6967 InVals.push_back(ArgValue);
6968 };
6969
6970 // Vector arguments to VaArg functions are passed both on the stack, and
6971 // in any available GPRs. Load the value from the stack and add the GPRs
6972 // as live ins.
6973 if (VA.isMemLoc() && VA.needsCustom()) {
6974 assert(ValVT.isVector() && "Unexpected Custom MemLoc type.");
6975 assert(isVarArg && "Only use custom memloc for vararg.");
6976 // ValNo of the custom MemLoc, so we can compare it to the ValNo of the
6977 // matching custom RegLocs.
6978 const unsigned OriginalValNo = VA.getValNo();
6979 (void)OriginalValNo;
6980
6981 auto HandleCustomVecRegLoc = [&]() {
6982 assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
6983 "Missing custom RegLoc.");
6984 VA = ArgLocs[I++];
6985 assert(VA.getValVT().isVector() &&
6986 "Unexpected Val type for custom RegLoc.");
6987 assert(VA.getValNo() == OriginalValNo &&
6988 "ValNo mismatch between custom MemLoc and RegLoc.");
6989 MVT::SimpleValueType SVT = VA.getLocVT().SimpleTy;
6990 MF.addLiveIn(VA.getLocReg(),
6991 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
6992 Subtarget.hasVSX()));
6993 };
6994
6995 HandleMemLoc();
6996 // In 64-bit there will be exactly 2 custom RegLocs that follow, and
6997 // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
6998 // R10.
6999 HandleCustomVecRegLoc();
7000 HandleCustomVecRegLoc();
7001
7002 // If we are targeting 32-bit, there might be 2 extra custom RegLocs if
7003 // we passed the vector in R5, R6, R7 and R8.
7004 if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {
7005 assert(!IsPPC64 &&
7006 "Only 2 custom RegLocs expected for 64-bit codegen.");
7007 HandleCustomVecRegLoc();
7008 HandleCustomVecRegLoc();
7009 }
7010
7011 continue;
7012 }
7013
7014 if (VA.isRegLoc()) {
7015 if (VA.getValVT().isScalarInteger())
7016 FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
7017 else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) {
7018 switch (VA.getValVT().SimpleTy) {
7019 default:
7020 report_fatal_error("Unhandled value type for argument.");
7021 case MVT::f32:
7022 FuncInfo->appendParameterType(PPCFunctionInfo::ShortFloatingPoint);
7023 break;
7024 case MVT::f64:
7025 FuncInfo->appendParameterType(PPCFunctionInfo::LongFloatingPoint);
7026 break;
7027 }
7028 } else if (VA.getValVT().isVector()) {
7029 switch (VA.getValVT().SimpleTy) {
7030 default:
7031 report_fatal_error("Unhandled value type for argument.");
7032 case MVT::v16i8:
7033 FuncInfo->appendParameterType(PPCFunctionInfo::VectorChar);
7034 break;
7035 case MVT::v8i16:
7036 FuncInfo->appendParameterType(PPCFunctionInfo::VectorShort);
7037 break;
7038 case MVT::v4i32:
7039 case MVT::v2i64:
7040 case MVT::v1i128:
7041 FuncInfo->appendParameterType(PPCFunctionInfo::VectorInt);
7042 break;
7043 case MVT::v4f32:
7044 case MVT::v2f64:
7045 FuncInfo->appendParameterType(PPCFunctionInfo::VectorFloat);
7046 break;
7047 }
7048 }
7049 }
7050
7051 if (Flags.isByVal() && VA.isMemLoc()) {
7052 const unsigned Size =
7053 alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
7054 PtrByteSize);
7055 const int FI = MF.getFrameInfo().CreateFixedObject(
7056 Size, VA.getLocMemOffset(), /* IsImmutable */ false,
7057 /* IsAliased */ true);
7058 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7059 InVals.push_back(FIN);
7060
7061 continue;
7062 }
7063
7064 if (Flags.isByVal()) {
7065 assert(VA.isRegLoc() && "MemLocs should already be handled.")(static_cast <bool> (VA.isRegLoc() && "MemLocs should already be handled."
) ? void (0) : __assert_fail ("VA.isRegLoc() && \"MemLocs should already be handled.\""
, "llvm/lib/Target/PowerPC/PPCISelLowering.cpp", 7065, __extension__
__PRETTY_FUNCTION__))
;
7066
7067 const MCPhysReg ArgReg = VA.getLocReg();
7068 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
7069
7070 if (Flags.getNonZeroByValAlign() > PtrByteSize)
7071 report_fatal_error("Over aligned byvals not supported yet.");
7072
7073 const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
7074 const int FI = MF.getFrameInfo().CreateFixedObject(
7075 StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
7076 /* IsAliased */ true);
7077 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7078 InVals.push_back(FIN);
7079
7080 // Add live ins for all the RegLocs for the same ByVal.
7081 const TargetRegisterClass *RegClass =
7082 IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7083
7084 auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
7085 unsigned Offset) {
7086 const Register VReg = MF.addLiveIn(PhysReg, RegClass);
7087 // Since the caller's side has left-justified the aggregate in the
7088 // register, we can simply store the entire register into the stack
7089 // slot.
7090 SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7091 // The store to the fixedstack object is needed because accessing a
7092 // field of the ByVal will use a gep and load. Ideally we will optimize
7093 // to extracting the value from the register directly, and elide the
7094 // stores when the argument's address is not taken, but that will need to
7095 // be future work.
7096 SDValue Store = DAG.getStore(
7097 CopyFrom.getValue(1), dl, CopyFrom,
7098 DAG.getObjectPtrOffset(dl, FIN, TypeSize::Fixed(Offset)),
7099 MachinePointerInfo::getFixedStack(MF, FI, Offset));
7100
7101 MemOps.push_back(Store);
7102 };
7103
7104 unsigned Offset = 0;
7105 HandleRegLoc(VA.getLocReg(), Offset);
7106 Offset += PtrByteSize;
7107 for (; Offset != StackSize && ArgLocs[I].isRegLoc();
7108 Offset += PtrByteSize) {
7109 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7110 "RegLocs should be for ByVal argument.");
7111
7112 const CCValAssign RL = ArgLocs[I++];
7113 HandleRegLoc(RL.getLocReg(), Offset);
7114 FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
7115 }
7116
7117 if (Offset != StackSize) {
7118 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7119 "Expected MemLoc for remaining bytes.");
7120 assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
7121 // Consume the MemLoc. The InVal has already been emitted, so nothing
7122 // more needs to be done.
7123 ++I;
7124 }
7125
7126 continue;
7127 }
7128
7129 if (VA.isRegLoc() && !VA.needsCustom()) {
7130 MVT::SimpleValueType SVT = ValVT.SimpleTy;
7131 Register VReg =
7132 MF.addLiveIn(VA.getLocReg(),
7133 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7134 Subtarget.hasVSX()));
7135 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7136 if (ValVT.isScalarInteger() &&
7137 (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
7138 ArgValue =
7139 truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
7140 }
7141 InVals.push_back(ArgValue);
7142 continue;
7143 }
7144 if (VA.isMemLoc()) {
7145 HandleMemLoc();
7146 continue;
7147 }
7148 }
7149
7150 // On AIX a minimum of 8 words is saved to the parameter save area.
7151 const unsigned MinParameterSaveArea = 8 * PtrByteSize;
7152 // Area that is at least reserved in the caller of this function.
7153 unsigned CallerReservedArea =
7154 std::max(CCInfo.getNextStackOffset(), LinkageSize + MinParameterSaveArea);
7155
7156 // Set the size that is at least reserved in caller of this function. Tail
7157 // call optimized function's reserved stack space needs to be aligned so
7158 // that taking the difference between two stack areas will result in an
7159 // aligned stack.
7160 CallerReservedArea =
7161 EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
7162 FuncInfo->setMinReservedArea(CallerReservedArea);
7163
7164 if (isVarArg) {
7165 FuncInfo->setVarArgsFrameIndex(
7166 MFI.CreateFixedObject(PtrByteSize, CCInfo.getNextStackOffset(), true));
7167 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
7168
7169 static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7170 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7171
7172 static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7173 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7174 const unsigned NumGPArgRegs = array_lengthof(IsPPC64 ? GPR_64 : GPR_32);
7175
7176 // The fixed integer arguments of a variadic function are stored to the
7177 // VarArgsFrameIndex on the stack so that they may be loaded by
7178 // dereferencing the result of va_next.
7179 for (unsigned GPRIndex =
7180 (CCInfo.getNextStackOffset() - LinkageSize) / PtrByteSize;
7181 GPRIndex < NumGPArgRegs; ++GPRIndex) {
7182
7183 const Register VReg =
7184 IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
7185 : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
7186
7187 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
7188 SDValue Store =
7189 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
7190 MemOps.push_back(Store);
7191 // Increment the address for the next argument to store.
7192 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
7193 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
7194 }
7195 }
7196
7197 if (!MemOps.empty())
7198 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
7199
7200 return Chain;
7201}
7202
7203SDValue PPCTargetLowering::LowerCall_AIX(
7204 SDValue Chain, SDValue Callee, CallFlags CFlags,
7205 const SmallVectorImpl<ISD::OutputArg> &Outs,
7206 const SmallVectorImpl<SDValue> &OutVals,
7207 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7208 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
7209 const CallBase *CB) const {
7210 // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
7211 // AIX ABI stack frame layout.
7212
7213 assert((CFlags.CallConv == CallingConv::C ||
7214 CFlags.CallConv == CallingConv::Cold ||
7215 CFlags.CallConv == CallingConv::Fast) &&
7216 "Unexpected calling convention!");
7217
7218 if (CFlags.IsPatchPoint)
7219 report_fatal_error("This call type is unimplemented on AIX.");
7220
7221 const PPCSubtarget& Subtarget =
7222 static_cast<const PPCSubtarget&>(DAG.getSubtarget());
7223
7224 MachineFunction &MF = DAG.getMachineFunction();
7225 SmallVector<CCValAssign, 16> ArgLocs;
7226 AIXCCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
7227 *DAG.getContext());
7228
7229 // Reserve space for the linkage save area (LSA) on the stack.
7230 // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7231 // [SP][CR][LR][2 x reserved][TOC].
7232 // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7233 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7234 const bool IsPPC64 = Subtarget.isPPC64();
7235 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
7236 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7237 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7238 CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
7239
7240 // The prolog code of the callee may store up to 8 GPR argument registers to
7241 // the stack, allowing va_start to index over them in memory if the callee
7242 // is variadic.
7243 // Because we cannot tell if this is needed on the caller side, we have to
7244 // conservatively assume that it is needed. As such, make sure we have at
7245 // least enough stack space for the caller to store the 8 GPRs.
7246 const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
7247 const unsigned NumBytes = std::max(LinkageSize + MinParameterSaveAreaSize,
7248 CCInfo.getNextStackOffset());
7249
7250 // Adjust the stack pointer for the new arguments...
7251 // These operations are automatically eliminated by the prolog/epilog pass.
7252 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
7253 SDValue CallSeqStart = Chain;
7254
7255 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
7256 SmallVector<SDValue, 8> MemOpChains;
7257
7258 // Set up a copy of the stack pointer for loading and storing any
7259 // arguments that may not fit in the registers available for argument
7260 // passing.
7261 const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
7262 : DAG.getRegister(PPC::R1, MVT::i32);
7263
7264 for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
7265 const unsigned ValNo = ArgLocs[I].getValNo();
7266 SDValue Arg = OutVals[ValNo];
7267 ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
7268
7269 if (Flags.isByVal()) {
7270 const unsigned ByValSize = Flags.getByValSize();
7271
7272 // Nothing to do for zero-sized ByVals on the caller side.
7273 if (!ByValSize) {
7274 ++I;
7275 continue;
7276 }
7277
7278 auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
7279 return DAG.getExtLoad(
7280 ISD::ZEXTLOAD, dl, PtrVT, Chain,
7281 (LoadOffset != 0)
7282 ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
7283 : Arg,
7284 MachinePointerInfo(), VT);
7285 };
7286
7287 unsigned LoadOffset = 0;
7288
7289 // Initialize the registers that are fully occupied by the by-val argument.
7290 while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
7291 SDValue Load = GetLoad(PtrVT, LoadOffset);
7292 MemOpChains.push_back(Load.getValue(1));
7293 LoadOffset += PtrByteSize;
7294 const CCValAssign &ByValVA = ArgLocs[I++];
7295 assert(ByValVA.getValNo() == ValNo &&
7296 "Unexpected location for pass-by-value argument.");
7297 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
7298 }
7299
7300 if (LoadOffset == ByValSize)
7301 continue;
7302
7303 // There must be one more loc to handle the remainder.
7304 assert(ArgLocs[I].getValNo() == ValNo &&
7305 "Expected additional location for by-value argument.");
7306
7307 if (ArgLocs[I].isMemLoc()) {
7308 assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
7309 const CCValAssign &ByValVA = ArgLocs[I++];
7310 ISD::ArgFlagsTy MemcpyFlags = Flags;
7311 // Only memcpy the bytes that don't pass in register.
7312 MemcpyFlags.setByValSize(ByValSize - LoadOffset);
7313 Chain = CallSeqStart = createMemcpyOutsideCallSeq(
7314 (LoadOffset != 0)
7315 ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
7316 : Arg,
7317 DAG.getObjectPtrOffset(dl, StackPtr,
7318 TypeSize::Fixed(ByValVA.getLocMemOffset())),
7319 CallSeqStart, MemcpyFlags, DAG, dl);
7320 continue;
7321 }
7322
7323 // Initialize the final register residue.
7324 // Any residue that occupies the final by-val arg register must be
7325 // left-justified on AIX. Loads must be a power-of-2 size and cannot be
7326 // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
7327 // 2 and 1 byte loads.
7328 const unsigned ResidueBytes = ByValSize % PtrByteSize;
7329 assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
7330 "Unexpected register residue for by-value argument.");
7331 SDValue ResidueVal;
7332 for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
7333 const unsigned N = PowerOf2Floor(ResidueBytes - Bytes);
7334 const MVT VT =
7335 N == 1 ? MVT::i8
7336 : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
7337 SDValue Load = GetLoad(VT, LoadOffset);
7338 MemOpChains.push_back(Load.getValue(1));
7339 LoadOffset += N;
7340 Bytes += N;
7341
7342 // By-val arguments are passed left-justified in register.
7343 // Every load here needs to be shifted, otherwise a full register load
7344 // should have been used.
7345 assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
7346 "Unexpected load emitted during handling of pass-by-value "
7347 "argument.");
7348 unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
7349 EVT ShiftAmountTy =
7350 getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
7351 SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
7352 SDValue ShiftedLoad =
7353 DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
7354 ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
7355 ShiftedLoad)
7356 : ShiftedLoad;
7357 }
7358
7359 const CCValAssign &ByValVA = ArgLocs[I++];
7360 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
7361 continue;
7362 }
7363
7364 CCValAssign &VA = ArgLocs[I++];
7365 const MVT LocVT = VA.getLocVT();
7366 const MVT ValVT = VA.getValVT();
7367
7368 switch (VA.getLocInfo()) {
7369 default:
7370 report_fatal_error("Unexpected argument extension type.");
7371 case CCValAssign::Full:
7372 break;
7373 case CCValAssign::ZExt:
7374 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7375 break;
7376 case CCValAssign::SExt:
7377 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7378 break;
7379 }
7380
7381 if (VA.isRegLoc() && !VA.needsCustom()) {
7382 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
7383 continue;
7384 }
7385
7386 // Vector arguments passed to VarArg functions need custom handling when
7387 // they are passed (at least partially) in GPRs.
7388 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isVector()) {
7389 assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");
7390 // Store value to its stack slot.
7391 SDValue PtrOff =
7392 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7393 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7394 SDValue Store =
7395 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
7396 MemOpChains.push_back(Store);
7397 const unsigned OriginalValNo = VA.getValNo();
7398 // Then load the GPRs from the stack
7399 unsigned LoadOffset = 0;
7400 auto HandleCustomVecRegLoc = [&]() {
7401 assert(I != E && "Unexpected end of CCvalAssigns.");
7402 assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7403 "Expected custom RegLoc.");
7404 CCValAssign RegVA = ArgLocs[I++];
7405 assert(RegVA.getValNo() == OriginalValNo &&
7406 "Custom MemLoc ValNo and custom RegLoc ValNo must match.");
7407 SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
7408 DAG.getConstant(LoadOffset, dl, PtrVT));
7409 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Add, MachinePointerInfo());
7410 MemOpChains.push_back(Load.getValue(1));
7411 RegsToPass.push_back(std::make_pair(RegVA.getLocReg(), Load));
7412 LoadOffset += PtrByteSize;
7413 };
7414
7415 // In 64-bit there will be exactly 2 custom RegLocs that follow, and
7416 // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7417 // R10.
7418 HandleCustomVecRegLoc();
7419 HandleCustomVecRegLoc();
7420
7421 if (I != E && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7422 ArgLocs[I].getValNo() == OriginalValNo) {
7423 assert(!IsPPC64 &&
7424 "Only 2 custom RegLocs expected for 64-bit codegen.");
7425 HandleCustomVecRegLoc();
7426 HandleCustomVecRegLoc();
7427 }
7428
7429 continue;
7430 }
7431
7432 if (VA.isMemLoc()) {
7433 SDValue PtrOff =
7434 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7435 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7436 MemOpChains.push_back(
7437 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
7438
7439 continue;
7440 }
7441
7442 if (!ValVT.isFloatingPoint())
7443 report_fatal_error(
7444 "Unexpected register handling for calling convention.");
7445
7446 // Custom handling is used for GPR initializations for vararg float
7447 // arguments.
7448 assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
7449 LocVT.isInteger() &&
7450 "Custom register handling only expected for VarArg.");
7451
7452 SDValue ArgAsInt =
7453 DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);
7454
7455 if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7456 // f32 in 32-bit GPR
7457 // f64 in 64-bit GPR
7458 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
7459 else if (Arg.getValueType().getFixedSizeInBits() <
7460 LocVT.getFixedSizeInBits())
7461 // f32 in 64-bit GPR.
7462 RegsToPass.push_back(std::make_pair(
7463 VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
7464 else {
7465 // f64 in two 32-bit GPRs
7466 // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7467 assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7468 "Unexpected custom register for argument!");
7469 CCValAssign &GPR1 = VA;
7470 SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
7471 DAG.getConstant(32, dl, MVT::i8));
7472 RegsToPass.push_back(std::make_pair(
7473 GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
7474
7475 if (I != E) {
7476 // If only 1 GPR was available, there will only be one custom GPR and
7477 // the argument will also pass in memory.
7478 CCValAssign &PeekArg = ArgLocs[I];
7479 if (PeekArg.isRegLoc() && PeekArg.getValNo() == PeekArg.getValNo()) {
7480 assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7481 CCValAssign &GPR2 = ArgLocs[I++];
7482 RegsToPass.push_back(std::make_pair(
7483 GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
7484 }
7485 }
7486 }
7487 }
7488
7489 if (!MemOpChains.empty())
7490 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
7491
7492 // For indirect calls, we need to save the TOC base to the stack for
7493 // restoration after the call.
7494 if (CFlags.IsIndirect) {
7495 assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7496 const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7497 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7498 const MVT PtrVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
7499 const unsigned TOCSaveOffset =
7500 Subtarget.getFrameLowering()->getTOCSaveOffset();
7501
7502 setUsesTOCBasePtr(DAG);
7503 SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
7504 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
7505 SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
7506 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7507 Chain = DAG.getStore(
7508 Val.getValue(1), dl, Val, AddPtr,
7509 MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7510 }
7511
7512 // Build a sequence of copy-to-reg nodes chained together with token chain
7513 // and flag operands which copy the outgoing args into the appropriate regs.
7514 SDValue InFlag;
7515 for (auto Reg : RegsToPass) {
7516 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InFlag);
7517 InFlag = Chain.getValue(1);
7518 }
7519
7520 const int SPDiff = 0;
7521 return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
7522 Callee, SPDiff, NumBytes, Ins, InVals, CB);
7523}
7524
7525bool
7526PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7527 MachineFunction &MF, bool isVarArg,
7528 const SmallVectorImpl<ISD::OutputArg> &Outs,
7529 LLVMContext &Context) const {
7530 SmallVector<CCValAssign, 16> RVLocs;
7531 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7532 return CCInfo.CheckReturn(
7533 Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7534 ? RetCC_PPC_Cold
7535 : RetCC_PPC);
7536}
7537
7538SDValue
7539PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7540 bool isVarArg,
7541 const SmallVectorImpl<ISD::OutputArg> &Outs,
7542 const SmallVectorImpl<SDValue> &OutVals,
7543 const SDLoc &dl, SelectionDAG &DAG) const {
7544 SmallVector<CCValAssign, 16> RVLocs;
7545 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7546 *DAG.getContext());
7547 CCInfo.AnalyzeReturn(Outs,
7548 (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7549 ? RetCC_PPC_Cold
7550 : RetCC_PPC);
7551
7552 SDValue Flag;
7553 SmallVector<SDValue, 4> RetOps(1, Chain);
7554
7555 // Copy the result values into the output registers.
7556 for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
7557 CCValAssign &VA = RVLocs[i];
7558 assert(VA.isRegLoc() && "Can only return in registers!");
7559
7560 SDValue Arg = OutVals[RealResIdx];
7561
7562 switch (VA.getLocInfo()) {
7563 default: llvm_unreachable("Unknown loc info!")::llvm::llvm_unreachable_internal("Unknown loc info!", "llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 7563)
;
7564 case CCValAssign::Full: break;
7565 case CCValAssign::AExt:
7566 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
7567 break;
7568 case CCValAssign::ZExt:
7569 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7570 break;
7571 case CCValAssign::SExt:
7572 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7573 break;
7574 }
7575 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7576 bool isLittleEndian = Subtarget.isLittleEndian();
7577 // Legalize ret f64 -> ret 2 x i32.
7578 SDValue SVal =
7579 DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7580 DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
7581 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
7582 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7583 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7584 DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
7585 Flag = Chain.getValue(1);
7586 VA = RVLocs[++i]; // skip ahead to next loc
7587 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
7588 } else
7589 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
7590 Flag = Chain.getValue(1);
7591 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7592 }
7593
7594 RetOps[0] = Chain; // Update chain.
7595
7596 // Add the flag if we have it.
7597 if (Flag.getNode())
7598 RetOps.push_back(Flag);
7599
7600 return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
7601}
7602
7603SDValue
7604PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7605 SelectionDAG &DAG) const {
7606 SDLoc dl(Op);
7607
7608 // Get the correct type for integers.
7609 EVT IntVT = Op.getValueType();
7610
7611 // Get the inputs.
7612 SDValue Chain = Op.getOperand(0);
7613 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7614 // Build a DYNAREAOFFSET node.
7615 SDValue Ops[2] = {Chain, FPSIdx};
7616 SDVTList VTs = DAG.getVTList(IntVT);
7617 return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
7618}
7619
7620SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7621 SelectionDAG &DAG) const {
7622 // When we pop the dynamic allocation we need to restore the SP link.
7623 SDLoc dl(Op);
7624
7625 // Get the correct type for pointers.
7626 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7627
7628 // Construct the stack pointer operand.
7629 bool isPPC64 = Subtarget.isPPC64();
7630 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7631 SDValue StackPtr = DAG.getRegister(SP, PtrVT);
7632
7633 // Get the operands for the STACKRESTORE.
7634 SDValue Chain = Op.getOperand(0);
7635 SDValue SaveSP = Op.getOperand(1);
7636
7637 // Load the old link SP.
7638 SDValue LoadLinkSP =
7639 DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
7640
7641 // Restore the stack pointer.
7642 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
7643
7644 // Store the old link SP.
7645 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
7646}
7647
7648SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7649 MachineFunction &MF = DAG.getMachineFunction();
7650 bool isPPC64 = Subtarget.isPPC64();
7651 EVT PtrVT = getPointerTy(MF.getDataLayout());
7652
7653 // Get current frame pointer save index. The users of this index will be
7654 // primarily DYNALLOC instructions.
7655 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7656 int RASI = FI->getReturnAddrSaveIndex();
7657
7658 // If the return address save index hasn't been defined yet.
7659 if (!RASI) {
7660 // Find out the fixed offset of the return address save area.
7661 int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
7662 // Allocate the frame index for frame pointer save area.
7663 RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
7664 // Save the result.
7665 FI->setReturnAddrSaveIndex(RASI);
7666 }
7667 return DAG.getFrameIndex(RASI, PtrVT);
7668}
7669
7670SDValue
7671PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
7672 MachineFunction &MF = DAG.getMachineFunction();
7673 bool isPPC64 = Subtarget.isPPC64();
7674 EVT PtrVT = getPointerTy(MF.getDataLayout());
7675
7676 // Get current frame pointer save index. The users of this index will be
7677 // primarily DYNALLOC instructions.
7678 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7679 int FPSI = FI->getFramePointerSaveIndex();
7680
7681 // If the frame pointer save index hasn't been defined yet.
7682 if (!FPSI) {
7684 // Find out the fixed offset of the frame pointer save area.
7684 int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
7685 // Allocate the frame index for frame pointer save area.
7686 FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
7687 // Save the result.
7688 FI->setFramePointerSaveIndex(FPSI);
7689 }
7690 return DAG.getFrameIndex(FPSI, PtrVT);
7691}
7692
7693SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
7694 SelectionDAG &DAG) const {
7695 MachineFunction &MF = DAG.getMachineFunction();
7696 // Get the inputs.
7697 SDValue Chain = Op.getOperand(0);
7698 SDValue Size = Op.getOperand(1);
7699 SDLoc dl(Op);
7700
7701 // Get the correct type for pointers.
7702 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7703 // Negate the size.
7704 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
7705 DAG.getConstant(0, dl, PtrVT), Size);
7706 // Construct a node for the frame pointer save index.
7707 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7708 SDValue Ops[3] = { Chain, NegSize, FPSIdx };
7709 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
7710 if (hasInlineStackProbe(MF))
7711 return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
7712 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
7713}
7714
7715SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
7716 SelectionDAG &DAG) const {
7717 MachineFunction &MF = DAG.getMachineFunction();
7718
7719 bool isPPC64 = Subtarget.isPPC64();
7720 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7721
7722 int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
7723 return DAG.getFrameIndex(FI, PtrVT);
7724}
7725
7726SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
7727 SelectionDAG &DAG) const {
7728 SDLoc DL(Op);
7729 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
7730 DAG.getVTList(MVT::i32, MVT::Other),
7731 Op.getOperand(0), Op.getOperand(1));
7732}
7733
7734SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
7735 SelectionDAG &DAG) const {
7736 SDLoc DL(Op);
7737 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
7738 Op.getOperand(0), Op.getOperand(1));
7739}
7740
7741SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
7742 if (Op.getValueType().isVector())
7743 return LowerVectorLoad(Op, DAG);
7744
7745 assert(Op.getValueType() == MVT::i1 &&
7746 "Custom lowering only for i1 loads");
7747
7748 // First, load 8 bits into 32 bits, then truncate to 1 bit.
7749
7750 SDLoc dl(Op);
7751 LoadSDNode *LD = cast<LoadSDNode>(Op);
7752
7753 SDValue Chain = LD->getChain();
7754 SDValue BasePtr = LD->getBasePtr();
7755 MachineMemOperand *MMO = LD->getMemOperand();
7756
7757 SDValue NewLD =
7758 DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
7759 BasePtr, MVT::i8, MMO);
7760 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
7761
7762 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
7763 return DAG.getMergeValues(Ops, dl);
7764}
7765
7766SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
7767 if (Op.getOperand(1).getValueType().isVector())
7768 return LowerVectorStore(Op, DAG);
7769
7770 assert(Op.getOperand(1).getValueType() == MVT::i1 &&
7771 "Custom lowering only for i1 stores");
7772
7773 // First, zero extend to 32 bits, then use a truncating store to 8 bits.
7774
7775 SDLoc dl(Op);
7776 StoreSDNode *ST = cast<StoreSDNode>(Op);
7777
7778 SDValue Chain = ST->getChain();
7779 SDValue BasePtr = ST->getBasePtr();
7780 SDValue Value = ST->getValue();
7781 MachineMemOperand *MMO = ST->getMemOperand();
7782
7783 Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
7784 Value);
7785 return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
7786}
7787
7788// FIXME: Remove this once the ANDI glue bug is fixed:
7789SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
7790 assert(Op.getValueType() == MVT::i1 &&
7791 "Custom lowering only for i1 results");
7792
7793 SDLoc DL(Op);
7794 return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
7795}
7796
7797SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
7798 SelectionDAG &DAG) const {
7799
7800 // Implements a vector truncate that fits in a vector register as a shuffle.
7801 // We want to legalize vector truncates down to where the source fits in
7802 // a vector register (and target is therefore smaller than vector register
7803 // size). At that point legalization will try to custom lower the sub-legal
7804 // result and get here - where we can contain the truncate as a single target
7805 // operation.
7806
7807 // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
7808 // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
7809 //
7810 // We will implement it for big-endian ordering as this (where u denotes
7811 // undefined):
7812 // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
7813 // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
7814 //
7815 // The same operation in little-endian ordering will be:
7816 // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
7817 // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
7818
7819 EVT TrgVT = Op.getValueType();
7820 assert(TrgVT.isVector() && "Vector type expected.");
7821 unsigned TrgNumElts = TrgVT.getVectorNumElements();
7822 EVT EltVT = TrgVT.getVectorElementType();
7823 if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
7824 TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
7825 !isPowerOf2_32(EltVT.getSizeInBits()))
7826 return SDValue();
7827
7828 SDValue N1 = Op.getOperand(0);
7829 EVT SrcVT = N1.getValueType();
7830 unsigned SrcSize = SrcVT.getSizeInBits();
7831 if (SrcSize > 256 ||
7832 !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
7833 !isPowerOf2_32(SrcVT.getVectorElementType().getSizeInBits()))
7834 return SDValue();
7835 if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
7836 return SDValue();
7837
7838 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
7839 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
7840
7841 SDLoc DL(Op);
7842 SDValue Op1, Op2;
7843 if (SrcSize == 256) {
7844 EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
7845 EVT SplitVT =
7846 N1.getValueType().getHalfNumVectorElementsVT(*DAG.getContext());
7847 unsigned SplitNumElts = SplitVT.getVectorNumElements();
7848 Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
7849 DAG.getConstant(0, DL, VecIdxTy));
7850 Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
7851 DAG.getConstant(SplitNumElts, DL, VecIdxTy));
7852 }
7853 else {
7854 Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
7855 Op2 = DAG.getUNDEF(WideVT);
7856 }
7857
7858 // First list the elements we want to keep.
7859 unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
7860 SmallVector<int, 16> ShuffV;
7861 if (Subtarget.isLittleEndian())
7862 for (unsigned i = 0; i < TrgNumElts; ++i)
7863 ShuffV.push_back(i * SizeMult);
7864 else
7865 for (unsigned i = 1; i <= TrgNumElts; ++i)
7866 ShuffV.push_back(i * SizeMult - 1);
7867
7868 // Populate the remaining elements with undefs.
7869 for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
7870 // ShuffV.push_back(i + WideNumElts);
7871 ShuffV.push_back(WideNumElts + 1);
7872
7873 Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
7874 Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
7875 return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
7876}
7877
7878/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
7879/// possible.
7880SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
7881 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
7882 EVT ResVT = Op.getValueType();
7883 EVT CmpVT = Op.getOperand(0).getValueType();
7884 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
7885 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
7886 SDLoc dl(Op);
7887
7888 // Without power9-vector, we don't have a native instruction for f128 comparison.
7889 // The following transformation to a libcall is needed for setcc:
7890 // select_cc lhs, rhs, tv, fv, cc -> select_cc (setcc cc, x, y), 0, tv, fv, NE
7891 if (!Subtarget.hasP9Vector() && CmpVT == MVT::f128) {
7892 SDValue Z = DAG.getSetCC(
7893 dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT),
7894 LHS, RHS, CC);
7895 SDValue Zero = DAG.getConstant(0, dl, Z.getValueType());
7896 return DAG.getSelectCC(dl, Z, Zero, TV, FV, ISD::SETNE);
7897 }
7898
7899 // Not FP, or using SPE? Not a fsel.
7900 if (!CmpVT.isFloatingPoint() || !TV.getValueType().isFloatingPoint() ||
7901 Subtarget.hasSPE())
7902 return Op;
7903
7904 SDNodeFlags Flags = Op.getNode()->getFlags();
7905
7906 // We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the
7907 // presence of infinities.
7908 if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
7909 switch (CC) {
7910 default:
7911 break;
7912 case ISD::SETOGT:
7913 case ISD::SETGT:
7914 return DAG.getNode(PPCISD::XSMAXC, dl, Op.getValueType(), LHS, RHS);
7915 case ISD::SETOLT:
7916 case ISD::SETLT:
7917 return DAG.getNode(PPCISD::XSMINC, dl, Op.getValueType(), LHS, RHS);
7918 }
7919 }
7920
7921 // We might be able to do better than this under some circumstances, but in
7922 // general, fsel-based lowering of select is a finite-math-only optimization.
7923 // For more information, see section F.3 of the 2.06 ISA specification.
7924 // With ISA 3.0
7925 if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
7926 (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()))
7927 return Op;
7928
7929 // If the RHS of the comparison is a 0.0, we don't need to do the
7930 // subtraction at all.
7931 SDValue Sel1;
7932 if (isFloatingPointZero(RHS))
7933 switch (CC) {
7934 default: break; // SETUO etc aren't handled by fsel.
7935 case ISD::SETNE:
7936 std::swap(TV, FV);
7937 LLVM_FALLTHROUGH[[gnu::fallthrough]];
7938 case ISD::SETEQ:
7939 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
7940 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
7941 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
7942 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
7943 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
7944 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
7945 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
7946 case ISD::SETULT:
7947 case ISD::SETLT:
7948 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
7949 LLVM_FALLTHROUGH[[gnu::fallthrough]];
7950 case ISD::SETOGE:
7951 case ISD::SETGE:
7952 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
7953 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
7954 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
7955 case ISD::SETUGT:
7956 case ISD::SETGT:
7957 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
7958 LLVM_FALLTHROUGH[[gnu::fallthrough]];
7959 case ISD::SETOLE:
7960 case ISD::SETLE:
7961 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
7962 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
7963 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
7964 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
7965 }
7966
7967 SDValue Cmp;
7968 switch (CC) {
7969 default: break; // SETUO etc aren't handled by fsel.
7970 case ISD::SETNE:
7971 std::swap(TV, FV);
7972 LLVM_FALLTHROUGH[[gnu::fallthrough]];
7973 case ISD::SETEQ:
7974 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
7975 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
7976 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7977 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
7978 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
7979 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
7980 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
7981 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
7982 case ISD::SETULT:
7983 case ISD::SETLT:
7984 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
7985 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
7986 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7987 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
7988 case ISD::SETOGE:
7989 case ISD::SETGE:
7990 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
7991 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
7992 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7993 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
7994 case ISD::SETUGT:
7995 case ISD::SETGT:
7996 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
7997 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
7998 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7999 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8000 case ISD::SETOLE:
8001 case ISD::SETLE:
8002 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8003 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8004 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8005 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8006 }
8007 return Op;
8008}
8009
8010static unsigned getPPCStrictOpcode(unsigned Opc) {
8011 switch (Opc) {
8012 default:
8013 llvm_unreachable("No strict version of this opcode!")::llvm::llvm_unreachable_internal("No strict version of this opcode!"
, "llvm/lib/Target/PowerPC/PPCISelLowering.cpp", 8013)
;
8014 case PPCISD::FCTIDZ:
8015 return PPCISD::STRICT_FCTIDZ;
8016 case PPCISD::FCTIWZ:
8017 return PPCISD::STRICT_FCTIWZ;
8018 case PPCISD::FCTIDUZ:
8019 return PPCISD::STRICT_FCTIDUZ;
8020 case PPCISD::FCTIWUZ:
8021 return PPCISD::STRICT_FCTIWUZ;
8022 case PPCISD::FCFID:
8023 return PPCISD::STRICT_FCFID;
8024 case PPCISD::FCFIDU:
8025 return PPCISD::STRICT_FCFIDU;
8026 case PPCISD::FCFIDS:
8027 return PPCISD::STRICT_FCFIDS;
8028 case PPCISD::FCFIDUS:
8029 return PPCISD::STRICT_FCFIDUS;
8030 }
8031}
8032
8033static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
8034 const PPCSubtarget &Subtarget) {
8035 SDLoc dl(Op);
8036 bool IsStrict = Op->isStrictFPOpcode();
8037 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8038 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8039
8040 // TODO: Any other flags to propagate?
8041 SDNodeFlags Flags;
8042 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8043
8044 // For strict nodes, source is the second operand.
8045 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8046 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8047 assert(Src.getValueType().isFloatingPoint());
8048 if (Src.getValueType() == MVT::f32) {
8049 if (IsStrict) {
8050 Src =
8051 DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
8052 DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
8053 Chain = Src.getValue(1);
8054 } else
8055 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
8056 }
8057 SDValue Conv;
8058 unsigned Opc = ISD::DELETED_NODE;
8059 switch (Op.getSimpleValueType().SimpleTy) {
8060 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!")::llvm::llvm_unreachable_internal("Unhandled FP_TO_INT type in custom expander!"
, "llvm/lib/Target/PowerPC/PPCISelLowering.cpp", 8060)
;
8061 case MVT::i32:
8062 Opc = IsSigned ? PPCISD::FCTIWZ
8063 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
8064 break;
8065 case MVT::i64:
8066 assert((IsSigned || Subtarget.hasFPCVT()) &&
8067 "i64 FP_TO_UINT is supported only with FPCVT");
8068 Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
8069 }
8070 if (IsStrict) {
8071 Opc = getPPCStrictOpcode(Opc);
8072 Conv = DAG.getNode(Opc, dl, DAG.getVTList(MVT::f64, MVT::Other),
8073 {Chain, Src}, Flags);
8074 } else {
8075 Conv = DAG.getNode(Opc, dl, MVT::f64, Src);
8076 }
8077 return Conv;
8078}
8079
8080void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
8081 SelectionDAG &DAG,
8082 const SDLoc &dl) const {
8083 SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
8084 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8085 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8086 bool IsStrict = Op->isStrictFPOpcode();
8087
8088 // Convert the FP value to an int value through memory.
8089 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
8090 (IsSigned || Subtarget.hasFPCVT());
8091 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
8092 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
8093 MachinePointerInfo MPI =
8094 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
8095
8096 // Emit a store to the stack slot.
8097 SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
8098 Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
8099 if (i32Stack) {
8100 MachineFunction &MF = DAG.getMachineFunction();
8101 Alignment = Align(4);
8102 MachineMemOperand *MMO =
8103 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
8104 SDValue Ops[] = { Chain, Tmp, FIPtr };
8105 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
8106 DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
8107 } else
8108 Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
8109
8110 // Result is a load from the stack slot. If loading 4 bytes, make sure to
8111 // add in a bias on big endian.
8112 if (Op.getValueType() == MVT::i32 && !i32Stack) {
8113 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
8114 DAG.getConstant(4, dl, FIPtr.getValueType()));
8115 MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
8116 }
8117
8118 RLI.Chain = Chain;
8119 RLI.Ptr = FIPtr;
8120 RLI.MPI = MPI;
8121 RLI.Alignment = Alignment;
8122}
8123
8124/// Custom lowers floating point to integer conversions to use
8125/// the direct move instructions available in ISA 2.07 to avoid the
8126/// need for load/store combinations.
8127SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8128 SelectionDAG &DAG,
8129 const SDLoc &dl) const {
8130 SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8131 SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
8132 if (Op->isStrictFPOpcode())
8133 return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
8134 else
8135 return Mov;
8136}
8137
8138SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
8139 const SDLoc &dl) const {
8140 bool IsStrict = Op->isStrictFPOpcode();
8141 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8142 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8143 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8144 EVT SrcVT = Src.getValueType();
8145 EVT DstVT = Op.getValueType();
8146
8147 // FP to INT conversions are legal for f128.
8148 if (SrcVT == MVT::f128)
8149 return Subtarget.hasP9Vector() ? Op : SDValue();
8150
8151 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
8152 // PPC (the libcall is not available).
8153 if (SrcVT == MVT::ppcf128) {
8154 if (DstVT == MVT::i32) {
8155 // TODO: Conservatively pass only nofpexcept flag here. Need to check and
8156 // set other fast-math flags to FP operations in both strict and
8157 // non-strict cases. (FP_TO_SINT, FSUB)
8158 SDNodeFlags Flags;
8159 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8160
8161 if (IsSigned) {
8162 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src,
8163 DAG.getIntPtrConstant(0, dl));
8164 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src,
8165 DAG.getIntPtrConstant(1, dl));
8166
8167 // Add the two halves of the long double in round-to-zero mode, and use
8168 // a smaller FP_TO_SINT.
8169 if (IsStrict) {
8170 SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
8171 DAG.getVTList(MVT::f64, MVT::Other),
8172 {Op.getOperand(0), Lo, Hi}, Flags);
8173 return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8174 DAG.getVTList(MVT::i32, MVT::Other),
8175 {Res.getValue(1), Res}, Flags);
8176 } else {
8177 SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
8178 return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
8179 }
8180 } else {
8181 const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
8182 APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
8183 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8184 SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
8185 if (IsStrict) {
8186 // Sel = Src < 0x80000000
8187 // FltOfs = select Sel, 0.0, 0x80000000
8188 // IntOfs = select Sel, 0, 0x80000000
8189 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8190 SDValue Chain = Op.getOperand(0);
8191 EVT SetCCVT =
8192 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8193 EVT DstSetCCVT =
8194 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8195 SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8196 Chain, true);
8197 Chain = Sel.getValue(1);
8198
8199 SDValue FltOfs = DAG.getSelect(
8200 dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8201 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8202
8203 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
8204 DAG.getVTList(SrcVT, MVT::Other),
8205 {Chain, Src, FltOfs}, Flags);
8206 Chain = Val.getValue(1);
8207 SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8208 DAG.getVTList(DstVT, MVT::Other),
8209 {Chain, Val}, Flags);
8210 Chain = SInt.getValue(1);
8211 SDValue IntOfs = DAG.getSelect(
8212 dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
8213 SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8214 return DAG.getMergeValues({Result, Chain}, dl);
8215 } else {
8216 // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
8217 // FIXME: generated code sucks.
8218 SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
8219 True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
8220 True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
8221 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
8222 return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
8223 }
8224 }
8225 }
8226
8227 return SDValue();
8228 }
8229
8230 if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
8231 return LowerFP_TO_INTDirectMove(Op, DAG, dl);
8232
8233 ReuseLoadInfo RLI;
8234 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8235
8236 return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8237 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8238}
8239
8240// We're trying to insert a regular store, S, and then a load, L. If the
8241// incoming value, O, is a load, we might just be able to have our load use the
8242// address used by O. However, we don't know if anything else will store to
8243// that address before we can load from it. To prevent this situation, we need
8244// to insert our load, L, into the chain as a peer of O. To do this, we give L
8245// the same chain operand as O, we create a token factor from the chain results
8246// of O and L, and we replace all uses of O's chain result with that token
8247// factor (see spliceIntoChain below for this last part).
8248bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
8249 ReuseLoadInfo &RLI,
8250 SelectionDAG &DAG,
8251 ISD::LoadExtType ET) const {
8252 // Conservatively skip reusing for constrained FP nodes.
8253 if (Op->isStrictFPOpcode())
8254 return false;
8255
8256 SDLoc dl(Op);
8257 bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
8258 (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
8259 if (ET == ISD::NON_EXTLOAD &&
8260 (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
8261 isOperationLegalOrCustom(Op.getOpcode(),
8262 Op.getOperand(0).getValueType())) {
8263
8264 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8265 return true;
8266 }
8267
8268 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
8269 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
8270 LD->isNonTemporal())
8271 return false;
8272 if (LD->getMemoryVT() != MemVT)
8273 return false;
8274
8275 // If the result of the load is an illegal type, then we can't build a
8276 // valid chain for reuse since the legalised loads and token factor node that
8277 // ties the legalised loads together uses a different output chain than the
8278 // illegal load.
8279 if (!isTypeLegal(LD->getValueType(0)))
8280 return false;
8281
8282 RLI.Ptr = LD->getBasePtr();
8283 if (LD->isIndexed() && !LD->getOffset().isUndef()) {
8284 assert(LD->getAddressingMode() == ISD::PRE_INC &&
8285 "Non-pre-inc AM on PPC?");
8286 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
8287 LD->getOffset());
8288 }
8289
8290 RLI.Chain = LD->getChain();
8291 RLI.MPI = LD->getPointerInfo();
8292 RLI.IsDereferenceable = LD->isDereferenceable();
8293 RLI.IsInvariant = LD->isInvariant();
8294 RLI.Alignment = LD->getAlign();
8295 RLI.AAInfo = LD->getAAInfo();
8296 RLI.Ranges = LD->getRanges();
8297
8298 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
8299 return true;
8300}
8301
8302// Given the head of the old chain, ResChain, insert a token factor containing
8303// it and NewResChain, and make users of ResChain now be users of that token
8304// factor.
8305// TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
8306void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
8307 SDValue NewResChain,
8308 SelectionDAG &DAG) const {
8309 if (!ResChain)
8310 return;
8311
8312 SDLoc dl(NewResChain);
8313
8314 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
8315 NewResChain, DAG.getUNDEF(MVT::Other));
8316 assert(TF.getNode() != NewResChain.getNode() &&
8317 "A new TF really is required here");
8318
8319 DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
8320 DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
8321}
8322
8323 /// Analyze the profitability of a direct move:
8324 /// prefer a float load to an int load plus a direct move
8325 /// when there is no integer use of the int load.
8326bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8327 SDNode *Origin = Op.getOperand(0).getNode();
8328 if (Origin->getOpcode() != ISD::LOAD)
8329 return true;
8330
8331 // If there is no LXSIBZX/LXSIHZX, like Power8,
8332 // prefer direct move if the memory size is 1 or 2 bytes.
8333 MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8334 if (!Subtarget.hasP9Vector() && MMO->getSize() <= 2)
8335 return true;
8336
8337 for (SDNode::use_iterator UI = Origin->use_begin(),
8338 UE = Origin->use_end();
8339 UI != UE; ++UI) {
8340
8341 // Only look at the users of the loaded value.
8342 if (UI.getUse().get().getResNo() != 0)
8343 continue;
8344
8345 if (UI->getOpcode() != ISD::SINT_TO_FP &&
8346 UI->getOpcode() != ISD::UINT_TO_FP &&
8347 UI->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8348 UI->getOpcode() != ISD::STRICT_UINT_TO_FP)
8349 return true;
8350 }
8351
8352 return false;
8353}
8354
8355static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
8356 const PPCSubtarget &Subtarget,
8357 SDValue Chain = SDValue()) {
8358 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8359 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8360 SDLoc dl(Op);
8361
8362 // TODO: Any other flags to propagate?
8363 SDNodeFlags Flags;
8364 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8365
8366 // If we have FCFIDS, then use it when converting to single-precision.
8367 // Otherwise, convert to double-precision and then round.
8368 bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
8369 unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
8370 : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
8371 EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
8372 if (Op->isStrictFPOpcode()) {
8373 if (!Chain)
8374 Chain = Op.getOperand(0);
8375 return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
8376 DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
8377 } else
8378 return DAG.getNode(ConvOpc, dl, ConvTy, Src);
8379}
8380
8381/// Custom lowers integer to floating point conversions to use
8382/// the direct move instructions available in ISA 2.07 to avoid the
8383/// need for load/store combinations.
8384SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8385 SelectionDAG &DAG,
8386 const SDLoc &dl) const {
8387 assert((Op.getValueType() == MVT::f32 ||
8388 Op.getValueType() == MVT::f64) &&
8389 "Invalid floating point type as target of conversion");
8390 assert(Subtarget.hasFPCVT() &&
8391 "Int to FP conversions with direct moves require FPCVT");
8392 SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
8393 bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8394 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8395 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8396 unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8397 SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
8398 return convertIntToFP(Op, Mov, DAG, Subtarget);
8399}
8400
8401static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8402
8403 EVT VecVT = Vec.getValueType();
8404 assert(VecVT.isVector() && "Expected a vector type.");
8405 assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8406
8407 EVT EltVT = VecVT.getVectorElementType();
8408 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8409 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8410
8411 unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8412 SmallVector<SDValue, 16> Ops(NumConcat);
8413 Ops[0] = Vec;
8414 SDValue UndefVec = DAG.getUNDEF(VecVT);
8415 for (unsigned i = 1; i < NumConcat; ++i)
8416 Ops[i] = UndefVec;
8417
8418 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
8419}
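As a rough aid to the arithmetic in widenVec above, the sketch below (names invented, not LLVM code) computes how many copies are concatenated to reach a full 128-bit vector; the first copy is Vec itself and the rest are undef:

// e.g. a v4i16 input (64 bits) needs NumConcat == 2, a v2i16 input needs 4.
unsigned numConcatFor(unsigned eltBits, unsigned numElts) {
  unsigned wideNumElts = 128 / eltBits;   // elements in the widened vector
  return wideNumElts / numElts;           // total pieces in the CONCAT_VECTORS
}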
8420
8421SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
8422 const SDLoc &dl) const {
8423 bool IsStrict = Op->isStrictFPOpcode();
8424 unsigned Opc = Op.getOpcode();
8425 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8426 assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP ||
8427 Opc == ISD::STRICT_UINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP) &&
8428 "Unexpected conversion type");
8429 assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
8430 "Supports conversions to v2f64/v4f32 only.");
8431
8432 // TODO: Any other flags to propagate?
8433 SDNodeFlags Flags;
8434 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8435
8436 bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
8437 bool FourEltRes = Op.getValueType() == MVT::v4f32;
8438
8439 SDValue Wide = widenVec(DAG, Src, dl);
8440 EVT WideVT = Wide.getValueType();
8441 unsigned WideNumElts = WideVT.getVectorNumElements();
8442 MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
8443
8444 SmallVector<int, 16> ShuffV;
8445 for (unsigned i = 0; i < WideNumElts; ++i)
8446 ShuffV.push_back(i + WideNumElts);
8447
8448 int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
8449 int SaveElts = FourEltRes ? 4 : 2;
8450 if (Subtarget.isLittleEndian())
8451 for (int i = 0; i < SaveElts; i++)
8452 ShuffV[i * Stride] = i;
8453 else
8454 for (int i = 1; i <= SaveElts; i++)
8455 ShuffV[i * Stride - 1] = i - 1;
8456
8457 SDValue ShuffleSrc2 =
8458 SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
8459 SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
8460
8461 SDValue Extend;
8462 if (SignedConv) {
8463 Arrange = DAG.getBitcast(IntermediateVT, Arrange);
8464 EVT ExtVT = Src.getValueType();
8465 if (Subtarget.hasP9Altivec())
8466 ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
8467 IntermediateVT.getVectorNumElements());
8468
8469 Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
8470 DAG.getValueType(ExtVT));
8471 } else
8472 Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
8473
8474 if (IsStrict)
8475 return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
8476 {Op.getOperand(0), Extend}, Flags);
8477
8478 return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
8479}
8480
8481SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
8482 SelectionDAG &DAG) const {
8483 SDLoc dl(Op);
8484 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8485 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8486 bool IsStrict = Op->isStrictFPOpcode();
8487 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8488 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
8489
8490 // TODO: Any other flags to propagate?
8491 SDNodeFlags Flags;
8492 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8493
8494 EVT InVT = Src.getValueType();
8495 EVT OutVT = Op.getValueType();
8496 if (OutVT.isVector() && OutVT.isFloatingPoint() &&
8497 isOperationCustom(Op.getOpcode(), InVT))
8498 return LowerINT_TO_FPVector(Op, DAG, dl);
8499
8500 // Conversions to f128 are legal.
8501 if (Op.getValueType() == MVT::f128)
8502 return Subtarget.hasP9Vector() ? Op : SDValue();
8503
8504 // Don't handle ppc_fp128 here; let it be lowered to a libcall.
8505 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8506 return SDValue();
8507
8508 if (Src.getValueType() == MVT::i1) {
8509 SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
8510 DAG.getConstantFP(1.0, dl, Op.getValueType()),
8511 DAG.getConstantFP(0.0, dl, Op.getValueType()));
8512 if (IsStrict)
8513 return DAG.getMergeValues({Sel, Chain}, dl);
8514 else
8515 return Sel;
8516 }
8517
8518 // If we have direct moves, we can do all of the conversion and skip the
8519 // store/load; however, without FPCVT we can't do most conversions.
8520 if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8521 Subtarget.isPPC64() && Subtarget.hasFPCVT())
8522 return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8523
8524 assert((IsSigned || Subtarget.hasFPCVT()) &&
8525 "UINT_TO_FP is supported only with FPCVT");
8526
8527 if (Src.getValueType() == MVT::i64) {
8528 SDValue SINT = Src;
8529 // When converting to single-precision, we actually need to convert
8530 // to double-precision first and then round to single-precision.
8531 // To avoid double-rounding effects during that operation, we have
8532 // to prepare the input operand. Bits that might be truncated when
8533 // converting to double-precision are replaced by a bit that won't
8534 // be lost at this stage, but is below the single-precision rounding
8535 // position.
8536 //
8537 // However, if -enable-unsafe-fp-math is in effect, accept double
8538 // rounding to avoid the extra overhead.
8539 if (Op.getValueType() == MVT::f32 &&
8540 !Subtarget.hasFPCVT() &&
8541 !DAG.getTarget().Options.UnsafeFPMath) {
8542
8543 // Twiddle input to make sure the low 11 bits are zero. (If this
8544 // is the case, we are guaranteed the value will fit into the 53 bit
8545 // mantissa of an IEEE double-precision value without rounding.)
8546 // If any of those low 11 bits were not zero originally, make sure
8547 // bit 12 (value 2048) is set instead, so that the final rounding
8548 // to single-precision gets the correct result.
8549 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8550 SINT, DAG.getConstant(2047, dl, MVT::i64));
8551 Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
8552 Round, DAG.getConstant(2047, dl, MVT::i64));
8553 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
8554 Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8555 Round, DAG.getConstant(-2048, dl, MVT::i64));
8556
8557 // However, we cannot use that value unconditionally: if the magnitude
8558 // of the input value is small, the bit-twiddling we did above might
8559 // end up visibly changing the output. Fortunately, in that case, we
8560 // don't need to twiddle bits since the original input will convert
8561 // exactly to double-precision floating-point already. Therefore,
8562 // construct a conditional to use the original value if the top 11
8563 // bits are all sign-bit copies, and use the rounded value computed
8564 // above otherwise.
8565 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
8566 SINT, DAG.getConstant(53, dl, MVT::i32));
8567 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
8568 Cond, DAG.getConstant(1, dl, MVT::i64));
8569 Cond = DAG.getSetCC(
8570 dl,
8571 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
8572 Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
8573
8574 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
8575 }
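The AND/ADD/OR/AND node sequence above is easier to see as plain integer arithmetic; a sketch of the same computation (invented name, not LLVM code):

#include <cstdint>

// Clear the low 11 bits of x, folding any information they carried into bit 11
// (value 2048), so the later f64 -> f32 rounding still rounds the right way.
uint64_t stickyRound(uint64_t x) {
  uint64_t r = (x & 2047) + 2047;   // bit 11 of r is set iff (x & 2047) != 0
  r = (r | x) & ~uint64_t(2047);    // keep the high bits of x, drop the low 11
  return r;
}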
8576
8577 ReuseLoadInfo RLI;
8578 SDValue Bits;
8579
8580 MachineFunction &MF = DAG.getMachineFunction();
8581 if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
8582 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8583 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8584 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8585 } else if (Subtarget.hasLFIWAX() &&
8586 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
8587 MachineMemOperand *MMO =
8588 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8589 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8590 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8591 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
8592 DAG.getVTList(MVT::f64, MVT::Other),
8593 Ops, MVT::i32, MMO);
8594 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8595 } else if (Subtarget.hasFPCVT() &&
8596 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
8597 MachineMemOperand *MMO =
8598 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8599 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8600 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8601 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
8602 DAG.getVTList(MVT::f64, MVT::Other),
8603 Ops, MVT::i32, MMO);
8604 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8605 } else if (((Subtarget.hasLFIWAX() &&
8606 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
8607 (Subtarget.hasFPCVT() &&
8608 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8609 SINT.getOperand(0).getValueType() == MVT::i32) {
8610 MachineFrameInfo &MFI = MF.getFrameInfo();
8611 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8612
8613 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8614 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8615
8616 SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
8617 MachinePointerInfo::getFixedStack(
8618 DAG.getMachineFunction(), FrameIdx));
8619 Chain = Store;
8620
8621 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8622 "Expected an i32 store");
8623
8624 RLI.Ptr = FIdx;
8625 RLI.Chain = Chain;
8626 RLI.MPI =
8627 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8628 RLI.Alignment = Align(4);
8629
8630 MachineMemOperand *MMO =
8631 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8632 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8633 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8634 Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
8635 PPCISD::LFIWZX : PPCISD::LFIWAX,
8636 dl, DAG.getVTList(MVT::f64, MVT::Other),
8637 Ops, MVT::i32, MMO);
8638 Chain = Bits.getValue(1);
8639 } else
8640 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
8641
8642 SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
8643 if (IsStrict)
8644 Chain = FP.getValue(1);
8645
8646 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8647 if (IsStrict)
8648 FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
8649 DAG.getVTList(MVT::f32, MVT::Other),
8650 {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8651 else
8652 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8653 DAG.getIntPtrConstant(0, dl));
8654 }
8655 return FP;
8656 }
8657
8658 assert(Src.getValueType() == MVT::i32 &&
8659 "Unhandled INT_TO_FP type in custom expander!");
8660 // Since we only generate this in 64-bit mode, we can take advantage of
8661 // 64-bit registers. In particular, sign extend the input value into the
8662 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
8663 // then lfd it and fcfid it.
8664 MachineFunction &MF = DAG.getMachineFunction();
8665 MachineFrameInfo &MFI = MF.getFrameInfo();
8666 EVT PtrVT = getPointerTy(MF.getDataLayout());
8667
8668 SDValue Ld;
8669 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
8670 ReuseLoadInfo RLI;
8671 bool ReusingLoad;
8672 if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
8673 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8674 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8675
8676 SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
8677 MachinePointerInfo::getFixedStack(
8678 DAG.getMachineFunction(), FrameIdx));
8679 Chain = Store;
8680
8681 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8682 "Expected an i32 store");
8683
8684 RLI.Ptr = FIdx;
8685 RLI.Chain = Chain;
8686 RLI.MPI =
8687 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8688 RLI.Alignment = Align(4);
8689 }
8690
8691 MachineMemOperand *MMO =
8692 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8693 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8694 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8695 Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
8696 DAG.getVTList(MVT::f64, MVT::Other), Ops,
8697 MVT::i32, MMO);
8698 Chain = Ld.getValue(1);
8699 if (ReusingLoad)
8700 spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
8701 } else {
8702 assert(Subtarget.isPPC64() &&
8703 "i32->FP without LFIWAX supported only on PPC64");
8704
8705 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
8706 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8707
8708 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);
8709
8710 // STD the extended value into the stack slot.
8711 SDValue Store = DAG.getStore(
8712 Chain, dl, Ext64, FIdx,
8713 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
8714 Chain = Store;
8715
8716 // Load the value as a double.
8717 Ld = DAG.getLoad(
8718 MVT::f64, dl, Chain, FIdx,
8719 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
8720 Chain = Ld.getValue(1);
8721 }
8722
8723 // FCFID it and return it.
8724 SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
8725 if (IsStrict)
8726 Chain = FP.getValue(1);
8727 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8728 if (IsStrict)
8729 FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
8730 DAG.getVTList(MVT::f32, MVT::Other),
8731 {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8732 else
8733 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8734 DAG.getIntPtrConstant(0, dl));
8735 }
8736 return FP;
8737}
8738
8739SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
8740 SelectionDAG &DAG) const {
8741 SDLoc dl(Op);
8742 /*
8743 The rounding mode is in bits 30:31 of the FPSCR, and has the following
8744 settings:
8745 00 Round to nearest
8746 01 Round to 0
8747 10 Round to +inf
8748 11 Round to -inf
8749
8750 FLT_ROUNDS, on the other hand, expects the following:
8751 -1 Undefined
8752 0 Round to 0
8753 1 Round to nearest
8754 2 Round to +inf
8755 3 Round to -inf
8756
8757 To perform the conversion, we do:
8758 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
8759 */
8760
8761 MachineFunction &MF = DAG.getMachineFunction();
8762 EVT VT = Op.getValueType();
8763 EVT PtrVT = getPointerTy(MF.getDataLayout());
8764
8765 // Save FP Control Word to register
8766 SDValue Chain = Op.getOperand(0);
8767 SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
8768 Chain = MFFS.getValue(1);
8769
8770 SDValue CWD;
8771 if (isTypeLegal(MVT::i64)) {
8772 CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
8773 DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
8774 } else {
8775 // Save FP register to stack slot
8776 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
8777 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
8778 Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
8779
8780 // Load FP Control Word from low 32 bits of stack slot.
8781 assert(hasBigEndianPartOrdering(MVT::i64, MF.getDataLayout()) &&
8782 "Stack slot adjustment is valid only on big endian subtargets!");
8783 SDValue Four = DAG.getConstant(4, dl, PtrVT);
8784 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
8785 CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
8786 Chain = CWD.getValue(1);
8787 }
8788
8789 // Transform as necessary
8790 SDValue CWD1 =
8791 DAG.getNode(ISD::AND, dl, MVT::i32,
8792 CWD, DAG.getConstant(3, dl, MVT::i32));
8793 SDValue CWD2 =
8794 DAG.getNode(ISD::SRL, dl, MVT::i32,
8795 DAG.getNode(ISD::AND, dl, MVT::i32,
8796 DAG.getNode(ISD::XOR, dl, MVT::i32,
8797 CWD, DAG.getConstant(3, dl, MVT::i32)),
8798 DAG.getConstant(3, dl, MVT::i32)),
8799 DAG.getConstant(1, dl, MVT::i32));
8800
8801 SDValue RetVal =
8802 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
8803
8804 RetVal =
8805 DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
8806 dl, VT, RetVal);
8807
8808 return DAG.getMergeValues({RetVal, Chain}, dl);
8809}
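A quick way to check the conversion formula from the comment above; the free function below is only an editorial sketch, not part of this file:

// FPSCR RN field -> FLT_ROUNDS value: ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1)).
int fltRoundsFromRN(unsigned rn) {
  unsigned cwd = rn & 3;
  return static_cast<int>((cwd & 3) ^ ((~cwd & 3) >> 1));
}
// fltRoundsFromRN(0) == 1 (nearest), fltRoundsFromRN(1) == 0 (toward zero),
// fltRoundsFromRN(2) == 2 (+inf),    fltRoundsFromRN(3) == 3 (-inf).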
8810
8811SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
8812 EVT VT = Op.getValueType();
8813 unsigned BitWidth = VT.getSizeInBits();
8814 SDLoc dl(Op);
8815 assert(Op.getNumOperands() == 3 &&
8816 VT == Op.getOperand(1).getValueType() &&
8817 "Unexpected SHL!");
8818
8819 // Expand into a bunch of logical ops. Note that these ops
8820 // depend on the PPC behavior for oversized shift amounts.
8821 SDValue Lo = Op.getOperand(0);
8822 SDValue Hi = Op.getOperand(1);
8823 SDValue Amt = Op.getOperand(2);
8824 EVT AmtVT = Amt.getValueType();
8825
8826 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8827 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8828 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
8829 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
8830 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
8831 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8832 DAG.getConstant(-BitWidth, dl, AmtVT));
8833 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
8834 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
8835 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
8836 SDValue OutOps[] = { OutLo, OutHi };
8837 return DAG.getMergeValues(OutOps, dl);
8838}
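For comparison, a portable C++ sketch of the same double-word left shift; unlike the node sequence above, standard C++ must special-case shift amounts of 0 and >= 64 because shifting by the full bit width is undefined (the helper name is invented):

#include <cstdint>
#include <utility>

// Returns {lo, hi} of the 128-bit value hi:lo shifted left by amt.
std::pair<uint64_t, uint64_t> shl128(uint64_t lo, uint64_t hi, unsigned amt) {
  amt &= 127;
  if (amt == 0)
    return {lo, hi};
  if (amt < 64)
    return {lo << amt, (hi << amt) | (lo >> (64 - amt))};
  return {0, lo << (amt - 64)};
}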
8839
8840SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
8841 EVT VT = Op.getValueType();
8842 SDLoc dl(Op);
8843 unsigned BitWidth = VT.getSizeInBits();
8844 assert(Op.getNumOperands() == 3 &&
8845 VT == Op.getOperand(1).getValueType() &&
8846 "Unexpected SRL!");
8847
8848 // Expand into a bunch of logical ops. Note that these ops
8849 // depend on the PPC behavior for oversized shift amounts.
8850 SDValue Lo = Op.getOperand(0);
8851 SDValue Hi = Op.getOperand(1);
8852 SDValue Amt = Op.getOperand(2);
8853 EVT AmtVT = Amt.getValueType();
8854
8855 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8856 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8857 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
8858 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
8859 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
8860 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8861 DAG.getConstant(-BitWidth, dl, AmtVT));
8862 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
8863 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
8864 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
8865 SDValue OutOps[] = { OutLo, OutHi };
8866 return DAG.getMergeValues(OutOps, dl);
8867}
8868
8869SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
8870 SDLoc dl(Op);
8871 EVT VT = Op.getValueType();
8872 unsigned BitWidth = VT.getSizeInBits();
8873 assert(Op.getNumOperands() == 3 &&
8874 VT == Op.getOperand(1).getValueType() &&
8875 "Unexpected SRA!");
8876
8877 // Expand into a bunch of logical ops, followed by a select_cc.
8878 SDValue Lo = Op.getOperand(0);
8879 SDValue Hi = Op.getOperand(1);
8880 SDValue Amt = Op.getOperand(2);
8881 EVT AmtVT = Amt.getValueType();
8882
8883 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8884 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8885 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
8886 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
8887 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
8888 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8889 DAG.getConstant(-BitWidth, dl, AmtVT));
8890 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
8891 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
8892 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
8893 Tmp4, Tmp6, ISD::SETLE);
8894 SDValue OutOps[] = { OutLo, OutHi };
8895 return DAG.getMergeValues(OutOps, dl);
8896}
8897
8898SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
8899 SelectionDAG &DAG) const {
8900 SDLoc dl(Op);
8901 EVT VT = Op.getValueType();
8902 unsigned BitWidth = VT.getSizeInBits();
8903
8904 bool IsFSHL = Op.getOpcode() == ISD::FSHL;
8905 SDValue X = Op.getOperand(0);
8906 SDValue Y = Op.getOperand(1);
8907 SDValue Z = Op.getOperand(2);
8908 EVT AmtVT = Z.getValueType();
8909
8910 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
8911 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
8912 // This is simpler than TargetLowering::expandFunnelShift because we can rely
8913 // on PowerPC shift by BW being well defined.
8914 Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
8915 DAG.getConstant(BitWidth - 1, dl, AmtVT));
8916 SDValue SubZ =
8917 DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
8918 X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
8919 Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
8920 return DAG.getNode(ISD::OR, dl, VT, X, Y);
8921}
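A portable sketch of the fshl formula used above (invented name, not LLVM code); the z % BW == 0 case needs explicit handling here because a C++ shift by the full width is undefined, whereas the PPC shift nodes handle it for free:

#include <cstdint>

// fshl on 64-bit values: (x << (z % 64)) | (y >> (64 - (z % 64))).
uint64_t fshl64(uint64_t x, uint64_t y, unsigned z) {
  unsigned s = z & 63;
  return s ? (x << s) | (y >> (64 - s)) : x;
}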
8922
8923//===----------------------------------------------------------------------===//
8924// Vector related lowering.
8925//
8926
8927/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
8928/// element size of SplatSize. Cast the result to VT.
8929static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
8930 SelectionDAG &DAG, const SDLoc &dl) {
8931 static const MVT VTys[] = { // canonical VT to use for each size.
8932 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
8933 };
8934
8935 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
8936
8937 // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
8938 if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
8939 SplatSize = 1;
8940 Val = 0xFF;
8941 }
8942
8943 EVT CanonicalVT = VTys[SplatSize-1];
8944
8945 // Build a canonical splat for this value.
8946 return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
8947}
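The all-ones canonicalization above relies on the fact that an all-ones splat looks identical at every element size; a tiny sketch of that check (illustrative only, mirrors the expression in the code and assumes SplatSize is 1, 2 or 4):

#include <cstdint>

bool isAllOnesSplat(uint64_t val, unsigned splatSize) {
  return val == ((1ULL << (splatSize * 8)) - 1); // 0xFF, 0xFFFF or 0xFFFFFFFF
}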
8948
8949/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
8950/// specified intrinsic ID.
8951static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
8952 const SDLoc &dl, EVT DestVT = MVT::Other) {
8953 if (DestVT == MVT::Other) DestVT = Op.getValueType();
8954 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8955 DAG.getConstant(IID, dl, MVT::i32), Op);
8956}
8957
8958/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
8959/// specified intrinsic ID.
8960static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
8961 SelectionDAG &DAG, const SDLoc &dl,
8962 EVT DestVT = MVT::Other) {
8963 if (DestVT == MVT::Other) DestVT = LHS.getValueType();
8964 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8965 DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
8966}
8967
8968/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
8969/// specified intrinsic ID.
8970static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
8971 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
8972 EVT DestVT = MVT::Other) {
8973 if (DestVT == MVT::Other) DestVT = Op0.getValueType();
8974 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8975 DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
8976}
8977
8978/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
8979/// amount. The result has the specified value type.
8980static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
8981 SelectionDAG &DAG, const SDLoc &dl) {
8982 // Force LHS/RHS to be the right type.
8983 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
8984 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
8985
8986 int Ops[16];
8987 for (unsigned i = 0; i != 16; ++i)
8988 Ops[i] = i + Amt;
8989 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
8990 return DAG.getNode(ISD::BITCAST, dl, VT, T);
8991}
8992
8993/// Do we have an efficient pattern in a .td file for this node?
8994///
8995/// \param V - pointer to the BuildVectorSDNode being matched
8996/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
8997///
8998/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
8999/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
9000/// the opposite is true (expansion is beneficial) are:
9001/// - The node builds a vector out of integers that are not 32 or 64-bits
9002/// - The node builds a vector out of constants
9003/// - The node is a "load-and-splat"
9004/// In all other cases, we will choose to keep the BUILD_VECTOR.
9005static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
9006 bool HasDirectMove,
9007 bool HasP8Vector) {
9008 EVT VecVT = V->getValueType(0);
9009 bool RightType = VecVT == MVT::v2f64 ||
9010 (HasP8Vector && VecVT == MVT::v4f32) ||
9011 (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
9012 if (!RightType)
9013 return false;
9014
9015 bool IsSplat = true;
9016 bool IsLoad = false;
9017 SDValue Op0 = V->getOperand(0);
9018
9019 // This function is called in a block that confirms the node is not a constant
9020 // splat. So a constant BUILD_VECTOR here means the vector is built out of
9021 // different constants.
9022 if (V->isConstant())
9023 return false;
9024 for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
9025 if (V->getOperand(i).isUndef())
9026 return false;
9027 // We want to expand nodes that represent load-and-splat even if the
9028 // loaded value is a floating point truncation or conversion to int.
9029 if (V->getOperand(i).getOpcode() == ISD::LOAD ||
9030 (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
9031 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9032 (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
9033 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9034 (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
9035 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
9036 IsLoad = true;
9037 // If the operands are different or the input is not a load and has more
9038 // uses than just this BV node, then it isn't a splat.
9039 if (V->getOperand(i) != Op0 ||
9040 (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
9041 IsSplat = false;
9042 }
9043 return !(IsSplat && IsLoad);
9044}
9045
9046// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9047SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9048
9049 SDLoc dl(Op);
9050 SDValue Op0 = Op->getOperand(0);
9051
9052 if ((Op.getValueType() != MVT::f128) ||
9053 (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9054 (Op0.getOperand(0).getValueType() != MVT::i64) ||
9055 (Op0.getOperand(1).getValueType() != MVT::i64))
9056 return SDValue();
9057
9058 return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0),
9059 Op0.getOperand(1));
9060}
9061
9062static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9063 const SDValue *InputLoad = &Op;
9064 if (InputLoad->getOpcode() == ISD::BITCAST)
9065 InputLoad = &InputLoad->getOperand(0);
9066 if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
9067 InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
9068 IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9069 InputLoad = &InputLoad->getOperand(0);
9070 }
9071 if (InputLoad->getOpcode() != ISD::LOAD)
9072 return nullptr;
9073 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9074 return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
9075}
9076
9077// Convert the argument APFloat to a single precision APFloat if there is no
9078// loss in information during the conversion to single precision APFloat and the
9079// resulting number is not a denormal number. Return true if successful.
9080bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
9081 APFloat APFloatToConvert = ArgAPFloat;
9082 bool LosesInfo = true;
9083 APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9084 &LosesInfo);
9085 bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
9086 if (Success)
9087 ArgAPFloat = APFloatToConvert;
9088 return Success;
9089}
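Roughly the same eligibility test expressed without APFloat, as a standalone sketch that ignores NaN payloads and other edge cases the APFloat version handles (the function name is invented):

#include <cmath>
#include <limits>

// True if d round-trips exactly through float and the float is not a denormal.
bool fitsInNormalSingle(double d) {
  float f = static_cast<float>(d);
  bool exact = static_cast<double>(f) == d;
  bool denormal = f != 0.0f && std::fabs(f) < std::numeric_limits<float>::min();
  return exact && !denormal;
}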
9090
9091// Bitcast the argument APInt to a double and convert it to a single precision
9092// APFloat, bitcast the APFloat to an APInt and assign it to the original
9093// argument if there is no loss in information during the conversion from
9094// double to single precision APFloat and the resulting number is not a denormal
9095// number. Return true if successful.
9096bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
9097 double DpValue = ArgAPInt.bitsToDouble();
9098 APFloat APFloatDp(DpValue);
9099 bool Success = convertToNonDenormSingle(APFloatDp);
9100 if (Success)
9101 ArgAPInt = APFloatDp.bitcastToAPInt();
9102 return Success;
9103}
9104
9105 // Nondestructive check for convertToNonDenormSingle.
9106bool llvm::checkConvertToNonDenormSingle(APFloat &ArgAPFloat) {
9107 // Only convert if it loses info, since XXSPLTIDP should
9108 // handle the other case.
9109 APFloat APFloatToConvert = ArgAPFloat;
9110 bool LosesInfo = true;
9111 APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9112 &LosesInfo);
9113
9114 return (!LosesInfo && !APFloatToConvert.isDenormal());
9115}
9116
9117static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
9118 unsigned &Opcode) {
9119 LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Op.getOperand(0));
9120 if (!InputNode || !Subtarget.hasVSX() || !ISD::isUNINDEXEDLoad(InputNode))
9121 return false;
9122
9123 EVT Ty = Op->getValueType(0);
9124 // For v2f64, v4f32 and v4i32 types, we require the load to be non-extending
9125 // as we cannot handle extending loads for these types.
9126 if ((Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32) &&
9127 ISD::isNON_EXTLoad(InputNode))
9128 return true;
9129
9130 EVT MemVT = InputNode->getMemoryVT();
9131 // For v8i16 and v16i8 types, extending loads can be handled as long as the
9132 // memory VT is the same vector element VT type.
9133 // The loads feeding into the v8i16 and v16i8 types will be extending because
9134 // scalar i8/i16 are not legal types.
9135 if ((Ty == MVT::v8i16 || Ty == MVT::v16i8) && ISD::isEXTLoad(InputNode) &&
9136 (MemVT == Ty.getVectorElementType()))
9137 return true;
9138
9139 if (Ty == MVT::v2i64) {
9140 // Check the extend type, when the input type is i32, and the output vector
9141 // type is v2i64.
9142 if (MemVT == MVT::i32) {
9143 if (ISD::isZEXTLoad(InputNode))
9144 Opcode = PPCISD::ZEXT_LD_SPLAT;
9145 if (ISD::isSEXTLoad(InputNode))
9146 Opcode = PPCISD::SEXT_LD_SPLAT;
9147 }
9148 return true;
9149 }
9150 return false;
9151}
9152
9153// If this is a case we can't handle, return null and let the default
9154// expansion code take care of it. If we CAN select this case, and if it
9155// selects to a single instruction, return Op. Otherwise, if we can codegen
9156// this case more efficiently than a constant pool load, lower it to the
9157// sequence of ops that should be used.
9158SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9159 SelectionDAG &DAG) const {
9160 SDLoc dl(Op);
9161 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
9162 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
9163
9164 // Check if this is a splat of a constant value.
9165 APInt APSplatBits, APSplatUndef;
9166 unsigned SplatBitSize;
9167 bool HasAnyUndefs;
9168 bool BVNIsConstantSplat =
9169 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
9170 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
9171
9172 // If it is a splat of a double, check if we can shrink it to a 32 bit
9173 // non-denormal float which when converted back to double gives us the same
9174 // double. This is to exploit the XXSPLTIDP instruction.
9175 // If we lose precision, we use XXSPLTI32DX.
9176 if (BVNIsConstantSplat && (SplatBitSize == 64) &&
9177 Subtarget.hasPrefixInstrs()) {
9178 // Check the type first to short-circuit so we don't modify APSplatBits if
9179 // this block isn't executed.
9180 if ((Op->getValueType(0) == MVT::v2f64) &&
9181 convertToNonDenormSingle(APSplatBits)) {
9182 SDValue SplatNode = DAG.getNode(
9183 PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
9184 DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
9185 return DAG.getBitcast(Op.getValueType(), SplatNode);
9186 } else {
9187 // We may lose precision, so we have to use XXSPLTI32DX.
9188
9189 uint32_t Hi =
9190 (uint32_t)((APSplatBits.getZExtValue() & 0xFFFFFFFF00000000LL) >> 32);
9191 uint32_t Lo =
9192 (uint32_t)(APSplatBits.getZExtValue() & 0xFFFFFFFF);
9193 SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);
9194
9195 if (!Hi || !Lo)
9196 // If either load is 0, then we should generate XXLXOR to set to 0.
9197 SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
9198
9199 if (Hi)
9200 SplatNode = DAG.getNode(
9201 PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9202 DAG.getTargetConstant(0, dl, MVT::i32),
9203 DAG.getTargetConstant(Hi, dl, MVT::i32));
9204
9205 if (Lo)
9206 SplatNode =
9207 DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9208 DAG.getTargetConstant(1, dl, MVT::i32),
9209 DAG.getTargetConstant(Lo, dl, MVT::i32));
9210
9211 return DAG.getBitcast(Op.getValueType(), SplatNode);
9212 }
9213 }
9214
9215 if (!BVNIsConstantSplat || SplatBitSize > 32) {
9216 unsigned NewOpcode = PPCISD::LD_SPLAT;
9217
9218 // Handle load-and-splat patterns as we have instructions that will do this
9219 // in one go.
9220 if (DAG.isSplatValue(Op, true) &&
9221 isValidSplatLoad(Subtarget, Op, NewOpcode)) {
9222 const SDValue *InputLoad = &Op.getOperand(0);
9223 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9224
9225 // If the input load is an extending load, it will be an i32 -> i64
9226 // extending load and isValidSplatLoad() will update NewOpcode.
9227 unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
9228 unsigned ElementSize =
9229 MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);
9230
9231 assert(((ElementSize == 2 * MemorySize)
9232 ? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
9233 NewOpcode == PPCISD::SEXT_LD_SPLAT)
9234 : (NewOpcode == PPCISD::LD_SPLAT)) &&
9235 "Unmatched element size and opcode!\n");
9236
9237 // Checking for a single use of this load, we have to check for vector
9238 // width (128 bits) / ElementSize uses (since each operand of the
9239 // BUILD_VECTOR is a separate use of the value).
9240 unsigned NumUsesOfInputLD = 128 / ElementSize;
9241 for (SDValue BVInOp : Op->ops())
9242 if (BVInOp.isUndef())
9243 NumUsesOfInputLD--;
9244
9245 // Exclude some cases where LD_SPLAT is worse than scalar_to_vector:
9246 // The cases below should also apply for "lfiwzx/lfiwax + LE target + index
9247 // 1" and "lxvrhx + BE target + index 7" and "lxvrbx + BE target + index
9248 // 15", but the function isValidSplatLoad() currently only returns true when
9249 // the data at index 0 is not nullptr, so we will not get into trouble for
9250 // these cases.
9251 //
9252 // case 1 - lfiwzx/lfiwax
9253 // 1.1: load result is i32 and is sign/zero extend to i64;
9254 // 1.2: build a v2i64 vector type with above loaded value;
9255 // 1.3: the vector has only one value at index 0, others are all undef;
9256 // 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute.
9257 if (NumUsesOfInputLD == 1 &&
9258 (Op->getValueType(0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT &&
9259 !Subtarget.isLittleEndian() && Subtarget.hasVSX() &&
9260 Subtarget.hasLFIWAX()))
9261 return SDValue();
9262
9263 // case 2 - lxvr[hb]x
9264 // 2.1: load result is at most i16;
9265 // 2.2: build a vector with above loaded value;
9266 // 2.3: the vector has only one value at index 0, others are all undef;
9267 // 2.4: on LE target, so that lxvr[hb]x does not need any permute.
9268 if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
9269 Subtarget.isISA3_1() && ElementSize <= 16)
9270 return SDValue();
9271
9272 assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
9273 if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
9274 Subtarget.hasVSX()) {
9275 SDValue Ops[] = {
9276 LD->getChain(), // Chain
9277 LD->getBasePtr(), // Ptr
9278 DAG.getValueType(Op.getValueType()) // VT
9279 };
9280 SDValue LdSplt = DAG.getMemIntrinsicNode(
9281 NewOpcode, dl, DAG.getVTList(Op.getValueType(), MVT::Other), Ops,
9282 LD->getMemoryVT(), LD->getMemOperand());
9283 // Replace all uses of the output chain of the original load with the
9284 // output chain of the new load.
9285 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
9286 LdSplt.getValue(1));
9287 return LdSplt;
9288 }
9289 }
9290
9291 // In 64BIT mode BUILD_VECTOR nodes that are not constant splats of up to
9292 // 32-bits can be lowered to VSX instructions under certain conditions.
9293 // Without VSX, there is no pattern more efficient than expanding the node.
9294 if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
9295 haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
9296 Subtarget.hasP8Vector()))
9297 return Op;
9298 return SDValue();
9299 }
9300
9301 uint64_t SplatBits = APSplatBits.getZExtValue();
9302 uint64_t SplatUndef = APSplatUndef.getZExtValue();
9303 unsigned SplatSize = SplatBitSize / 8;
9304
9305 // First, handle single instruction cases.
9306
9307 // All zeros?
9308 if (SplatBits == 0) {
9309 // Canonicalize all zero vectors to be v4i32.
9310 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
9311 SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
9312 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
9313 }
9314 return Op;
9315 }
9316
9317 // We have XXSPLTIW for constant splats four bytes wide.
9318 // Since the vector length is a multiple of 4, 2-byte splats can be replaced
9319 // with 4-byte splats. We replicate the SplatBits of a 2-byte splat to
9320 // make a 4-byte splat element. For example: a 2-byte splat of 0xABAB can be
9321 // turned into a 4-byte splat of 0xABABABAB.
9322 if (Subtarget.hasPrefixInstrs() && SplatSize == 2)
9323 return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
9324 Op.getValueType(), DAG, dl);
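// --- Illustration only: how a 2-byte splat pattern is widened into the 4-byte
// pattern expected by XXSPLTIW, matching the comment above. Standalone sketch;
// widenHalfWordSplat is a hypothetical name used just for this example.
#include <cstdint>

static uint32_t widenHalfWordSplat(uint32_t SplatBits16) {
  // 0xABAB -> 0xABABABAB: duplicate the half-word into both halves of a word.
  return SplatBits16 | (SplatBits16 << 16);
}
// widenHalfWordSplat(0xABAB) == 0xABABABABu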
9325
9326 if (Subtarget.hasPrefixInstrs() && SplatSize == 4)
9327 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9328 dl);
9329
9330 // We have XXSPLTIB for constant splats one byte wide.
9331 if (Subtarget.hasP9Vector() && SplatSize == 1)
9332 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9333 dl);
9334
9335 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
9336 int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
9337 (32-SplatBitSize));
9338 if (SextVal >= -16 && SextVal <= 15)
9339 return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
9340 dl);
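// --- Illustration only: the shift pair above sign-extends the low SplatBitSize
// bits of SplatBits into a 32-bit signed value. Standalone sketch mirroring the
// expression above; signExtendSplat is a hypothetical name.
#include <cstdint>

static int32_t signExtendSplat(uint32_t SplatBits, unsigned SplatBitSize) {
  // Shift the narrow value up to the top of the word, then arithmetic-shift it
  // back down so its sign bit is replicated through the upper bits.
  return int32_t(SplatBits << (32 - SplatBitSize)) >> (32 - SplatBitSize);
}
// signExtendSplat(0xF0, 8) == -16, which is in [-16,15] and maps to a single
// VSPLTI; signExtendSplat(0x70, 8) == 112, which does not.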
9341
9342 // Two instruction sequences.
9343
9344 // If this value is in the range [-32,30] and is even, use:
9345 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
9346 // If this value is in the range [17,31] and is odd, use:
9347 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
9348 // If this value is in the range [-31,-17] and is odd, use:
9349 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
9350 // Note the last two are three-instruction sequences.
9351 if (SextVal >= -32 && SextVal <= 31) {
9352 // To avoid having these optimizations undone by constant folding,
9353 // we convert to a pseudo that will be expanded later into one of
9354 // the above forms.
9355 SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
9356 EVT VT = (SplatSize == 1 ? MVT::v16i8 :
9357 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
9358 SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
9359 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
9360 if (VT == Op.getValueType())
9361 return RetVal;
9362 else
9363 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
9364 }
9365
9366 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
9367 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
9368 // for fneg/fabs.
9369 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
9370 // Make a -1 splat with vspltisw -1:
9371 SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
9372
9373 // Make the VSLW intrinsic, computing 0x8000_0000.
9374 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
9375 OnesV, DAG, dl);
9376
9377 // xor by OnesV to invert it.
9378 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
9379 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9380 }
9381
9382 // Check to see if this is a wide variety of vsplti*, binop self cases.
9383 static const signed char SplatCsts[] = {
9384 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
9385 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
9386 };
9387
9388 for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
9389 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
9390 // cases which are ambiguous (e.g. formation of 0x8000_0000); 'vsplti -1' is favored there.
9391 int i = SplatCsts[idx];
9392
9393 // Figure out what shift amount will be used by altivec if shifted by i in
9394 // this splat size.
9395 unsigned TypeShiftAmt = i & (SplatBitSize-1);
9396
9397 // vsplti + shl self.
9398 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
9399 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9400 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9401 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
9402 Intrinsic::ppc_altivec_vslw
9403 };
9404 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9405 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9406 }
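// --- Illustration only: the shl-self identity tested above, as a standalone
// check. For a byte splat of 0x40 (SextVal == 64), i == 4 matches: vspltisb 4
// followed by vslb of that splat by itself shifts each byte left by the
// low-order bits of the corresponding byte (here 4), giving 0x40 in every
// byte. Hypothetical helper name; this path only matters on subtargets that
// lack the single-instruction splat forms handled earlier.

static bool matchesShlSelf(int SextVal, int i, unsigned SplatBitSize) {
  unsigned TypeShiftAmt = i & (SplatBitSize - 1);
  return SextVal == (int)((unsigned)i << TypeShiftAmt);
}
// matchesShlSelf(64, 4, 8) is true, so a v16i8 splat of 0x40 can be built as
// vspltisb 4 + vslb.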
9407
9408 // vsplti + srl self.
9409 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9410 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9411 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9412 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
9413 Intrinsic::ppc_altivec_vsrw
9414 };
9415 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9416 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9417 }
9418
9419 // vsplti + rol self.
9420 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
9421 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
9422 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9423 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9424 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
9425 Intrinsic::ppc_altivec_vrlw
9426 };
9427 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9428 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9429 }
9430
9431 // t = vsplti c, result = vsldoi t, t, 1
9432 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
9433 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9434 unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
9435 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9436 }
9437 // t = vsplti c, result = vsldoi t, t, 2
9438 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
9439 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9440 unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
9441 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9442 }
9443 // t = vsplti c, result = vsldoi t, t, 3
9444 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
9445 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9446 unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
9447 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9448 }
9449 }
9450
9451 return SDValue();
9452}
9453
9454/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
9455/// the specified operations to build the shuffle.
9456static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
9457 SDValue RHS, SelectionDAG &DAG,
9458 const SDLoc &dl) {
9459 unsigned OpNum = (PFEntry >> 26) & 0x0F;
9460 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
9461 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
9462
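// --- Illustration only: the bit layout of a perfect-shuffle table entry as
// decoded above -- a 2-bit cost in bits [31:30] (read later by the caller), a
// 4-bit opcode in [29:26], and two 13-bit operand indices. Standalone sketch
// with hypothetical names.
struct PFDecoded {
  unsigned Cost, OpNum, LHSID, RHSID;
};

static PFDecoded decodePFEntry(unsigned PFEntry) {
  return {PFEntry >> 30,                       // Cost
          (PFEntry >> 26) & 0x0F,              // OpNum
          (PFEntry >> 13) & ((1u << 13) - 1),  // LHSID
          (PFEntry >> 0) & ((1u << 13) - 1)};  // RHSID
}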
9463 enum {
9464 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
9465 OP_VMRGHW,
9466 OP_VMRGLW,
9467 OP_VSPLTISW0,
9468 OP_VSPLTISW1,
9469 OP_VSPLTISW2,
9470 OP_VSPLTISW3,
9471 OP_VSLDOI4,
9472 OP_VSLDOI8,
9473 OP_VSLDOI12
9474 };
9475
9476 if (OpNum == OP_COPY) {
9477 if (LHSID == (1*9+2)*9+3) return LHS;
9478 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
9479 return RHS;
9480 }
9481
9482 SDValue OpLHS, OpRHS;
9483 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
9484 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
9485
9486 int ShufIdxs[16];
9487 switch (OpNum) {
9488 default: llvm_unreachable("Unknown i32 permute!");
9489 case OP_VMRGHW:
9490 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
9491 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
9492 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
9493 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
9494 break;
9495 case OP_VMRGLW:
9496 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
9497 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
9498 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
9499 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
9500 break;
9501 case OP_VSPLTISW0:
9502 for (unsigned i = 0; i != 16; ++i)
9503 ShufIdxs[i] = (i&3)+0;
9504 break;
9505 case OP_VSPLTISW1:
9506 for (unsigned i = 0; i != 16; ++i)
9507 ShufIdxs[i] = (i&3)+4;
9508 break;
9509 case OP_VSPLTISW2:
9510 for (unsigned i = 0; i != 16; ++i)
9511 ShufIdxs[i] = (i&3)+8;
9512 break;
9513 case OP_VSPLTISW3:
9514 for (unsigned i = 0; i != 16; ++i)
9515 ShufIdxs[i] = (i&3)+12;
9516 break;
9517 case OP_VSLDOI4:
9518 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
9519 case OP_VSLDOI8:
9520 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
9521 case OP_VSLDOI12:
9522 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
9523 }
9524 EVT VT = OpLHS.getValueType();
9525 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
9526 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
9527 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
9528 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9529}
9530
9531/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
9532/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
9533/// SDValue.
9534SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
9535 SelectionDAG &DAG) const {
9536 const unsigned BytesInVector = 16;
9537 bool IsLE = Subtarget.isLittleEndian();
9538 SDLoc dl(N);
9539 SDValue V1 = N->getOperand(0);
9540 SDValue V2 = N->getOperand(1);
9541 unsigned ShiftElts = 0, InsertAtByte = 0;
9542 bool Swap = false;
9543
9544 // Shifts required to get the byte we want at element 7.
9545 unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
9546 0, 15, 14, 13, 12, 11, 10, 9};
9547 unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
9548 1, 2, 3, 4, 5, 6, 7, 8};
9549
9550 ArrayRef<int> Mask = N->getMask();
9551 int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
9552
9553 // For each mask element, find out if we're just inserting something
9554 // from V2 into V1 or vice versa.
9555 // Possible permutations inserting an element from V2 into V1:
9556 // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9557 // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9558 // ...
9559 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
9560 // Inserting from V1 into V2 will be similar, except mask range will be
9561 // [16,31].
9562
9563 bool FoundCandidate = false;
9564 // If both vector operands for the shuffle are the same vector, the mask
9565 // will contain only elements from the first one and the second one will be
9566 // undef.
9567 unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
9569 // Go through the mask of bytes to find an element that's being moved
9569 // from one vector to the other.
9570 for (unsigned i = 0; i < BytesInVector; ++i) {
9571 unsigned CurrentElement = Mask[i];
9572 // If 2nd operand is undefined, we should only look for element 7 in the
9573 // Mask.
9574 if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
9575 continue;
9576
9577 bool OtherElementsInOrder = true;
9578 // Examine the other elements in the Mask to see if they're in original
9579 // order.
9580 for (unsigned j = 0; j < BytesInVector; ++j) {
9581 if (j == i)
9582 continue;
9583 // If CurrentElement is from V1 [0,15], then we expect the rest of the Mask
9584 // to be from V2 [16,31] and vice versa, unless the 2nd operand is undefined,
9585 // in which case we assume we're always picking from the 1st operand.
9586 int MaskOffset =
9587 (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
9588 if (Mask[j] != OriginalOrder[j] + MaskOffset) {
9589 OtherElementsInOrder = false;
9590 break;
9591 }
9592 }
9593 // If other elements are in original order, we record the number of shifts
9594 // we need to get the element we want into element 7. Also record which byte
9595 // in the vector we should insert into.
9596 if (OtherElementsInOrder) {
9597 // If 2nd operand is undefined, we assume no shifts and no swapping.
9598 if (V2.isUndef()) {
9599 ShiftElts = 0;
9600 Swap = false;
9601 } else {
9602 // Only need the last 4-bits for shifts because operands will be swapped if CurrentElement is >= 2^4.
9603 ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
9604 : BigEndianShifts[CurrentElement & 0xF];
9605 Swap = CurrentElement < BytesInVector;
9606 }
9607 InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
9608 FoundCandidate = true;
9609 break;
9610 }
9611 }
9612
9613 if (!FoundCandidate)
9614 return SDValue();
9615
9616 // Candidate found, construct the proper SDAG sequence with VINSERTB,
9617 // optionally with VECSHL if shift is required.
9618 if (Swap)
9619 std::swap(V1, V2);
9620 if (V2.isUndef())
9621 V2 = V1;
9622 if (ShiftElts) {
9623 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
9624 DAG.getConstant(ShiftElts, dl, MVT::i32));
9625 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
9626 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9627 }
9628 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
9629 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9630}
9631
9632/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
9633/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
9634/// SDValue.
9635SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
9636 SelectionDAG &DAG) const {
9637 const unsigned NumHalfWords = 8;
9638 const unsigned BytesInVector = NumHalfWords * 2;
9639 // Check that the shuffle is on half-words.
9640 if (!isNByteElemShuffleMask(N, 2, 1))
9641 return SDValue();
9642
9643 bool IsLE = Subtarget.isLittleEndian();
9644 SDLoc dl(N);
9645 SDValue V1 = N->getOperand(0);
9646 SDValue V2 = N->getOperand(1);
9647 unsigned ShiftElts = 0, InsertAtByte = 0;
9648 bool Swap = false;
9649
9650 // Shifts required to get the half-word we want at element 3.
9651 unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
9652 unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
9653
9654 uint32_t Mask = 0;
9655 uint32_t OriginalOrderLow = 0x1234567;
9656 uint32_t OriginalOrderHigh = 0x89ABCDEF;
9657 // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
9658 // 32-bit space, only need 4-bit nibbles per element.
9659 for (unsigned i = 0; i < NumHalfWords; ++i) {
9660 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9661 Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
9662 }
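// --- Illustration only: how the half-word shuffle mask is packed into the
// nibbles of one 32-bit value above (element 0 lands in the most significant
// nibble). Standalone sketch with a hypothetical helper name; the input is the
// 16-entry byte-level mask of the shuffle.
#include <cstdint>

static uint32_t packHalfWordMask(const int ByteMask[16]) {
  uint32_t Mask = 0;
  for (unsigned i = 0; i < 8; ++i) {
    unsigned MaskShift = (8 - 1 - i) * 4;
    // Byte-level mask entries come in pairs per half-word, hence the /2.
    Mask |= (uint32_t)(ByteMask[i * 2] / 2) << MaskShift;
  }
  return Mask;
}
// For the identity byte mask {0,1,...,15} this yields 0x01234567, which is
// OriginalOrderLow above.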
9663
9664 // For each mask element, find out if we're just inserting something
9665 // from V2 into V1 or vice versa. Possible permutations inserting an element
9666 // from V2 into V1:
9667 // X, 1, 2, 3, 4, 5, 6, 7
9668 // 0, X, 2, 3, 4, 5, 6, 7
9669 // 0, 1, X, 3, 4, 5, 6, 7
9670 // 0, 1, 2, X, 4, 5, 6, 7
9671 // 0, 1, 2, 3, X, 5, 6, 7
9672 // 0, 1, 2, 3, 4, X, 6, 7
9673 // 0, 1, 2, 3, 4, 5, X, 7
9674 // 0, 1, 2, 3, 4, 5, 6, X
9675 // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
9676
9677 bool FoundCandidate = false;
9678 // Go through the mask of half-words to find an element that's being moved
9679 // from one vector to the other.
9680 for (unsigned i = 0; i < NumHalfWords; ++i) {
9681 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9682 uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
9683 uint32_t MaskOtherElts = ~(0xF << MaskShift);
9684 uint32_t TargetOrder = 0x0;
9685
9686 // If both vector operands for the shuffle are the same vector, the mask
9687 // will contain only elements from the first one and the second one will be
9688 // undef.
9689 if (V2.isUndef()) {
9690 ShiftElts = 0;
9691 unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
9692 TargetOrder = OriginalOrderLow;
9693 Swap = false;
9694 // Skip if not the correct element or mask of other elements don't equal
9695 // to our expected order.
9696 if (MaskOneElt == VINSERTHSrcElem &&
9697 (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9698 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
9699 FoundCandidate = true;
9700 break;
9701 }
9702 } else { // If both operands are defined.
9703 // Target order is [8,15] if the current mask is between [0,7].
9704 TargetOrder =
9705 (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
9706 // Skip if mask of other elements don't equal our expected order.
9707 if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9708 // We only need the last 3 bits for the number of shifts.
9709 ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
9710 : BigEndianShifts[MaskOneElt & 0x7];
9711 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
9712 Swap = MaskOneElt < NumHalfWords;
9713 FoundCandidate = true;
9714 break;
9715 }
9716 }
9717 }
9718
9719 if (!FoundCandidate)
9720 return SDValue();
9721
9722 // Candidate found, construct the proper SDAG sequence with VINSERTH,
9723 // optionally with VECSHL if shift is required.
9724 if (Swap)
9725 std::swap(V1, V2);
9726 if (V2.isUndef())
9727 V2 = V1;
9728 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
9729 if (ShiftElts) {
9730 // Double ShiftElts because we're left shifting on v16i8 type.
9731 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
9732 DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
9733 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
9734 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
9735 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9736 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9737 }
9738 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
9739 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
9740 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9741 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9742}
9743
9744/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
9745/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
9746/// return the default SDValue.
9747SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
9748 SelectionDAG &DAG) const {
9749 // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
9750 // to v16i8. Peek through the bitcasts to get the actual operands.
9751 SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
9752 SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));
9753
9754 auto ShuffleMask = SVN->getMask();
9755 SDValue VecShuffle(SVN, 0);
9756 SDLoc DL(SVN);
9757
9758 // Check that we have a four byte shuffle.
9759 if (!isNByteElemShuffleMask(SVN, 4, 1))
9760 return SDValue();
9761
9762 // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
9763 if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
9764 std::swap(LHS, RHS);
9765 VecShuffle = DAG.getCommutedVectorShuffle(*SVN);
9766 ShuffleMask = cast<ShuffleVectorSDNode>(VecShuffle)->getMask();
9767 }
9768
9769 // Ensure that the RHS is a vector of constants.
9770 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
9771 if (!BVN)
9772 return SDValue();
9773
9774 // Check if RHS is a splat of 4-bytes (or smaller).
9775 APInt APSplatValue, APSplatUndef;
9776 unsigned SplatBitSize;
9777 bool HasAnyUndefs;
9778 if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
9779 HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
9780 SplatBitSize > 32)
9781 return SDValue();
9782
9783 // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
9784 // The instruction splats a constant C into two words of the source vector
9785 // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
9786 // Thus we check that the shuffle mask is the equivalent of
9787 // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
9788 // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
9789 // within each word are consecutive, so we only need to check the first byte.
9790 SDValue Index;
9791 bool IsLE = Subtarget.isLittleEndian();
9792 if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
9793 (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
9794 ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
9795 Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
9796 else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
9797 (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
9798 ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
9799 Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
9800 else
9801 return SDValue();
9802
9803 // If the splat is narrower than 32-bits, we need to get the 32-bit value
9804 // for XXSPLTI32DX.
9805 unsigned SplatVal = APSplatValue.getZExtValue();
9806 for (; SplatBitSize < 32; SplatBitSize <<= 1)
9807 SplatVal |= (SplatVal << SplatBitSize);
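// --- Illustration only: the doubling loop above widens a splat narrower than
// 32 bits into the full 32-bit immediate that XXSPLTI32DX takes. Standalone
// sketch, hypothetical helper name.
#include <cstdint>

static uint32_t widenSplatTo32(uint32_t SplatVal, unsigned SplatBitSize) {
  for (; SplatBitSize < 32; SplatBitSize <<= 1)
    SplatVal |= (SplatVal << SplatBitSize);
  return SplatVal;
}
// widenSplatTo32(0xAB, 8) == 0xABABABABu; widenSplatTo32(0x1234, 16) ==
// 0x12341234u.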
9808
9809 SDValue SplatNode = DAG.getNode(
9810 PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
9811 Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
9812 return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
9813}
9814
9815/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
9816/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
9817/// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
9818/// i.e (or (shl x, C1), (srl x, 128-C1)).
9819SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
9820 assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
9821 assert(Op.getValueType() == MVT::v1i128 &&
9822 "Only set v1i128 as custom, other type shouldn't reach here!");
9823 SDLoc dl(Op);
9824 SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
9825 SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
9826 unsigned SHLAmt = N1.getConstantOperandVal(0);
9827 if (SHLAmt % 8 == 0) {
9828 std::array<int, 16> Mask;
9829 std::iota(Mask.begin(), Mask.end(), 0);
9830 std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
9831 if (SDValue Shuffle =
9832 DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
9833 DAG.getUNDEF(MVT::v16i8), Mask))
9834 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
9835 }
9836 SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
9837 SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
9838 DAG.getConstant(SHLAmt, dl, MVT::i32));
9839 SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
9840 DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
9841 SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
9842 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
9843}
9844
9845/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
9846/// is a shuffle we can handle in a single instruction, return it. Otherwise,
9847/// return the code it can be lowered into. Worst case, it can always be
9848/// lowered into a vperm.
9849SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
9850 SelectionDAG &DAG) const {
9851 SDLoc dl(Op);
9852 SDValue V1 = Op.getOperand(0);
9853 SDValue V2 = Op.getOperand(1);
9854 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
9855
9856 // Any nodes that were combined in the target-independent combiner prior
9857 // to vector legalization will not be sent to the target combine. Try to
9858 // combine it here.
9859 if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
9860 if (!isa<ShuffleVectorSDNode>(NewShuffle))
9861 return NewShuffle;
9862 Op = NewShuffle;
9863 SVOp = cast<ShuffleVectorSDNode>(Op);
9864 V1 = Op.getOperand(0);
9865 V2 = Op.getOperand(1);
9866 }
9867 EVT VT = Op.getValueType();
9868 bool isLittleEndian = Subtarget.isLittleEndian();
9869
9870 unsigned ShiftElts, InsertAtByte;
9871 bool Swap = false;
9872
9873 // If this is a load-and-splat, we can do that with a single instruction
9874 // in some cases. However if the load has multiple uses, we don't want to
9875 // combine it because that will just produce multiple loads.
9876 bool IsPermutedLoad = false;
9877 const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
9878 if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
9879 (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
9880 InputLoad->hasOneUse()) {
9881 bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
9882 int SplatIdx =
9883 PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
9884
9885 // The splat index for permuted loads will be in the left half of the vector
9886 // which is strictly wider than the loaded value by 8 bytes. So we need to
9887 // adjust the splat index to point to the correct address in memory.
9888 if (IsPermutedLoad) {
9889 assert((isLittleEndian || IsFourByte) &&
9890 "Unexpected size for permuted load on big endian target");
9891 SplatIdx += IsFourByte ? 2 : 1;
9892 assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
9893 "Splat of a value outside of the loaded memory");
9894 }
9895
9896 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9897 // For 4-byte load-and-splat, we need Power9.
9898 if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
9899 uint64_t Offset = 0;
9900 if (IsFourByte)
9901 Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
9902 else
9903 Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
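// --- Illustration only: the offset computation above locates the splatted
// element within the loaded vector in memory. Standalone sketch, hypothetical
// helper name.
#include <cstdint>

static uint64_t splatLoadOffset(int SplatIdx, bool IsFourByte,
                                bool IsLittleEndian) {
  if (IsFourByte)
    return IsLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
  return IsLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
}
// Splatting word element 2 of a v4i32 load on a little-endian target gives
// (3 - 2) * 4 == 4 bytes from the base pointer.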
9904
9905 // If the width of the load is the same as the width of the splat,
9906 // loading with an offset would load the wrong memory.
9907 if (LD->getValueType(0).getSizeInBits() == (IsFourByte ? 32 : 64))
9908 Offset = 0;
9909
9910 SDValue BasePtr = LD->getBasePtr();
9911 if (Offset != 0)
9912 BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
9913 BasePtr, DAG.getIntPtrConstant(Offset, dl));
9914 SDValue Ops[] = {
9915 LD->getChain(), // Chain
9916 BasePtr, // BasePtr
9917 DAG.getValueType(Op.getValueType()) // VT
9918 };
9919 SDVTList VTL =
9920 DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
9921 SDValue LdSplt =
9922 DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
9923 Ops, LD->getMemoryVT(), LD->getMemOperand());
9924 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
9925 if (LdSplt.getValueType() != SVOp->getValueType(0))
9926 LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
9927 return LdSplt;
9928 }
9929 }
9930 if (Subtarget.hasP9Vector() &&
9931 PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
9932 isLittleEndian)) {
9933 if (Swap)
9934 std::swap(V1, V2);
9935 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9936 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
9937 if (ShiftElts) {
9938 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
9939 DAG.getConstant(ShiftElts, dl, MVT::i32));
9940 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
9941 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9942 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9943 }
9944 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
9945 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9946 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9947 }
9948
9949 if (Subtarget.hasPrefixInstrs()) {
9950 SDValue SplatInsertNode;
9951 if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
9952 return SplatInsertNode;
9953 }
9954
9955 if (Subtarget.hasP9Altivec()) {
9956 SDValue NewISDNode;
9957 if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
9958 return NewISDNode;
9959
9960 if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
9961 return NewISDNode;
9962 }
9963
9964 if (Subtarget.hasVSX() &&
9965 PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
9966 if (Swap)
9967 std::swap(V1, V2);
9968 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9969 SDValue Conv2 =
9970 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
9971
9972 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
9973 DAG.getConstant(ShiftElts, dl, MVT::i32));
9974 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
9975 }
9976
9977 if (Subtarget.hasVSX() &&
9978 PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
9979 if (Swap)
9980 std::swap(V1, V2);
9981 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
9982 SDValue Conv2 =
9983 DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
9984
9985 SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
9986 DAG.getConstant(ShiftElts, dl, MVT::i32));
9987 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
9988 }
9989
9990 if (Subtarget.hasP9Vector()) {
9991 if (PPC::isXXBRHShuffleMask(SVOp)) {
9992 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
9993 SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
9994 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
9995 } else if (PPC::isXXBRWShuffleMask(SVOp)) {
9996 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9997 SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
9998 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
9999 } else if (PPC::isXXBRDShuffleMask(SVOp)) {
10000 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10001 SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
10002 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
10003 } else if (PPC::isXXBRQShuffleMask(SVOp)) {
10004 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
10005 SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
10006 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
10007 }
10008 }
10009
10010 if (Subtarget.hasVSX()) {
10011 if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
10012 int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
10013
10014 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10015 SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
10016 DAG.getConstant(SplatIdx, dl, MVT::i32));
10017 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
10018 }
10019
10020 // Left shifts of 8 bytes are actually swaps. Convert accordingly.
10021 if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
10022 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
10023 SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
10024 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
10025 }
10026 }
10027
10028 // Cases that are handled by instructions that take permute immediates
10029 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
10030 // selected by the instruction selector.
10031 if (V2.isUndef()) {
10032 if (PPC::isSplatShuffleMask(SVOp, 1) ||
10033 PPC::isSplatShuffleMask(SVOp, 2) ||
10034 PPC::isSplatShuffleMask(SVOp, 4) ||
10035 PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
10036 PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
10037 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
10038 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
10039 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
10040 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
10041 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
10042 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
10043 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
10044 (Subtarget.hasP8Altivec() && (
10045 PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
10046 PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
10047 PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
10048 return Op;
10049 }
10050 }
10051
10052 // Altivec has a variety of "shuffle immediates" that take two vector inputs
10053 // and produce a fixed permutation. If any of these match, do not lower to
10054 // VPERM.
10055 unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
10056 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10057 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10058 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
10059 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10060 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10061 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10062 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10063 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10064 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10065 (Subtarget.hasP8Altivec() && (
10066 PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10067 PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
10068 PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
10069 return Op;
10070
10071 // Check to see if this is a shuffle of 4-byte values. If so, we can use our
10072 // perfect shuffle table to emit an optimal matching sequence.
10073 ArrayRef<int> PermMask = SVOp->getMask();
10074
10075 if (!DisablePerfectShuffle && !isLittleEndian) {
10076 unsigned PFIndexes[4];
10077 bool isFourElementShuffle = true;
10078 for (unsigned i = 0; i != 4 && isFourElementShuffle;
10079 ++i) { // Element number
10080 unsigned EltNo = 8; // Start out undef.
10081 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
10082 if (PermMask[i * 4 + j] < 0)
10083 continue; // Undef, ignore it.
10084
10085 unsigned ByteSource = PermMask[i * 4 + j];
10086 if ((ByteSource & 3) != j) {
10087 isFourElementShuffle = false;
10088 break;
10089 }
10090
10091 if (EltNo == 8) {
10092 EltNo = ByteSource / 4;
10093 } else if (EltNo != ByteSource / 4) {
10094 isFourElementShuffle = false;
10095 break;
10096 }
10097 }
10098 PFIndexes[i] = EltNo;
10099 }
10100
10101 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
10102 // perfect shuffle table to determine if it is cost effective to do this as
10103 // discrete instructions, or whether we should use a vperm.
10104 // For now, we skip this for little endian until such time as we have a
10105 // little-endian perfect shuffle table.
10106 if (isFourElementShuffle) {
10107 // Compute the index in the perfect shuffle table.
10108 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
10109 PFIndexes[2] * 9 + PFIndexes[3];
10110
10111 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10112 unsigned Cost = (PFEntry >> 30);
10113
10114 // Determining when to avoid vperm is tricky. Many things affect the cost
10115 // of vperm, particularly how many times the perm mask needs to be
10116 // computed. For example, if the perm mask can be hoisted out of a loop or
10117 // is already used (perhaps because there are multiple permutes with the
10118 // same shuffle mask?) the vperm has a cost of 1. OTOH, hoisting the
10119 // permute mask out of the loop requires an extra register.
10120 //
10121 // As a compromise, we only emit discrete instructions if the shuffle can
10122 // be generated in 3 or fewer operations. When we have loop information
10123 // available, if this block is within a loop, we should avoid using vperm
10124 // for 3-operation perms and use a constant pool load instead.
10125 if (Cost < 3)
10126 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
10127 }
10128 }
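// --- Illustration only: the perfect-shuffle table index computed above is a
// base-9 encoding of the four 4-byte element selectors (0-7 select one of the
// eight words across the two inputs, 8 means undef). Standalone sketch,
// hypothetical helper name.
static unsigned perfectShuffleIndex(const unsigned PFIndexes[4]) {
  return PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 + PFIndexes[2] * 9 +
         PFIndexes[3];
}
// The identity selection <0,1,2,3> encodes to 102, the same (1*9+2)*9+3
// constant GeneratePerfectShuffle uses to recognize an OP_COPY of its first
// operand.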
10129
10130 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
10131 // vector that will get spilled to the constant pool.
10132 if (V2.isUndef()) V2 = V1;
10133
10134 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
10135 // that it is in input element units, not in bytes. Convert now.
10136
10137 // For little endian, the order of the input vectors is reversed, and
10138 // the permutation mask is complemented with respect to 31. This is
10139 // necessary to produce proper semantics with the big-endian-biased vperm
10140 // instruction.
10141 EVT EltVT = V1.getValueType().getVectorElementType();
10142 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
10143
10144 SmallVector<SDValue, 16> ResultMask;
10145 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
10146 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
10147
10148 for (unsigned j = 0; j != BytesPerElement; ++j)
10149 if (isLittleEndian)
10150 ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j),
10151 dl, MVT::i32));
10152 else
10153 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl,
10154 MVT::i32));
10155 }
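// --- Illustration only: the little-endian complement applied above when
// building the vperm control vector. Standalone sketch, hypothetical helper
// name.
static unsigned vpermControlByte(unsigned SrcElt, unsigned BytesPerElement,
                                 unsigned j, bool IsLittleEndian) {
  unsigned BigEndianIndex = SrcElt * BytesPerElement + j;
  // On little-endian targets the inputs are also swapped (V2, V1 below), and
  // each index is complemented with respect to 31 to preserve the semantics of
  // the big-endian-biased vperm instruction.
  return IsLittleEndian ? 31 - BigEndianIndex : BigEndianIndex;
}
// For a v4i32 shuffle selecting element 5 (BytesPerElement == 4), bytes 20..23
// become control bytes 11,10,9,8 on little endian.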
10156
10157 ShufflesHandledWithVPERM++;
10158 SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
10159 LLVM_DEBUG(dbgs() << "Emitting a VPERM for the following shuffle:\n");
10160 LLVM_DEBUG(SVOp->dump());
10161 LLVM_DEBUG(dbgs() << "With the following permute control vector:\n");
10162 LLVM_DEBUG(VPermMask.dump());
10163
10164 if (isLittleEndian)
10165 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
10166 V2, V1, VPermMask);
10167 else
10168 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
10169 V1, V2, VPermMask);
10170}
10171
10172/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
10173/// vector comparison. If it is, return true and fill in Opc/isDot with
10174/// information about the intrinsic.
10175static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
10176 bool &isDot, const PPCSubtarget &Subtarget) {
10177 unsigned IntrinsicID =
10178 cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
10179 CompareOpc = -1;
10180 isDot = false;
10181 switch (IntrinsicID) {
10182 default:
10183 return false;
10184 // Comparison predicates.
10185 case Intrinsic::ppc_altivec_vcmpbfp_p:
10186 CompareOpc = 966;
10187 isDot = true;
10188 break;
10189 case Intrinsic::ppc_altivec_vcmpeqfp_p:
10190 CompareOpc = 198;
10191 isDot = true;
10192 break;
10193 case Intrinsic::ppc_altivec_vcmpequb_p:
10194 CompareOpc = 6;
10195 isDot = true;
10196 break;
10197 case Intrinsic::ppc_altivec_vcmpequh_p:
10198 CompareOpc = 70;
10199 isDot = true;
10200 break;
10201 case Intrinsic::ppc_altivec_vcmpequw_p:
10202 CompareOpc = 134;
10203 isDot = true;
10204 break;
10205 case Intrinsic::ppc_altivec_vcmpequd_p:
10206 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10207 CompareOpc = 199;
10208 isDot = true;
10209 } else
10210 return false;
10211 break;
10212 case Intrinsic::ppc_altivec_vcmpneb_p:
10213 case Intrinsic::ppc_altivec_vcmpneh_p:
10214 case Intrinsic::ppc_altivec_vcmpnew_p:
10215 case Intrinsic::ppc_altivec_vcmpnezb_p:
10216 case Intrinsic::ppc_altivec_vcmpnezh_p:
10217 case Intrinsic::ppc_altivec_vcmpnezw_p:
10218 if (Subtarget.hasP9Altivec()) {
10219 switch (IntrinsicID) {
10220 default:
10221 llvm_unreachable("Unknown comparison intrinsic.");
10222 case Intrinsic::ppc_altivec_vcmpneb_p:
10223 CompareOpc = 7;
10224 break;
10225 case Intrinsic::ppc_altivec_vcmpneh_p:
10226 CompareOpc = 71;
10227 break;
10228 case Intrinsic::ppc_altivec_vcmpnew_p:
10229 CompareOpc = 135;
10230 break;
10231 case Intrinsic::ppc_altivec_vcmpnezb_p:
10232 CompareOpc = 263;
10233 break;
10234 case Intrinsic::ppc_altivec_vcmpnezh_p:
10235 CompareOpc = 327;
10236 break;
10237 case Intrinsic::ppc_altivec_vcmpnezw_p:
10238 CompareOpc = 391;
10239 break;
10240 }
10241 isDot = true;
10242 } else
10243 return false;
10244 break;
10245 case Intrinsic::ppc_altivec_vcmpgefp_p:
10246 CompareOpc = 454;
10247 isDot = true;
10248 break;
10249 case Intrinsic::ppc_altivec_vcmpgtfp_p:
10250 CompareOpc = 710;
10251 isDot = true;
10252 break;
10253 case Intrinsic::ppc_altivec_vcmpgtsb_p:
10254 CompareOpc = 774;
10255 isDot = true;
10256 break;
10257 case Intrinsic::ppc_altivec_vcmpgtsh_p:
10258 CompareOpc = 838;
10259 isDot = true;
10260 break;
10261 case Intrinsic::ppc_altivec_vcmpgtsw_p:
10262 CompareOpc = 902;
10263 isDot = true;
10264 break;
10265 case Intrinsic::ppc_altivec_vcmpgtsd_p:
10266 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10267 CompareOpc = 967;
10268 isDot = true;
10269 } else
10270 return false;
10271 break;
10272 case Intrinsic::ppc_altivec_vcmpgtub_p:
10273 CompareOpc = 518;
10274 isDot = true;
10275 break;
10276 case Intrinsic::ppc_altivec_vcmpgtuh_p:
10277 CompareOpc = 582;
10278 isDot = true;
10279 break;
10280 case Intrinsic::ppc_altivec_vcmpgtuw_p:
10281 CompareOpc = 646;
10282 isDot = true;
10283 break;
10284 case Intrinsic::ppc_altivec_vcmpgtud_p:
10285 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10286 CompareOpc = 711;
10287 isDot = true;
10288 } else
10289 return false;
10290 break;
10291
10292 case Intrinsic::ppc_altivec_vcmpequq:
10293 case Intrinsic::ppc_altivec_vcmpgtsq:
10294 case Intrinsic::ppc_altivec_vcmpgtuq:
10295 if (!Subtarget.isISA3_1())
10296 return false;
10297 switch (IntrinsicID) {
10298 default:
10299 llvm_unreachable("Unknown comparison intrinsic.");
10300 case Intrinsic::ppc_altivec_vcmpequq:
10301 CompareOpc = 455;
10302 break;
10303 case Intrinsic::ppc_altivec_vcmpgtsq:
10304 CompareOpc = 903;
10305 break;
10306 case Intrinsic::ppc_altivec_vcmpgtuq:
10307 CompareOpc = 647;
10308 break;
10309 }
10310 break;
10311
10312 // VSX predicate comparisons use the same infrastructure
10313 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10314 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10315 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10316 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10317 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10318 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10319 if (Subtarget.hasVSX()) {
10320 switch (IntrinsicID) {
10321 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10322 CompareOpc = 99;
10323 break;
10324 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10325 CompareOpc = 115;
10326 break;
10327 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10328 CompareOpc = 107;
10329 break;
10330 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10331 CompareOpc = 67;
10332 break;
10333 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10334 CompareOpc = 83;
10335 break;
10336 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10337 CompareOpc = 75;
10338 break;
10339 }
10340 isDot = true;
10341 } else
10342 return false;
10343 break;
10344
10345 // Normal Comparisons.
10346 case Intrinsic::ppc_altivec_vcmpbfp:
10347 CompareOpc = 966;
10348 break;
10349 case Intrinsic::ppc_altivec_vcmpeqfp:
10350 CompareOpc = 198;
10351 break;
10352 case Intrinsic::ppc_altivec_vcmpequb:
10353 CompareOpc = 6;
10354 break;
10355 case Intrinsic::ppc_altivec_vcmpequh:
10356 CompareOpc = 70;
10357 break;
10358 case Intrinsic::ppc_altivec_vcmpequw:
10359 CompareOpc = 134;
10360 break;
10361 case Intrinsic::ppc_altivec_vcmpequd:
10362 if (Subtarget.hasP8Altivec())
10363 CompareOpc = 199;
10364 else
10365 return false;
10366 break;
10367 case Intrinsic::ppc_altivec_vcmpneb:
10368 case Intrinsic::ppc_altivec_vcmpneh:
10369 case Intrinsic::ppc_altivec_vcmpnew:
10370 case Intrinsic::ppc_altivec_vcmpnezb:
10371 case Intrinsic::ppc_altivec_vcmpnezh:
10372 case Intrinsic::ppc_altivec_vcmpnezw:
10373 if (Subtarget.hasP9Altivec())
10374 switch (IntrinsicID) {
10375 default:
10376 llvm_unreachable("Unknown comparison intrinsic.");
10377 case Intrinsic::ppc_altivec_vcmpneb:
10378 CompareOpc = 7;
10379 break;
10380 case Intrinsic::ppc_altivec_vcmpneh:
10381 CompareOpc = 71;
10382 break;
10383 case Intrinsic::ppc_altivec_vcmpnew:
10384 CompareOpc = 135;
10385 break;
10386 case Intrinsic::ppc_altivec_vcmpnezb:
10387 CompareOpc = 263;
10388 break;
10389 case Intrinsic::ppc_altivec_vcmpnezh:
10390 CompareOpc = 327;
10391 break;
10392 case Intrinsic::ppc_altivec_vcmpnezw:
10393 CompareOpc = 391;
10394 break;
10395 }
10396 else
10397 return false;
10398 break;
10399 case Intrinsic::ppc_altivec_vcmpgefp:
10400 CompareOpc = 454;
10401 break;
10402 case Intrinsic::ppc_altivec_vcmpgtfp:
10403 CompareOpc = 710;
10404 break;
10405 case Intrinsic::ppc_altivec_vcmpgtsb:
10406 CompareOpc = 774;
10407 break;
10408 case Intrinsic::ppc_altivec_vcmpgtsh:
10409 CompareOpc = 838;
10410 break;
10411 case Intrinsic::ppc_altivec_vcmpgtsw:
10412 CompareOpc = 902;
10413 break;
10414 case Intrinsic::ppc_altivec_vcmpgtsd:
10415 if (Subtarget.hasP8Altivec())
10416 CompareOpc = 967;
10417 else
10418 return false;
10419 break;
10420 case Intrinsic::ppc_altivec_vcmpgtub:
10421 CompareOpc = 518;
10422 break;
10423 case Intrinsic::ppc_altivec_vcmpgtuh:
10424 CompareOpc = 582;
10425 break;
10426 case Intrinsic::ppc_altivec_vcmpgtuw:
10427 CompareOpc = 646;
10428 break;
10429 case Intrinsic::ppc_altivec_vcmpgtud:
10430 if (Subtarget.hasP8Altivec())
10431 CompareOpc = 711;
10432 else
10433 return false;
10434 break;
10435 case Intrinsic::ppc_altivec_vcmpequq_p:
10436 case Intrinsic::ppc_altivec_vcmpgtsq_p:
10437 case Intrinsic::ppc_altivec_vcmpgtuq_p:
10438 if (!Subtarget.isISA3_1())
10439 return false;
10440 switch (IntrinsicID) {
10441 default:
10442 llvm_unreachable("Unknown comparison intrinsic.");
10443 case Intrinsic::ppc_altivec_vcmpequq_p:
10444 CompareOpc = 455;
10445 break;
10446 case Intrinsic::ppc_altivec_vcmpgtsq_p:
10447 CompareOpc = 903;
10448 break;
10449 case Intrinsic::ppc_altivec_vcmpgtuq_p:
10450 CompareOpc = 647;
10451 break;
10452 }
10453 isDot = true;
10454 break;
10455 }
10456 return true;
10457}
10458
10459/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
10460/// lower, do it, otherwise return null.
10461SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
10462 SelectionDAG &DAG) const {
10463 unsigned IntrinsicID =
10464 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
10465
10466 SDLoc dl(Op);
10467
10468 switch (IntrinsicID) {
10469 case Intrinsic::thread_pointer:
10470 // Reads the thread pointer register, used for __builtin_thread_pointer.
10471 if (Subtarget.isPPC64())
10472 return DAG.getRegister(PPC::X13, MVT::i64);
10473 return DAG.getRegister(PPC::R2, MVT::i32);
10474
10475 case Intrinsic::ppc_mma_disassemble_acc:
10476 case Intrinsic::ppc_vsx_disassemble_pair: {
10477 int NumVecs = 2;
10478 SDValue WideVec = Op.getOperand(1);
10479 if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
10480 NumVecs = 4;
10481 WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
10482 }
10483 SmallVector<SDValue, 4> RetOps;
10484 for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
10485 SDValue Extract = DAG.getNode(
10486 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
10487 DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
10488 : VecNo,
10489 dl, getPointerTy(DAG.getDataLayout())));
10490 RetOps.push_back(Extract);
10491 }
10492 return DAG.getMergeValues(RetOps, dl);
10493 }
10494
10495 case Intrinsic::ppc_unpack_longdouble: {
10496 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
10497 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
10498 "Argument of long double unpack must be 0 or 1!");
10499 return DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Op.getOperand(1),
10500 DAG.getConstant(!!(Idx->getSExtValue()), dl,
10501 Idx->getValueType(0)));
10502 }
10503
10504 case Intrinsic::ppc_compare_exp_lt:
10505 case Intrinsic::ppc_compare_exp_gt:
10506 case Intrinsic::ppc_compare_exp_eq:
10507 case Intrinsic::ppc_compare_exp_uo: {
10508 unsigned Pred;
10509 switch (IntrinsicID) {
10510 case Intrinsic::ppc_compare_exp_lt:
10511 Pred = PPC::PRED_LT;
10512 break;
10513 case Intrinsic::ppc_compare_exp_gt:
10514 Pred = PPC::PRED_GT;
10515 break;
10516 case Intrinsic::ppc_compare_exp_eq:
10517 Pred = PPC::PRED_EQ;
10518 break;
10519 case Intrinsic::ppc_compare_exp_uo:
10520 Pred = PPC::PRED_UN;
10521 break;
10522 }
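// XSCMPEXPDP compares only the exponent fields of the two inputs and records
// the outcome in a CR field; the SELECT_CC_I4 below then materializes 1 or 0
// depending on whether the requested predicate bit is set.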
10523 return SDValue(
10524 DAG.getMachineNode(
10525 PPC::SELECT_CC_I4, dl, MVT::i32,
10526 {SDValue(DAG.getMachineNode(PPC::XSCMPEXPDP, dl, MVT::i32,
10527 Op.getOperand(1), Op.getOperand(2)),
10528 0),
10529 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
10530 DAG.getTargetConstant(Pred, dl, MVT::i32)}),
10531 0);
10532 }
10533 case Intrinsic::ppc_test_data_class_d:
10534 case Intrinsic::ppc_test_data_class_f: {
10535 unsigned CmprOpc = PPC::XSTSTDCDP;
10536 if (IntrinsicID == Intrinsic::ppc_test_data_class_f)
10537 CmprOpc = PPC::XSTSTDCSP;
10538 return SDValue(
10539 DAG.getMachineNode(
10540 PPC::SELECT_CC_I4, dl, MVT::i32,
10541 {SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32, Op.getOperand(2),
10542 Op.getOperand(1)),
10543 0),
10544 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
10545 DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
10546 0);
10547 }
10548 case Intrinsic::ppc_fnmsub: {
10549 EVT VT = Op.getOperand(1).getValueType();
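// fnmsub(a, b, c) computes -(a*b - c). Without a usable native FNMSUB node
// (no VSX, or an f128 operand without Float128 support) it is expanded below
// as -(fma(a, b, -c)), which yields the same value.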
10550 if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128))
10551 return DAG.getNode(
10552 ISD::FNEG, dl, VT,
10553 DAG.getNode(ISD::FMA, dl, VT, Op.getOperand(1), Op.getOperand(2),
10554 DAG.getNode(ISD::FNEG, dl, VT, Op.getOperand(3))));
10555 return DAG.getNode(PPCISD::FNMSUB, dl, VT, Op.getOperand(1),
10556 Op.getOperand(2), Op.getOperand(3));
10557 }
10558 case Intrinsic::ppc_convert_f128_to_ppcf128:
10559 case Intrinsic::ppc_convert_ppcf128_to_f128: {
10560 RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
10561 ? RTLIB::CONVERT_PPCF128_F128
10562 : RTLIB::CONVERT_F128_PPCF128;
10563 MakeLibCallOptions CallOptions;
10564 std::pair<SDValue, SDValue> Result =
10565 makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(1), CallOptions,
10566 dl, SDValue());
10567 return Result.first;
10568 }
10569 case Intrinsic::ppc_maxfe:
10570 case Intrinsic::ppc_maxfl:
10571 case Intrinsic::ppc_maxfs:
10572 case Intrinsic::ppc_minfe:
10573 case Intrinsic::ppc_minfl:
10574 case Intrinsic::ppc_minfs: {
10575 EVT VT = Op.getValueType();
10576 assert(
10577 all_of(Op->ops().drop_front(4),
10578 [VT](const SDUse &Use) { return Use.getValueType() == VT; }) &&
10579 "ppc_[max|min]f[e|l|s] must have uniform type arguments");
10580 (void)VT;
10581 ISD::CondCode CC = ISD::SETGT;
10582 if (IntrinsicID == Intrinsic::ppc_minfe ||
10583 IntrinsicID == Intrinsic::ppc_minfl ||
10584 IntrinsicID == Intrinsic::ppc_minfs)
10585 CC = ISD::SETLT;
10586 unsigned I = Op.getNumOperands() - 2, Cnt = I;
10587 SDValue Res = Op.getOperand(I);
10588 for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) {
10589 Res =
10590 DAG.getSelectCC(dl, Res, Op.getOperand(I), Res, Op.getOperand(I), CC);
10591 }
10592 return Res;
10593 }
10594 }
10595
10596 // If this is a lowered altivec predicate compare, CompareOpc is set to the
10597 // opcode number of the comparison.
10598 int CompareOpc;
10599 bool isDot;
10600 if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
10601 return SDValue(); // Don't custom lower most intrinsics.
10602
10603 // If this is a non-dot comparison, make the VCMP node and we are done.
10604 if (!isDot) {
10605 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
10606 Op.getOperand(1), Op.getOperand(2),
10607 DAG.getConstant(CompareOpc, dl, MVT::i32));
10608 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
10609 }
10610
10611 // Create the PPCISD altivec 'dot' comparison node.
10612 SDValue Ops[] = {
10613 Op.getOperand(2), // LHS
10614 Op.getOperand(3), // RHS
10615 DAG.getConstant(CompareOpc, dl, MVT::i32)
10616 };
10617 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
10618 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
10619
10620 // Now that we have the comparison, emit a copy from the CR to a GPR.
10621 // This is flagged to the above dot comparison.
10622 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
10623 DAG.getRegister(PPC::CR6, MVT::i32),
10624 CompNode.getValue(1));
10625
10626 // Unpack the result based on how the target uses it.
10627 unsigned BitNo; // Bit # of CR6.
10628 bool InvertBit; // Invert result?
10629 switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
10630 default: // Can't happen, don't crash on invalid number though.
10631 case 0: // Return the value of the EQ bit of CR6.
10632 BitNo = 0; InvertBit = false;
10633 break;
10634 case 1: // Return the inverted value of the EQ bit of CR6.
10635 BitNo = 0; InvertBit = true;
10636 break;
10637 case 2: // Return the value of the LT bit of CR6.
10638 BitNo = 2; InvertBit = false;
10639 break;
10640 case 3: // Return the inverted value of the LT bit of CR6.
10641 BitNo = 2; InvertBit = true;
10642 break;
10643 }
10644
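// After MFOCRF, the CR6 field sits in bits 4..7 of the result when counting
// from the least significant bit: SO=4, EQ=5, GT=6, LT=7. The shift amount
// 8 - (3 - BitNo) therefore maps BitNo 0 (EQ) to 5 and BitNo 2 (LT) to 7.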
10645 // Shift the bit into the low position.
10646 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
10647 DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
10648 // Isolate the bit.
10649 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
10650 DAG.getConstant(1, dl, MVT::i32));
10651
10652 // If we are supposed to, toggle the bit.
10653 if (InvertBit)
10654 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
10655 DAG.getConstant(1, dl, MVT::i32));
10656 return Flags;
10657}
10658
10659SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
10660 SelectionDAG &DAG) const {
10661 // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain
10662 // operand at the beginning of the argument list.
10663 int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
10664 SDLoc DL(Op);
10665 switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
10666 case Intrinsic::ppc_cfence: {
10667 assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
10668 assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
10669 SDValue Val = Op.getOperand(ArgStart + 1);
10670 EVT Ty = Val.getValueType();
10671 if (Ty == MVT::i128) {
10672 // FIXME: Testing one of two paired registers is sufficient to guarantee
10673 // ordering?
10674 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val);
10675 }
10676 return SDValue(
10677 DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
10678 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Val),
10679 Op.getOperand(0)),
10680 0);
10681 }
10682 default:
10683 break;
10684 }
10685 return SDValue();
10686}
10687
10688// Lower scalar BSWAP64 to xxbrd.
10689SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
10690 SDLoc dl(Op);
10691 if (!Subtarget.isPPC64())
10692 return Op;
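// Splat the scalar into both doublewords of a v2i64 (MTVSRDD), byte-reverse
// the whole vector (XXBRD), then move one doubleword back to a GPR (MFVSRD);
// on little-endian subtargets that is element 1 rather than element 0.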
10693 // MTVSRDD
10694 Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
10695 Op.getOperand(0));
10696 // XXBRD
10697 Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
10698 // MFVSRD
10699 int VectorIndex = 0;
10700 if (Subtarget.isLittleEndian())
10701 VectorIndex = 1;
10702 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
10703 DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
10704 return Op;
10705}
10706
10707// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
10708// compared to a value that is atomically loaded (atomic loads zero-extend).
10709SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
10710 SelectionDAG &DAG) const {
10711 assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
10712 "Expecting an atomic compare-and-swap here.");
10713 SDLoc dl(Op);
10714 auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
10715 EVT MemVT = AtomicNode->getMemoryVT();
10716 if (MemVT.getSizeInBits() >= 32)
10717 return Op;
10718
10719 SDValue CmpOp = Op.getOperand(2);
10720 // If this is already correctly zero-extended, leave it alone.
10721 auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
10722 if (DAG.MaskedValueIsZero(CmpOp, HighBits))
10723 return Op;
10724
10725 // Clear the high bits of the compare operand.
10726 unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
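// For example, an i8 memory type gives MaskVal = 0xFF and i16 gives 0xFFFF.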
10727 SDValue NewCmpOp =
10728 DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
10729 DAG.getConstant(MaskVal, dl, MVT::i32));
10730
10731 // Replace the existing compare operand with the properly zero-extended one.
10732 SmallVector<SDValue, 4> Ops;
10733 for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
10734 Ops.push_back(AtomicNode->getOperand(i));
10735 Ops[2] = NewCmpOp;
10736 MachineMemOperand *MMO = AtomicNode->getMemOperand();
10737 SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
10738 auto NodeTy =
10739 (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
10740 return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
10741}
10742
10743SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
10744 SelectionDAG &DAG) const {
10745 AtomicSDNode *N = cast<AtomicSDNode>(Op.getNode());
10746 EVT MemVT = N->getMemoryVT();
10747 assert(MemVT.getSimpleVT() == MVT::i128 &&
10748 "Expect quadword atomic operations");
10749 SDLoc dl(N);
10750 unsigned Opc = N->getOpcode();
10751 switch (Opc) {
10752 case ISD::ATOMIC_LOAD: {
10753 // Lower quadword atomic load to int_ppc_atomic_load_i128 which will be
10754 // lowered to PPC instructions by the pattern-matching instruction selector.
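// The intrinsic returns the loaded value as two i64 halves plus a chain; the
// code below recombines them as zext(lo) | (zext(hi) << 64).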
10755 SDVTList Tys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other);
10756 SmallVector<SDValue, 4> Ops{
10757 N->getOperand(0),
10758 DAG.getConstant(Intrinsic::ppc_atomic_load_i128, dl, MVT::i32)};
10759 for (int I = 1, E = N->getNumOperands(); I < E; ++I)
10760 Ops.push_back(N->getOperand(I));
10761 SDValue LoadedVal = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys,
10762 Ops, MemVT, N->getMemOperand());
10763 SDValue ValLo = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal);
10764 SDValue ValHi =
10765 DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal.getValue(1));
10766 ValHi = DAG.getNode(ISD::SHL, dl, MVT::i128, ValHi,
10767 DAG.getConstant(64, dl, MVT::i32));
10768 SDValue Val =
10769 DAG.getNode(ISD::OR, dl, {MVT::i128, MVT::Other}, {ValLo, ValHi});
10770 return DAG.getNode(ISD::MERGE_VALUES, dl, {MVT::i128, MVT::Other},
10771 {Val, LoadedVal.getValue(2)});
10772 }
10773 case ISD::ATOMIC_STORE: {
10774 // Lower quadword atomic store to int_ppc_atomic_store_i128 which will be
10775 // lowered to PPC instructions by the pattern-matching instruction selector.
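// The i128 value is split into two i64 halves for the intrinsic: the low half
// by truncation and the high half by a 64-bit logical right shift followed by
// truncation.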
10776 SDVTList Tys = DAG.getVTList(MVT::Other);
10777 SmallVector<SDValue, 4> Ops{
10778 N->getOperand(0),
10779 DAG.getConstant(Intrinsic::ppc_atomic_store_i128, dl, MVT::i32)};
10780 SDValue Val = N->getOperand(2);
10781 SDValue ValLo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, Val);
10782 SDValue ValHi = DAG.getNode(ISD::SRL, dl, MVT::i128, Val,
10783 DAG.getConstant(64, dl, MVT::i32));
10784 ValHi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, ValHi);
10785 Ops.push_back(ValLo);
10786 Ops.push_back(ValHi);
10787 Ops.push_back(N->getOperand(1));
10788 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops, MemVT,
10789 N->getMemOperand());
10790 }
10791 default:
10792 llvm_unreachable("Unexpected atomic opcode");
10793 }
10794}
10795
10796SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
10797 SelectionDAG &DAG) const {
10798 SDLoc dl(Op);
10799 // Create a stack slot that is 16-byte aligned.
10800 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
10801 int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
10802 EVT PtrVT = getPointerTy(DAG.getDataLayout());
10803 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
10804
10805 // Store the input value into Value#0 of the stack slot.
10806 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
10807 MachinePointerInfo());
10808 // Load it out.
10809 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
10810}
10811
10812SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
10813 SelectionDAG &DAG) const {
10814 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
10815 "Should only be called for ISD::INSERT_VECTOR_ELT");
10816
10817 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
10818
10819 EVT VT = Op.getValueType();
10820 SDLoc dl(Op);
10821 SDValue V1 = Op.getOperand(0);
10822 SDValue V2 = Op.getOperand(1);
10823
10824 if (VT == MVT::v2f64 && C)
10825 return Op;
10826
10827 if (Subtarget.hasP9Vector()) {
10828 // An f32 load feeding into a v4f32 insert_vector_elt is handled in this way
10829 // because on P10, it allows this specific insert_vector_elt load pattern to
10830 // utilize the refactored load and store infrastructure in order to exploit
10831 // prefixed loads.
10832 // On targets with inexpensive direct moves (Power9 and up), a
10833 // (insert_vector_elt v4f32:$vec, (f32 load)) is always better as an integer
10834 // load since a single precision load will involve conversion to double
10835 // precision on the load followed by another conversion to single precision.
10836 if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32) &&
10837 (isa<LoadSDNode>(V2))) {
10838 SDValue BitcastVector = DAG.getBitcast(MVT::v4i32, V1);
10839 SDValue BitcastLoad = DAG.getBitcast(MVT::i32, V2);
10840 SDValue InsVecElt =
10841 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4i32, BitcastVector,
10842 BitcastLoad, Op.getOperand(2));
10843 return DAG.getBitcast(MVT::v4f32, InsVecElt);
10844 }
10845 }
10846
10847 if (Subtarget.isISA3_1()) {
10848 if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
10849 return SDValue();
10850 // On P10, we have legal lowering for constant and variable indices for
10851 // all vectors.
10852 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
10853 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
10854 return Op;
10855 }
10856
10857 // Before P10, we have legal lowering for constant indices but not for
10858 // variable ones.
10859 if (!C)
10860 return SDValue();
10861
10862 // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
10863 if (VT == MVT::v8i16 || VT == MVT::v16i8) {
10864 SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
10865 unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
10866 unsigned InsertAtElement = C->getZExtValue();
10867 unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
10868 if (Subtarget.isLittleEndian()) {
10869 InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
10870 }
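// Example: inserting element 3 of a v8i16 gives InsertAtByte = 6 on
// big-endian targets; on little-endian it becomes (16 - 2) - 6 = 8.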
10871 return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
10872 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10873 }
10874 return Op;
10875}
10876
10877SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
10878 SelectionDAG &DAG) const {
10879 SDLoc dl(Op);
10880 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
10881 SDValue LoadChain = LN->getChain();
10882 SDValue BasePtr = LN->getBasePtr();
10883 EVT VT = Op.getValueType();
10884
10885 if (VT != MVT::v256i1 && VT != MVT::v512i1)
10886 return Op;
10887
10888 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
10889 // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
10890 // 2 or 4 vsx registers.
10891 assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
10892 "Type unsupported without MMA");
10893 assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
10894 "Type unsupported without paired vector support");
10895 Align Alignment = LN->getAlign();
10896 SmallVector<SDValue, 4> Loads;
10897 SmallVector<SDValue, 4> LoadChains;
10898 unsigned NumVecs = VT.getSizeInBits() / 128;
10899 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
10900 SDValue Load =
10901 DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
10902 LN->getPointerInfo().getWithOffset(Idx * 16),
10903 commonAlignment(Alignment, Idx * 16),
10904 LN->getMemOperand()->getFlags(), LN->getAAInfo());
10905 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
10906 DAG.getConstant(16, dl, BasePtr.getValueType()));
10907 Loads.push_back(Load);
10908 LoadChains.push_back(Load.getValue(1));
10909 }
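// On little-endian subtargets the individual loads (and their chains) are
// reversed so ACC_BUILD/PAIR_BUILD receives the subregisters in the expected
// order.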
10910 if (Subtarget.isLittleEndian()) {
10911 std::reverse(Loads.begin(), Loads.end());
10912 std::reverse(LoadChains.begin(), LoadChains.end());
10913 }
10914 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
10915 SDValue Value =
10916 DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
10917 dl, VT, Loads);
10918 SDValue RetOps[] = {Value, TF};
10919 return DAG.getMergeValues(RetOps, dl);
10920}
10921
10922SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
10923 SelectionDAG &DAG) const {
10924 SDLoc dl(Op);
10925 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
10926 SDValue StoreChain = SN->getChain();
10927 SDValue BasePtr = SN->getBasePtr();
10928 SDValue Value = SN->getValue();
10929 EVT StoreVT = Value.getValueType();
10930
10931 if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
10932 return Op;
10933
10934 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
10935 // Here we create 2 or 4 v16i8 stores to store the pair or accumulator
10936 // underlying registers individually.
10937 assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
10938 "Type unsupported without MMA");
10939 assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
10940 "Type unsupported without paired vector support");
10941 Align Alignment = SN->getAlign();
10942 SmallVector<SDValue, 4> Stores;
10943 unsigned NumVecs = 2;
10944 if (StoreVT == MVT::v512i1) {
10945 Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
10946 NumVecs = 4;
10947 }
10948 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
10949 unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
10950 SDValue Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
10951 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
10952 SDValue Store =
10953 DAG.getStore(StoreChain, dl, Elt, BasePtr,
10954 SN->getPointerInfo().getWithOffset(Idx * 16),
10955 commonAlignment(Alignment, Idx * 16),
10956 SN->getMemOperand()->getFlags(), SN->getAAInfo());
10957 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
10958 DAG.getConstant(16, dl, BasePtr.getValueType()));
10959 Stores.push_back(Store);
10960 }
10961 SDValue TF = DAG.getTokenFactor(dl, Stores);
10962 return TF;
10963}
10964
10965SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
10966 SDLoc dl(Op);
10967 if (Op.getValueType() == MVT::v4i32) {
10968 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
10969
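// Decompose each 32-bit multiply into 16-bit halves:
//   a*b mod 2^32 = aL*bL + ((aL*bH + aH*bL) << 16)
// vmulouh produces the aL*bL terms, vmsumuhm with the halfword-swapped RHS
// accumulates the two cross products, and vslw shifts them into place.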
10970 SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
10971 // +16 as shift amt.
10972 SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
10973 SDValue RHSSwap = // = vrlw RHS, 16
10974 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
10975
10976 // Shrinkify inputs to v8i16.
10977 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
10978 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
10979 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
10980
10981 // Low parts multiplied together, generating 32-bit results (we ignore the
10982 // top parts).
10983 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
10984 LHS, RHS, DAG, dl, MVT::v4i32);
10985
10986 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
10987 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
10988 // Shift the high parts up 16 bits.
10989 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
10990 Neg16, DAG, dl);
10991 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
10992 } else if (Op.getValueType() == MVT::v16i8) {
10993 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
10994 bool isLittleEndian = Subtarget.isLittleEndian();
10995
10996 // Multiply the even 8-bit parts, producing 16-bit sums.
10997 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
10998 LHS, RHS, DAG, dl, MVT::v8i16);
10999 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
11000
11001 // Multiply the odd 8-bit parts, producing 16-bit sums.
11002 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
11003 LHS, RHS, DAG, dl, MVT::v8i16);
11004 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
11005
11006 // Merge the results together. Because vmuleub and vmuloub are
11007 // instructions with a big-endian bias, we must reverse the
11008 // element numbering and reverse the meaning of "odd" and "even"
11009 // when generating little endian code.
11010 int Ops[16];
11011 for (unsigned i = 0; i != 8; ++i) {
11012 if (isLittleEndian) {
11013 Ops[i*2 ] = 2*i;
11014 Ops[i*2+1] = 2*i+16;
11015 } else {
11016 Ops[i*2 ] = 2*i+1;
11017 Ops[i*2+1] = 2*i+1+16;
11018 }
11019 }
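// For big-endian this builds the shuffle mask {1, 17, 3, 19, ...}, picking
// the low byte of each 16-bit product alternately from the even and odd
// multiplies.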
11020 if (isLittleEndian)
11021 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
11022 else
11023 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
11024 } else {
11025 llvm_unreachable("Unknown mul to lower!");
11026 }
11027}
11028
11029SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
11030 bool IsStrict = Op->isStrictFPOpcode();
11031 if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
11032 !Subtarget.hasP9Vector())
11033 return SDValue();
11034
11035 return Op;
11036}
11037
11038 // Custom lowering for fpext v2f32 to v2f64
11039SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
11040
11041 assert(Op.getOpcode() == ISD::FP_EXTEND &&
11042 "Should only be called for ISD::FP_EXTEND");
11043
11044 // FIXME: handle extends from half precision float vectors on P9.
11045 // We only want to custom lower an extend from v2f32 to v2f64.
11046 if (Op.getValueType() != MVT::v2f64 ||
11047 Op.getOperand(0).getValueType() != MVT::v2f32)
11048 return SDValue();
11049
11050 SDLoc dl(Op);
11051 SDValue Op0 = Op.getOperand(0);
11052
11053 switch (Op0.getOpcode()) {
11054 default:
11055 return SDValue();
11056 case ISD::EXTRACT_SUBVECTOR: {
11057 assert(Op0.getNumOperands() == 2 &&
11058 isa<ConstantSDNode>(Op0->getOperand(1)) &&
11059 "Node should have 2 operands with second one being a constant!");
11060
11061 if (Op0.getOperand(0).getValueType() != MVT::v4f32)
11062 return SDValue();
11063
11064 // Custom lowering is only done for the high or low doubleword.
11065 int Idx = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
11066 if (Idx % 2 != 0)
11067 return SDValue();
11068
11069 // Since input is v4f32, at this point Idx is either 0 or 2.
11070 // Shift to get the doubleword position we want.
11071 int DWord = Idx >> 1;
11072
11073 // High and low word positions are different on little endian.
11074 if (Subtarget.isLittleEndian())
11075 DWord ^= 0x1;
11076
11077 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
11078 Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
11079 }
11080 case ISD::FADD:
11081 case ISD::FMUL:
11082 case ISD::FSUB: {
11083 SDValue NewLoad[2];
11084 for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
11085 // Ensure both inputs are loads.
11086 SDValue LdOp = Op0.getOperand(i);
11087 if (LdOp.getOpcode() != ISD::LOAD)
11088 return SDValue();
11089 // Generate new load node.
11090 LoadSDNode *LD = cast<LoadSDNode>(LdOp);
11091 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
11092 NewLoad[i] = DAG.getMemIntrinsicNode(
11093 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
11094 LD->getMemoryVT(), LD->getMemOperand());
11095 }
11096 SDValue NewOp =
11097 DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
11098 NewLoad[1], Op0.getNode()->getFlags());
11099 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
11100 DAG.getConstant(0, dl, MVT::i32));
11101 }
11102 case ISD::LOAD: {
11103 LoadSDNode *LD = cast<LoadSDNode>(Op0);
11104 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
11105 SDValue NewLd = DAG.getMemIntrinsicNode(
11106 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
11107 LD->getMemoryVT(), LD->getMemOperand());
11108 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
11109 DAG.getConstant(0, dl, MVT::i32));
11110 }
11111 }
11112 llvm_unreachable("ERROR: Should return for all cases within switch.");
11113}
11114
11115/// LowerOperation - Provide custom lowering hooks for some operations.
11116///
11117SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
11118 switch (Op.getOpcode()) {
11119 default: llvm_unreachable("Wasn't expecting to be able to lower this!");
11120 case ISD::FPOW: return lowerPow(Op, DAG);
11121 case ISD::FSIN: return lowerSin(Op, DAG);
11122 case ISD::FCOS: return lowerCos(Op, DAG);
11123 case ISD::FLOG: return lowerLog(Op, DAG);
11124 case ISD::FLOG10: return lowerLog10(Op, DAG);
11125 case ISD::FEXP: return lowerExp(Op, DAG);
11126 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
11127 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
11128 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
11129 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
11130 case ISD::JumpTable: return LowerJumpTable(Op, DAG);
11131 case ISD::STRICT_FSETCC:
11132 case ISD::STRICT_FSETCCS:
11133 case ISD::SETCC: return LowerSETCC(Op, DAG);
11134 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
11135 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
11136
11137 case ISD::INLINEASM:
11138 case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);
11139 // Variable argument lowering.
11140 case ISD::VASTART: return LowerVASTART(Op, DAG);
11141 case ISD::VAARG: return LowerVAARG(Op, DAG);
11142 case ISD::VACOPY: return LowerVACOPY(Op, DAG);
11143
11144 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
11145 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
11146 case ISD::GET_DYNAMIC_AREA_OFFSET:
11147 return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
11148
11149 // Exception handling lowering.
11150 case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
11151 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
11152 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
11153
11154 case ISD::LOAD: return LowerLOAD(Op, DAG);
11155 case ISD::STORE: return LowerSTORE(Op, DAG);
11156 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
11157 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
11158 case ISD::STRICT_FP_TO_UINT:
11159 case ISD::STRICT_FP_TO_SINT:
11160 case ISD::FP_TO_UINT:
11161 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
11162 case ISD::STRICT_UINT_TO_FP:
11163 case ISD::STRICT_SINT_TO_FP:
11164 case ISD::UINT_TO_FP:
11165 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
11166 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
11167
11168 // Lower 64-bit shifts.
11169 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
11170 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
11171 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
11172
11173 case ISD::FSHL: return LowerFunnelShift(Op, DAG);
11174 case ISD::FSHR: return LowerFunnelShift(Op, DAG);
11175
11176 // Vector-related lowering.
11177 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
11178 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
11179 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
11180 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
11181 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
11182 case ISD::MUL: return LowerMUL(Op, DAG);
11183 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
11184 case ISD::STRICT_FP_ROUND:
11185 case ISD::FP_ROUND:
11186 return LowerFP_ROUND(Op, DAG);
11187 case ISD::ROTL: return LowerROTL(Op, DAG);
11188
11189 // For counter-based loop handling.
11190 case ISD::INTRINSIC_W_CHAIN: return SDValue();
11191
11192 case ISD::BITCAST: return LowerBITCAST(Op, DAG);
11193
11194 // Frame & Return address.
11195 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
11196 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
11197
11198 case ISD::INTRINSIC_VOID:
11199 return LowerINTRINSIC_VOID(Op, DAG);
11200 case ISD::BSWAP:
11201 return LowerBSWAP(Op, DAG);
11202 case ISD::ATOMIC_CMP_SWAP:
11203 return LowerATOMIC_CMP_SWAP(Op, DAG);
11204 case ISD::ATOMIC_STORE:
11205 return LowerATOMIC_LOAD_STORE(Op, DAG);
11206 }
11207}
11208
11209void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
11210 SmallVectorImpl<SDValue>&Results,
11211 SelectionDAG &DAG) const {
11212 SDLoc dl(N);
11213 switch (N->getOpcode()) {
11214 default:
11215 llvm_unreachable("Do not know how to custom type legalize this operation!");
11216 case ISD::ATOMIC_LOAD: {
11217 SDValue Res = LowerATOMIC_LOAD_STORE(SDValue(N, 0), DAG);
11218 Results.push_back(Res);
11219 Results.push_back(Res.getValue(1));
11220 break;
11221 }
11222 case ISD::READCYCLECOUNTER: {
11223 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
11224 SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
11225
11226 Results.push_back(
11227 DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
11228 Results.push_back(RTB.getValue(2));
11229 break;
11230 }
11231 case ISD::INTRINSIC_W_CHAIN: {
11232 if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
11233 Intrinsic::loop_decrement)
11234 break;
11235
11236 assert(N->getValueType(0) == MVT::i1 &&
11237 "Unexpected result type for CTR decrement intrinsic");
11238 EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
11239 N->getValueType(0));
11240 SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
11241 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
11242 N->getOperand(1));
11243
11244 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
11245 Results.push_back(NewInt.getValue(1));
11246 break;
11247 }
11248 case ISD::INTRINSIC_WO_CHAIN: {
11249 switch (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue()) {
11250 case Intrinsic::ppc_pack_longdouble:
11251 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
11252 N->getOperand(2), N->getOperand(1)));
11253 break;
11254 case Intrinsic::ppc_maxfe:
11255 case Intrinsic::ppc_minfe:
11256 case Intrinsic::ppc_fnmsub:
11257 case Intrinsic::ppc_convert_f128_to_ppcf128:
11258 Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
11259 break;
11260 }
11261 break;
11262 }
11263 case ISD::VAARG: {
11264 if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
11265 return;
11266
11267 EVT VT = N->getValueType(0);
11268
11269 if (VT == MVT::i64) {
11270 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
11271
11272 Results.push_back(NewNode);
11273 Results.push_back(NewNode.getValue(1));
11274 }
11275 return;
11276 }
11277 case ISD::STRICT_FP_TO_SINT:
11278 case ISD::STRICT_FP_TO_UINT:
11279 case ISD::FP_TO_SINT:
11280 case ISD::FP_TO_UINT: {
11281 // LowerFP_TO_INT() can only handle f32 and f64.
11282 if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
11283 MVT::ppcf128)
11284 return;
11285 SDValue LoweredValue = LowerFP_TO_INT(SDValue(N, 0), DAG, dl);
11286 Results.push_back(LoweredValue);
11287 if (N->isStrictFPOpcode())
11288 Results.push_back(LoweredValue.getValue(1));
11289 return;
11290 }
11291 case ISD::TRUNCATE: {
11292 if (!N->getValueType(0).isVector())
11293 return;
11294 SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
11295 if (Lowered)
11296 Results.push_back(Lowered);
11297 return;
11298 }
11299 case ISD::FSHL:
11300 case ISD::FSHR:
11301 // Don't handle funnel shifts here.
11302 return;
11303 case ISD::BITCAST:
11304 // Don't handle bitcast here.
11305 return;
11306 case ISD::FP_EXTEND:
11307 SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
11308 if (Lowered)
11309 Results.push_back(Lowered);
11310 return;
11311 }
11312}
11313
11314//===----------------------------------------------------------------------===//
11315// Other Lowering Code
11316//===----------------------------------------------------------------------===//
11317
11318static Instruction *callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id) {
11319 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
11320 Function *Func = Intrinsic::getDeclaration(M, Id);
11321 return Builder.CreateCall(Func, {});
11322}
11323
11324 // The mappings for emitLeadingFence/emitTrailingFence are taken from
11325// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
11326Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
11327 Instruction *Inst,
11328 AtomicOrdering Ord) const {
11329 if (Ord == AtomicOrdering::SequentiallyConsistent)
11330 return callIntrinsic(Builder, Intrinsic::ppc_sync);
11331 if (isReleaseOrStronger(Ord))
11332 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
11333 return nullptr;
11334}
11335
11336Instruction *PPCTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
11337 Instruction *Inst,
11338 AtomicOrdering Ord) const {
11339 if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
11340 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
11341 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
11342 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
11343 if (isa<LoadInst>(Inst) && Subtarget.isPPC64())
11344 return Builder.CreateCall(
11345 Intrinsic::getDeclaration(
11346 Builder.GetInsertBlock()->getParent()->getParent(),
11347 Intrinsic::ppc_cfence, {Inst->getType()}),
11348 {Inst});
11349 // FIXME: Can use isync for rmw operation.
11350 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
11351 }
11352 return nullptr;
11353}
11354
11355MachineBasicBlock *
11356PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
11357 unsigned AtomicSize,
11358 unsigned BinOpcode,
11359 unsigned CmpOpcode,
11360 unsigned CmpPred) const {
11361 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
11362 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11363
11364 auto LoadMnemonic = PPC::LDARX;
11365 auto StoreMnemonic = PPC::STDCX;
11366 switch (AtomicSize) {
11367 default:
11368 llvm_unreachable("Unexpected size of atomic entity");
11369 case 1:
11370 LoadMnemonic = PPC::LBARX;
11371 StoreMnemonic = PPC::STBCX;
11372 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
11373 break;
11374 case 2:
11375 LoadMnemonic = PPC::LHARX;
11376 StoreMnemonic = PPC::STHCX;
11377 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
11378 break;
11379 case 4:
11380 LoadMnemonic = PPC::LWARX;
11381 StoreMnemonic = PPC::STWCX;
11382 break;
11383 case 8:
11384 LoadMnemonic = PPC::LDARX;
11385 StoreMnemonic = PPC::STDCX;
11386 break;
11387 }
11388
11389 const BasicBlock *LLVM_BB = BB->getBasicBlock();
11390 MachineFunction *F = BB->getParent();
11391 MachineFunction::iterator It = ++BB->getIterator();
11392
11393 Register dest = MI.getOperand(0).getReg();
11394 Register ptrA = MI.getOperand(1).getReg();
11395 Register ptrB = MI.getOperand(2).getReg();
11396 Register incr = MI.getOperand(3).getReg();
11397 DebugLoc dl = MI.getDebugLoc();
11398
11399 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
11400 MachineBasicBlock *loop2MBB =
11401 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
11402 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11403 F->insert(It, loopMBB);
11404 if (CmpOpcode)
11405 F->insert(It, loop2MBB);
11406 F->insert(It, exitMBB);
11407 exitMBB->splice(exitMBB->begin(), BB,
11408 std::next(MachineBasicBlock::iterator(MI)), BB->end());
11409 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11410
11411 MachineRegisterInfo &RegInfo = F->getRegInfo();
11412 Register TmpReg = (!BinOpcode) ? incr :
11413 RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
11414 : &PPC::GPRCRegClass);
11415
11416 // thisMBB:
11417 // ...
11418 // fallthrough --> loopMBB
11419 BB->addSuccessor(loopMBB);
11420
11421 // loopMBB:
11422 // l[wd]arx dest, ptr
11423 // add r0, dest, incr
11424 // st[wd]cx. r0, ptr
11425 // bne- loopMBB
11426 // fallthrough --> exitMBB
11427
11428 // For max/min...
11429 // loopMBB:
11430 // l[wd]arx dest, ptr
11431 // cmpl?[wd] incr, dest
11432 // bgt exitMBB
11433 // loop2MBB:
11434 // st[wd]cx. dest, ptr
11435 // bne- loopMBB
11436 // fallthrough --> exitMBB
11437
11438 BB = loopMBB;
11439 BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
11440 .addReg(ptrA).addReg(ptrB);
11441 if (BinOpcode)
11442 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
11443 if (CmpOpcode) {
11444 // Signed comparisons of byte or halfword values must be sign-extended.
11445 if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
11446 Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
11447 BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
11448 ExtReg).addReg(dest);
11449 BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
11450 .addReg(incr).addReg(ExtReg);
11451 } else
11452 BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
11453 .addReg(incr).addReg(dest);
11454
11455 BuildMI(BB, dl, TII->get(PPC::BCC))
11456 .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
11457 BB->addSuccessor(loop2MBB);
11458 BB->addSuccessor(exitMBB);
11459 BB = loop2MBB;
11460 }
11461 BuildMI(BB, dl, TII->get(StoreMnemonic))
11462 .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
11463 BuildMI(BB, dl, TII->get(PPC::BCC))
11464 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
11465 BB->addSuccessor(loopMBB);
11466 BB->addSuccessor(exitMBB);
11467
11468 // exitMBB:
11469 // ...
11470 BB = exitMBB;
11471 return BB;
11472}
11473
11474static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) {
11475 switch(MI.getOpcode()) {
11476 default:
11477 return false;
11478 case PPC::COPY:
11479 return TII->isSignExtended(MI);
11480 case PPC::LHA:
11481 case PPC::LHA8:
11482 case PPC::LHAU:
11483 case PPC::LHAU8:
11484 case PPC::LHAUX:
11485 case PPC::LHAUX8:
11486 case PPC::LHAX:
11487 case PPC::LHAX8:
11488 case PPC::LWA:
11489 case PPC::LWAUX:
11490 case PPC::LWAX:
11491 case PPC::LWAX_32:
11492 case PPC::LWA_32:
11493 case PPC::PLHA:
11494 case PPC::PLHA8:
11495 case PPC::PLHA8pc:
11496 case PPC::PLHApc:
11497 case PPC::PLWA:
11498 case PPC::PLWA8:
11499 case PPC::PLWA8pc:
11500 case PPC::PLWApc:
11501 case PPC::EXTSB:
11502 case PPC::EXTSB8:
11503 case PPC::EXTSB8_32_64:
11504 case PPC::EXTSB8_rec:
11505 case PPC::EXTSB_rec:
11506 case PPC::EXTSH:
11507 case PPC::EXTSH8:
11508 case PPC::EXTSH8_32_64:
11509 case PPC::EXTSH8_rec:
11510 case PPC::EXTSH_rec:
11511 case PPC::EXTSW:
11512 case PPC::EXTSWSLI:
11513 case PPC::EXTSWSLI_32_64:
11514 case PPC::EXTSWSLI_32_64_rec:
11515 case PPC::EXTSWSLI_rec:
11516 case PPC::EXTSW_32:
11517 case PPC::EXTSW_32_64:
11518 case PPC::EXTSW_32_64_rec:
11519 case PPC::EXTSW_rec:
11520 case PPC::SRAW:
11521 case PPC::SRAWI:
11522 case PPC::SRAWI_rec:
11523 case PPC::SRAW_rec:
11524 return true;
11525 }
11526 return false;
11527}
11528
11529MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
11530 MachineInstr &MI, MachineBasicBlock *BB,
11531 bool is8bit, // operation
11532 unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
11533 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
11534 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
11535
11536 // If this is a signed comparison and the value being compared is not known
11537 // to be sign extended, sign extend it here.
11538 DebugLoc dl = MI.getDebugLoc();
11539 MachineFunction *F = BB->getParent();
11540 MachineRegisterInfo &RegInfo = F->getRegInfo();
11541 Register incr = MI.getOperand(3).getReg();
11542 bool IsSignExtended = Register::isVirtualRegister(incr) &&
11543 isSignExtended(*RegInfo.getVRegDef(incr), TII);
11544
11545 if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
11546 Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
11547 BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg)
11548 .addReg(MI.getOperand(3).getReg());
11549 MI.getOperand(3).setReg(ValueReg);
11550 }
11551 // If we support part-word atomic mnemonics, just use them.
11552 if (Subtarget.hasPartwordAtomics())
11553 return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
11554 CmpPred);
11555
11556 // In 64 bit mode we have to use 64 bits for addresses, even though the
11557 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
11558 // registers without caring whether they're 32 or 64, but here we're
11559 // doing actual arithmetic on the addresses.
11560 bool is64bit = Subtarget.isPPC64();
11561 bool isLittleEndian = Subtarget.isLittleEndian();
11562 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
11563
11564 const BasicBlock *LLVM_BB = BB->getBasicBlock();
11565 MachineFunction::iterator It = ++BB->getIterator();
11566
11567 Register dest = MI.getOperand(0).getReg();
11568 Register ptrA = MI.getOperand(1).getReg();
11569 Register ptrB = MI.getOperand(2).getReg();
11570
11571 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
11572 MachineBasicBlock *loop2MBB =
11573 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
11574 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11575 F->insert(It, loopMBB);
11576 if (CmpOpcode)
11577 F->insert(It, loop2MBB);
11578 F->insert(It, exitMBB);
11579 exitMBB->splice(exitMBB->begin(), BB,
11580 std::next(MachineBasicBlock::iterator(MI)), BB->end());
11581 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11582
11583 const TargetRegisterClass *RC =
11584 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
11585 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
11586
11587 Register PtrReg = RegInfo.createVirtualRegister(RC);
11588 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
11589 Register ShiftReg =
11590 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
11591 Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
11592 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
11593 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
11594 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
11595 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
11596 Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
11597 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
11598 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
11599 Register SrwDestReg = RegInfo.createVirtualRegister(GPRC);
11600 Register Ptr1Reg;
11601 Register TmpReg =
11602 (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
11603
11604 // thisMBB:
11605 // ...
11606 // fallthrough --> loopMBB
11607 BB->addSuccessor(loopMBB);
11608
11609 // The 4-byte load must be aligned, while a char or short may be
11610 // anywhere in the word. Hence all this nasty bookkeeping code.
11611 // add ptr1, ptrA, ptrB [copy if ptrA==0]
11612 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
11613 // xori shift, shift1, 24 [16]
11614 // rlwinm ptr, ptr1, 0, 0, 29
11615 // slw incr2, incr, shift
11616 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
11617 // slw mask, mask2, shift
11618 // loopMBB:
11619 // lwarx tmpDest, ptr
11620 // add tmp, tmpDest, incr2
11621 // andc tmp2, tmpDest, mask
11622 // and tmp3, tmp, mask
11623 // or tmp4, tmp3, tmp2
11624 // stwcx. tmp4, ptr
11625 // bne- loopMBB
11626 // fallthrough --> exitMBB
11627 // srw SrwDest, tmpDest, shift
11628 // rlwinm SrwDest, SrwDest, 0, 24 [16], 31
11629 if (ptrA != ZeroReg) {
11630 Ptr1Reg = RegInfo.createVirtualRegister(RC);
11631 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
11632 .addReg(ptrA)
11633 .addReg(ptrB);
11634 } else {
11635 Ptr1Reg = ptrB;
11636 }
11637 // We need to use a 32-bit subregister to avoid a register class mismatch in 64-bit
11638 // mode.
11639 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
11640 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
11641 .addImm(3)
11642 .addImm(27)
11643 .addImm(is8bit ? 28 : 27);
11644 if (!isLittleEndian)
11645 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
11646 .addReg(Shift1Reg)
11647 .addImm(is8bit ? 24 : 16);
11648 if (is64bit)
11649 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
11650 .addReg(Ptr1Reg)
11651 .addImm(0)
11652 .addImm(61);
11653 else
11654 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
11655 .addReg(Ptr1Reg)
11656 .addImm(0)
11657 .addImm(0)
11658 .addImm(29);
11659 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
11660 if (is8bit)
11661 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
11662 else {
11663 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
11664 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
11665 .addReg(Mask3Reg)
11666 .addImm(65535);
11667 }
11668 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
11669 .addReg(Mask2Reg)
11670 .addReg(ShiftReg);
11671
11672 BB = loopMBB;
11673 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
11674 .addReg(ZeroReg)
11675 .addReg(PtrReg);
11676 if (BinOpcode)
11677 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
11678 .addReg(Incr2Reg)
11679 .addReg(TmpDestReg);
11680 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
11681 .addReg(TmpDestReg)
11682 .addReg(MaskReg);
11683 BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
11684 if (CmpOpcode) {
11685 // For unsigned comparisons, we can directly compare the shifted values.
11686 // For signed comparisons we shift and sign extend.
11687 Register SReg = RegInfo.createVirtualRegister(GPRC);
11688 BuildMI(BB, dl, TII->get(PPC::AND), SReg)
11689 .addReg(TmpDestReg)
11690 .addReg(MaskReg);
11691 unsigned ValueReg = SReg;
11692 unsigned CmpReg = Incr2Reg;
11693 if (CmpOpcode == PPC::CMPW) {
11694 ValueReg = RegInfo.createVirtualRegister(GPRC);
11695 BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
11696 .addReg(SReg)
11697 .addReg(ShiftReg);
11698 Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
11699 BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
11700 .addReg(ValueReg);
11701 ValueReg = ValueSReg;
11702 CmpReg = incr;
11703 }
11704 BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
11705 .addReg(CmpReg)
11706 .addReg(ValueReg);
11707 BuildMI(BB, dl, TII->get(PPC::BCC))
11708 .addImm(CmpPred)
11709 .addReg(PPC::CR0)
11710 .addMBB(exitMBB);
11711 BB->addSuccessor(loop2MBB);
11712 BB->addSuccessor(exitMBB);
11713 BB = loop2MBB;
11714 }
11715 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
11716 BuildMI(BB, dl, TII->get(PPC::STWCX))
11717 .addReg(Tmp4Reg)
11718 .addReg(ZeroReg)
11719 .addReg(PtrReg);
11720 BuildMI(BB, dl, TII->get(PPC::BCC))
11721 .addImm(PPC::PRED_NE)
11722 .addReg(PPC::CR0)
11723 .addMBB(loopMBB);
11724 BB->addSuccessor(loopMBB);
11725 BB->addSuccessor(exitMBB);
11726
11727 // exitMBB:
11728 // ...
11729 BB = exitMBB;
11730 // Since the shift amount is not a constant, we need to clear
11731 // the upper bits with a separate RLWINM.
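// Note: both of the following BuildMI calls insert at BB->begin(), so in the
// final block the SRW (emitted second) ends up before the RLWINM that reads
// SrwDestReg; the textual order here is the reverse of the execution order.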
11732 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::RLWINM), dest)
11733 .addReg(SrwDestReg)
11734 .addImm(0)
11735 .addImm(is8bit ? 24 : 16)
11736 .addImm(31);
11737 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), SrwDestReg)
11738 .addReg(TmpDestReg)
11739 .addReg(ShiftReg);
11740 return BB;
11741}
11742
11743llvm::MachineBasicBlock *
11744PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
11745 MachineBasicBlock *MBB) const {
11746 DebugLoc DL = MI.getDebugLoc();
11747 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11748 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
11749
11750 MachineFunction *MF = MBB->getParent();
11751 MachineRegisterInfo &MRI = MF->getRegInfo();
11752
11753 const BasicBlock *BB = MBB->getBasicBlock();
11754 MachineFunction::iterator I = ++MBB->getIterator();
11755
11756 Register DstReg = MI.getOperand(0).getReg();
11757 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
11758 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
11759 Register mainDstReg = MRI.createVirtualRegister(RC);
11760 Register restoreDstReg = MRI.createVirtualRegister(RC);
11761
11762 MVT PVT = getPointerTy(MF->getDataLayout());
11763 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
11764 "Invalid Pointer Size!");
11765 // For v = setjmp(buf), we generate
11766 //
11767 // thisMBB:
11768 // SjLjSetup mainMBB
11769 // bl mainMBB
11770 // v_restore = 1
11771 // b sinkMBB
11772 //
11773 // mainMBB:
11774 // buf[LabelOffset] = LR
11775 // v_main = 0
11776 //
11777 // sinkMBB:
11778 // v = phi(main, restore)
11779 //
11780
11781 MachineBasicBlock *thisMBB = MBB;
11782 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
11783 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
11784 MF->insert(I, mainMBB);
11785 MF->insert(I, sinkMBB);
11786
11787 MachineInstrBuilder MIB;
11788
11789 // Transfer the remainder of BB and its successor edges to sinkMBB.
11790 sinkMBB->splice(sinkMBB->begin(), MBB,
11791 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
11792 sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
11793
11794 // Note that the structure of the jmp_buf used here is not compatible
11795 // with that used by libc, and is not designed to be. Specifically, it
11796 // stores only those 'reserved' registers that LLVM does not otherwise
11797 // understand how to spill. Also, by convention, by the time this
11798 // intrinsic is called, Clang has already stored the frame address in the
11799 // first slot of the buffer and stack address in the third. Following the
11800 // X86 target code, we'll store the jump address in the second slot. We also
11801 // need to save the TOC pointer (R2) to handle jumps between shared
11802 // libraries, and that will be stored in the fourth slot. The thread
11803 // identifier (R13) is not affected.
11804
11805 // thisMBB:
11806 const int64_t LabelOffset = 1 * PVT.getStoreSize();
11807 const int64_t TOCOffset = 3 * PVT.getStoreSize();
11808 const int64_t BPOffset = 4 * PVT.getStoreSize();
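// For the 64-bit case (PVT store size 8), the resulting buffer layout is:
//   offset  0 : frame address        (stored earlier by Clang)
//   offset  8 : LR / jump address    (LabelOffset)
//   offset 16 : stack address        (stored earlier by Clang)
//   offset 24 : TOC pointer, R2      (TOCOffset)
//   offset 32 : base pointer         (BPOffset)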
11809
11810 // Prepare the IP in a register.
11811 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
11812 Register LabelReg = MRI.createVirtualRegister(PtrRC);
11813 Register BufReg = MI.getOperand(1).getReg();
11814
11815 if (Subtarget.is64BitELFABI()) {
11816 setUsesTOCBasePtr(*MBB->getParent());
11817 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
11818 .addReg(PPC::X2)
11819 .addImm(TOCOffset)
11820 .addReg(BufReg)
11821 .cloneMemRefs(MI);
11822 }
11823
11824 // Naked functions never have a base pointer, and so we use r1. For all
11825 // other functions, this decision must be delayed until PEI.
11826 unsigned BaseReg;
11827 if (MF->getFunction().hasFnAttribute(Attribute::Naked))
11828 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
11829 else
11830 BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
11831
11832 MIB = BuildMI(*thisMBB, MI, DL,
11833 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
11834 .addReg(BaseReg)
11835 .addImm(BPOffset)
11836 .addReg(BufReg)
11837 .cloneMemRefs(MI);
11838
11839 // Setup
11840 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
11841 MIB.addRegMask(TRI->getNoPreservedMask());
11842
11843 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
11844
11845 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
11846 .addMBB(mainMBB);
11847 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
11848
11849 thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
11850 thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
11851
11852 // mainMBB:
11853 // mainDstReg = 0
11854 MIB =
11855 BuildMI(mainMBB, DL,
11856 TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
11857
11858 // Store IP
11859 if (Subtarget.isPPC64()) {
11860 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
11861 .addReg(LabelReg)
11862 .addImm(LabelOffset)
11863 .addReg(BufReg);
11864 } else {
11865 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
11866 .addReg(LabelReg)
11867 .addImm(LabelOffset)
11868 .addReg(BufReg);
11869 }
11870 MIB.cloneMemRefs(MI);
11871
11872 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
11873 mainMBB->addSuccessor(sinkMBB);
11874
11875 // sinkMBB:
11876 BuildMI(*sinkMBB, sinkMBB->begin(), DL,
11877 TII->get(PPC::PHI), DstReg)
11878 .addReg(mainDstReg).addMBB(mainMBB)
11879 .addReg(restoreDstReg).addMBB(thisMBB);
11880
11881 MI.eraseFromParent();
11882 return sinkMBB;
11883}
11884
11885MachineBasicBlock *
11886PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
11887 MachineBasicBlock *MBB) const {
11888 DebugLoc DL = MI.getDebugLoc();
11889 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11890
11891 MachineFunction *MF = MBB->getParent();
11892 MachineRegisterInfo &MRI = MF->getRegInfo();
11893
11894 MVT PVT = getPointerTy(MF->getDataLayout());
11895 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
11896 "Invalid Pointer Size!");
11897
11898 const TargetRegisterClass *RC =
11899 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
11900 Register Tmp = MRI.createVirtualRegister(RC);
11901 // Since FP is only updated here but NOT referenced, it's treated as GPR.
11902 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
11903 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
11904 unsigned BP =
11905 (PVT == MVT::i64)
11906 ? PPC::X30
11907 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
11908 : PPC::R30);
11909
11910 MachineInstrBuilder MIB;
11911
11912 const int64_t LabelOffset = 1 * PVT.getStoreSize();
11913 const int64_t SPOffset = 2 * PVT.getStoreSize();
11914 const int64_t TOCOffset = 3 * PVT.getStoreSize();
11915 const int64_t BPOffset = 4 * PVT.getStoreSize();
11916
11917 Register BufReg = MI.getOperand(0).getReg();
11918
11919 // Reload FP (the jumped-to function may not have had a
11920 // frame pointer, and if so, then its r31 will be restored
11921 // as necessary).
11922 if (PVT == MVT::i64) {
11923 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
11924 .addImm(0)
11925 .addReg(BufReg);
11926 } else {
11927 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
11928 .addImm(0)
11929 .addReg(BufReg);
11930 }
11931 MIB.cloneMemRefs(MI);
11932
11933 // Reload IP
11934 if (PVT == MVT::i64) {
11935 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
11936 .addImm(LabelOffset)
11937 .addReg(BufReg);
11938 } else {
11939 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
11940 .addImm(LabelOffset)
11941 .addReg(BufReg);
11942 }
11943 MIB.cloneMemRefs(MI);
11944
11945 // Reload SP
11946 if (PVT == MVT::i64) {
11947 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
11948 .addImm(SPOffset)
11949 .addReg(BufReg);
11950 } else {
11951 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
11952 .addImm(SPOffset)
11953 .addReg(BufReg);
11954 }
11955 MIB.cloneMemRefs(MI);
11956
11957 // Reload BP
11958 if (PVT == MVT::i64) {
11959 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
11960 .addImm(BPOffset)
11961 .addReg(BufReg);
11962 } else {
11963 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
11964 .addImm(BPOffset)
11965 .addReg(BufReg);
11966 }
11967 MIB.cloneMemRefs(MI);
11968
11969 // Reload TOC
11970 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
11971 setUsesTOCBasePtr(*MBB->getParent());
11972 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
11973 .addImm(TOCOffset)
11974 .addReg(BufReg)
11975 .cloneMemRefs(MI);
11976 }
11977
11978 // Jump
11979 BuildMI(*MBB, MI, DL,
11980 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
11981 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
11982
11983 MI.eraseFromParent();
11984 return MBB;
11985}
11986
11987bool PPCTargetLowering::hasInlineStackProbe(MachineFunction &MF) const {
11988 // If the function specifically requests inline stack probes, emit them.
11989 if (MF.getFunction().hasFnAttribute("probe-stack"))
11990 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
11991 "inline-asm";
11992 return false;
11993}
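A minimal sketch, not from this file, of how a front end or pass might request
these inline probes; the helper name is hypothetical, and it simply sets the
attribute checked by hasInlineStackProbe() above:

  #include "llvm/IR/Function.h"

  // Request inline stack probes on F (hypothetical helper).
  static void requestInlineStackProbes(llvm::Function &F) {
    F.addFnAttr("probe-stack", "inline-asm");
  }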
11994
11995unsigned PPCTargetLowering::getStackProbeSize(MachineFunction &MF) const {
11996 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
11997 unsigned StackAlign = TFI->getStackAlignment();
11998 assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
11999 "Unexpected stack alignment");
12000 // The default stack probe size is 4096 if the function has no
12001 // stack-probe-size attribute.
12002 unsigned StackProbeSize = 4096;
12003 const Function &Fn = MF.getFunction();
12004 if (Fn.hasFnAttribute("stack-probe-size"))
12005 Fn.getFnAttribute("stack-probe-size")
12006 .getValueAsString()
12007 .getAsInteger(0, StackProbeSize);
12008 // Round down to the stack alignment.
12009 StackProbeSize &= ~(StackAlign - 1);
12010 return StackProbeSize ? StackProbeSize : StackAlign;
12011}
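// Worked example of the rounding above, assuming a 16-byte stack alignment:
//   "stack-probe-size"=5000 : 5000 & ~(16 - 1) == 4992 -> probe size 4992
//   "stack-probe-size"=8    : 8 & ~15 == 0             -> falls back to 16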
12012
12013 // Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
12014 // into three phases. In the first phase, it uses the pseudo instruction
12015 // PREPARE_PROBED_ALLOCA to get the future result of the actual FramePointer and
12016 // FinalStackPtr. In the second phase, it generates a loop that probes the blocks.
12017 // Finally, it uses the pseudo instruction DYNAREAOFFSET to get the future result
12018 // of MaxCallFrameSize so that it can calculate the correct data area pointer.
12019MachineBasicBlock *
12020PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
12021 MachineBasicBlock *MBB) const {
12022 const bool isPPC64 = Subtarget.isPPC64();
12023 MachineFunction *MF = MBB->getParent();
12024 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12025 DebugLoc DL = MI.getDebugLoc();
12026 const unsigned ProbeSize = getStackProbeSize(*MF);
12027 const BasicBlock *ProbedBB = MBB->getBasicBlock();
12028 MachineRegisterInfo &MRI = MF->getRegInfo();
12029 // The CFG of probing stack looks as
12030 // +-----+
12031 // | MBB |
12032 // +--+--+
12033 // |
12034 // +----v----+
12035 // +--->+ TestMBB +---+
12036 // | +----+----+ |
12037 // | | |
12038 // | +-----v----+ |
12039 // +---+ BlockMBB | |
12040 // +----------+ |
12041 // |
12042 // +---------+ |
12043 // | TailMBB +<--+
12044 // +---------+
12045 // In MBB, calculate the previous frame pointer and the final stack pointer.
12046 // In TestMBB, test whether sp equals the final stack pointer and, if so, jump
12047 // to TailMBB. In BlockMBB, update sp atomically and jump back to TestMBB.
12048 // TailMBB is spliced in via \p MI.
12049 MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
12050 MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
12051 MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
12052
12053 MachineFunction::iterator MBBIter = ++MBB->getIterator();
12054 MF->insert(MBBIter, TestMBB);
12055 MF->insert(MBBIter, BlockMBB);
12056 MF->insert(MBBIter, TailMBB);
12057
12058 const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
12059 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
12060
12061 Register DstReg = MI.getOperand(0).getReg();
12062 Register NegSizeReg = MI.getOperand(1).getReg();
12063 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
12064 Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12065 Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12066 Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12067
12068 // Since the value of NegSizeReg might be realigned during prologue/epilogue
12069 // insertion, insert a PREPARE_PROBED_ALLOCA pseudo instruction to get the
12070 // actual FramePointer and NegSize.
12071 unsigned ProbeOpc;
12072 if (!MRI.hasOneNonDBGUse(NegSizeReg))
12073 ProbeOpc =
12074 isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
12075 else
12076 // By using PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG, ActualNegSizeReg
12077 // and NegSizeReg will be allocated to the same physical register, avoiding a
12078 // redundant copy when NegSizeReg has only one use, namely the current MI,
12079 // which will then be replaced by PREPARE_PROBED_ALLOCA.
12080 ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
12081 : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
12082 BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
12083 .addDef(ActualNegSizeReg)
12084 .addReg(NegSizeReg)
12085 .add(MI.getOperand(2))
12086 .add(MI.getOperand(3));
12087
12088 // Calculate the final stack pointer, which equals SP + ActualNegSize.
12089 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
12090 FinalStackPtr)
12091 .addReg(SPReg)
12092 .addReg(ActualNegSizeReg);
12093
12094 // Materialize a scratch register for update.
12095 int64_t NegProbeSize = -(int64_t)ProbeSize;
12096 assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
12097 Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12098 if (!isInt<16>(NegProbeSize)) {
12099 Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12100 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
12101 .addImm(NegProbeSize >> 16);
12102 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
12103 ScratchReg)
12104 .addReg(TempReg)
12105 .addImm(NegProbeSize & 0xFFFF);
12106 } else
12107 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
12108 .addImm(NegProbeSize);
12109
12110 {
12111 // Probing leading residual part.
12112 Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12113 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
12114 .addReg(ActualNegSizeReg)
12115 .addReg(ScratchReg);
12116 Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12117 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
12118 .addReg(Div)
12119 .addReg(ScratchReg);
12120 Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12121 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
12122 .addReg(Mul)
12123 .addReg(ActualNegSizeReg);
12124 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
12125 .addReg(FramePointer)
12126 .addReg(SPReg)
12127 .addReg(NegMod);
12128 }
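// Worked example, assuming ProbeSize = 4096 and a 10000-byte allocation:
//   ActualNegSize = -10000, ScratchReg = -4096
//   Div    = -10000 / -4096    = 2
//   Mul    = 2 * -4096         = -8192
//   NegMod = -10000 - (-8192)  = -1808   (leading residual, probed first)
// The loop below then probes the remaining two full 4096-byte blocks until
// SP reaches FinalStackPtr.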
12129
12130 {
12131 // Remaining part should be multiple of ProbeSize.
12132 Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
12133 BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
12134 .addReg(SPReg)
12135 .addReg(FinalStackPtr);
12136 BuildMI(TestMBB, DL, TII->get(PPC::BCC))
12137 .addImm(PPC::PRED_EQ)
12138 .addReg(CmpResult)
12139 .addMBB(TailMBB);
12140 TestMBB->addSuccessor(BlockMBB);
12141 TestMBB->addSuccessor(TailMBB);
12142 }
12143
12144 {
12145 // Touch the block.
12146 // |P...|P...|P...
12147 BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
12148 .addReg(FramePointer)
12149 .addReg(SPReg)
12150 .addReg(ScratchReg);
12151 BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
12152 BlockMBB->addSuccessor(TestMBB);
12153 }
12154
12155 // The calculation of MaxCallFrameSize is deferred to prologue/epilogue
12156 // insertion; use the DYNAREAOFFSET pseudo instruction to get the future result.
12157 Register MaxCallFrameSizeReg =
12158 MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12159 BuildMI(TailMBB, DL,
12160 TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
12161 MaxCallFrameSizeReg)
12162 .add(MI.getOperand(2))
12163 .add(MI.getOperand(3));
12164 BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
12165 .addReg(SPReg)
12166 .addReg(MaxCallFrameSizeReg);
12167
12168 // Splice instructions after MI to TailMBB.
12169 TailMBB->splice(TailMBB->end(), MBB,
12170 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
12171 TailMBB->transferSuccessorsAndUpdatePHIs(MBB);
12172 MBB->addSuccessor(TestMBB);
12173
12174 // Delete the pseudo instruction.
12175 MI.eraseFromParent();
12176
12177 ++NumDynamicAllocaProbed;
12178 return TailMBB;
12179}
12180
12181MachineBasicBlock *
12182PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
12183 MachineBasicBlock *BB) const {
12184 if (MI.getOpcode() == TargetOpcode::STACKMAP ||
12185 MI.getOpcode() == TargetOpcode::PATCHPOINT) {
12186 if (Subtarget.is64BitELFABI() &&
12187 MI.getOpcode() == TargetOpcode::PATCHPOINT &&
12188 !Subtarget.isUsingPCRelativeCalls()) {
12189 // Call lowering should have added an r2 operand to indicate a dependence
12190 // on the TOC base pointer value. It can't, however, because there is no
12191 // way to mark the dependence as implicit there, and so the stackmap code
12192 // will confuse it with a regular operand. Instead, add the dependence
12193 // here.
12194 MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
12195 }
12196
12197 return emitPatchPoint(MI, BB);
12198 }
12199
12200 if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
12201 MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
12202 return emitEHSjLjSetJmp(MI, BB);
12203 } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
12204 MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
12205 return emitEHSjLjLongJmp(MI, BB);
12206 }
12207
12208 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12209
12210 // To "insert" these instructions we actually have to insert their
12211 // control-flow patterns.
12212 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12213 MachineFunction::iterator It = ++BB->getIterator();
12214
12215 MachineFunction *F = BB->getParent();
12216 MachineRegisterInfo &MRI = F->getRegInfo();
12217
12218 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
12219 MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 ||
12220 MI.getOpcode() == PPC::SELECT_I8) {
12221 SmallVector<MachineOperand, 2> Cond;
12222 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
12223 MI.getOpcode() == PPC::SELECT_CC_I8)
12224 Cond.push_back(MI.getOperand(4));
12225 else
12226 Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
12227 Cond.push_back(MI.getOperand(1));
12228
12229 DebugLoc dl = MI.getDebugLoc();
12230 TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
12231 MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
12232 } else if (MI.getOpcode() == PPC::SELECT_CC_F4 ||
12233 MI.getOpcode() == PPC::SELECT_CC_F8 ||
12234 MI.getOpcode() == PPC::SELECT_CC_F16 ||
12235 MI.getOpcode() == PPC::SELECT_CC_VRRC ||
12236 MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
12237 MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
12238 MI.getOpcode() == PPC::SELECT_CC_VSRC ||
12239 MI.getOpcode() == PPC::SELECT_CC_SPE4 ||
12240 MI.getOpcode() == PPC::SELECT_CC_SPE ||
12241 MI.getOpcode() == PPC::SELECT_F4 ||
12242 MI.getOpcode() == PPC::SELECT_F8 ||
12243 MI.getOpcode() == PPC::SELECT_F16 ||
12244 MI.getOpcode() == PPC::SELECT_SPE ||
12245 MI.getOpcode() == PPC::SELECT_SPE4 ||
12246 MI.getOpcode() == PPC::SELECT_VRRC ||
12247 MI.getOpcode() == PPC::SELECT_VSFRC ||
12248 MI.getOpcode() == PPC::SELECT_VSSRC ||
12249 MI.getOpcode() == PPC::SELECT_VSRC) {
12250 // The incoming instruction knows the destination vreg to set, the
12251 // condition code register to branch on, the true/false values to
12252 // select between, and a branch opcode to use.
12253
12254 // thisMBB:
12255 // ...
12256 // TrueVal = ...
12257 // cmpTY ccX, r1, r2
12258 // bCC copy1MBB
12259 // fallthrough --> copy0MBB
12260 MachineBasicBlock *thisMBB = BB;
12261 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
12262 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12263 DebugLoc dl = MI.getDebugLoc();
12264 F->insert(It, copy0MBB);
12265 F->insert(It, sinkMBB);
12266
12267 // Transfer the remainder of BB and its successor edges to sinkMBB.
12268 sinkMBB->splice(sinkMBB->begin(), BB,
12269 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12270 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
12271
12272 // Next, add the true and fallthrough blocks as its successors.
12273 BB->addSuccessor(copy0MBB);
12274 BB->addSuccessor(sinkMBB);
12275
12276 if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||
12277 MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||
12278 MI.getOpcode() == PPC::SELECT_F16 ||
12279 MI.getOpcode() == PPC::SELECT_SPE4 ||
12280 MI.getOpcode() == PPC::SELECT_SPE ||
12281 MI.getOpcode() == PPC::SELECT_VRRC ||
12282 MI.getOpcode() == PPC::SELECT_VSFRC ||
12283 MI.getOpcode() == PPC::SELECT_VSSRC ||
12284 MI.getOpcode() == PPC::SELECT_VSRC) {
12285 BuildMI(BB, dl, TII->get(PPC::BC))
12286 .addReg(MI.getOperand(1).getReg())
12287 .addMBB(sinkMBB);
12288 } else {
12289 unsigned SelectPred = MI.getOperand(4).getImm();
12290 BuildMI(BB, dl, TII->get(PPC::BCC))
12291 .addImm(SelectPred)
12292 .addReg(MI.getOperand(1).getReg())
12293 .addMBB(sinkMBB);
12294 }
12295
12296 // copy0MBB:
12297 // %FalseValue = ...
12298 // # fallthrough to sinkMBB
12299 BB = copy0MBB;
12300
12301 // Update machine-CFG edges
12302 BB->addSuccessor(sinkMBB);
12303
12304 // sinkMBB:
12305 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
12306 // ...
12307 BB = sinkMBB;
12308 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
12309 .addReg(MI.getOperand(3).getReg())
12310 .addMBB(copy0MBB)
12311 .addReg(MI.getOperand(2).getReg())
12312 .addMBB(thisMBB);
12313 } else if (MI.getOpcode() == PPC::ReadTB) {
12314 // To read the 64-bit time-base register on a 32-bit target, we read the
12315 // two halves. Should the counter have wrapped while it was being read, we
12316 // need to try again.
12317 // ...
12318 // readLoop:
12319 // mfspr Rx,TBU # load from TBU
12320 // mfspr Ry,TB # load from TB
12321 // mfspr Rz,TBU # load from TBU
12322 // cmpw crX,Rx,Rz # check if 'old'='new'
12323 // bne readLoop # branch if they're not equal
12324 // ...
12325
12326 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
12327 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12328 DebugLoc dl = MI.getDebugLoc();
12329 F->insert(It, readMBB);
12330 F->insert(It, sinkMBB);
12331
12332 // Transfer the remainder of BB and its successor edges to sinkMBB.
12333 sinkMBB->splice(sinkMBB->begin(), BB,
12334 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12335 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
12336
12337 BB->addSuccessor(readMBB);
12338 BB = readMBB;
12339
12340 MachineRegisterInfo &RegInfo = F->getRegInfo();
12341 Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
12342 Register LoReg = MI.getOperand(0).getReg();
12343 Register HiReg = MI.getOperand(1).getReg();
12344
12345 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
12346 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
12347 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
12348
12349 Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12350
12351 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
12352 .addReg(HiReg)
12353 .addReg(ReadAgainReg);
12354 BuildMI(BB, dl, TII->get(PPC::BCC))
12355 .addImm(PPC::PRED_NE)
12356 .addReg(CmpReg)
12357 .addMBB(readMBB);
12358
12359 BB->addSuccessor(readMBB);
12360 BB->addSuccessor(sinkMBB);
12361 } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
12362 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
12363 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
12364 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
12365 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
12366 BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
12367 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
12368 BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
12369
12370 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
12371 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
12372 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
12373 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
12374 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
12375 BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
12376 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
12377 BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
12378
12379 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
12380 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
12381 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
12382 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
12383 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
12384 BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
12385 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
12386 BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
12387
12388 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
12389 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
12390 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
12391 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
12392 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
12393 BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
12394 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
12395 BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
12396
12397 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
12398 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
12399 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
12400 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
12401 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
12402 BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
12403 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
12404 BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
12405
12406 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
12407 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
12408 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
12409 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
12410 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
12411 BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
12412 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
12413 BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
12414
12415 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
12416 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE);
12417 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
12418 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE);
12419 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
12420 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE);
12421 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
12422 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE);
12423
12424 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
12425 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE);
12426 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
12427 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE);
12428 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
12429 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE);
12430 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
12431 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE);
12432
12433 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
12434 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE);
12435 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
12436 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE);
12437 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
12438 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE);
12439 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
12440 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE);
12441
12442 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
12443 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE);
12444 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
12445 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE);
12446 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
12447 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE);
12448 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
12449 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE);
12450
12451 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
12452 BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
12453 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
12454 BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
12455 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
12456 BB = EmitAtomicBinary(MI, BB, 4, 0);
12457 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
12458 BB = EmitAtomicBinary(MI, BB, 8, 0);
12459 else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
12460 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
12461 (Subtarget.hasPartwordAtomics() &&
12462 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
12463 (Subtarget.hasPartwordAtomics() &&
12464 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
12465 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
12466
12467 auto LoadMnemonic = PPC::LDARX;
12468 auto StoreMnemonic = PPC::STDCX;
12469 switch (MI.getOpcode()) {
12470 default:
12471 llvm_unreachable("Compare and swap of unknown size");
12472 case PPC::ATOMIC_CMP_SWAP_I8:
12473 LoadMnemonic = PPC::LBARX;
12474 StoreMnemonic = PPC::STBCX;
12475 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
12476 break;
12477 case PPC::ATOMIC_CMP_SWAP_I16:
12478 LoadMnemonic = PPC::LHARX;
12479 StoreMnemonic = PPC::STHCX;
12480 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
12481 break;
12482 case PPC::ATOMIC_CMP_SWAP_I32:
12483 LoadMnemonic = PPC::LWARX;
12484 StoreMnemonic = PPC::STWCX;
12485 break;
12486 case PPC::ATOMIC_CMP_SWAP_I64:
12487 LoadMnemonic = PPC::LDARX;
12488 StoreMnemonic = PPC::STDCX;
12489 break;
12490 }
12491 Register dest = MI.getOperand(0).getReg();
12492 Register ptrA = MI.getOperand(1).getReg();
12493 Register ptrB = MI.getOperand(2).getReg();
12494 Register oldval = MI.getOperand(3).getReg();
12495 Register newval = MI.getOperand(4).getReg();
12496 DebugLoc dl = MI.getDebugLoc();
12497
12498 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
12499 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
12500 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
12501 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12502 F->insert(It, loop1MBB);
12503 F->insert(It, loop2MBB);
12504 F->insert(It, midMBB);
12505 F->insert(It, exitMBB);
12506 exitMBB->splice(exitMBB->begin(), BB,
12507 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12508 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12509
12510 // thisMBB:
12511 // ...
12512 // fallthrough --> loopMBB
12513 BB->addSuccessor(loop1MBB);
12514
12515 // loop1MBB:
12516 // l[bhwd]arx dest, ptr
12517 // cmp[wd] dest, oldval
12518 // bne- midMBB
12519 // loop2MBB:
12520 // st[bhwd]cx. newval, ptr
12521 // bne- loopMBB
12522 // b exitBB
12523 // midMBB:
12524 // st[bhwd]cx. dest, ptr
12525 // exitBB:
12526 BB = loop1MBB;
12527 BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
12528 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
12529 .addReg(oldval)
12530 .addReg(dest);
12531 BuildMI(BB, dl, TII->get(PPC::BCC))
12532 .addImm(PPC::PRED_NE)
12533 .addReg(PPC::CR0)
12534 .addMBB(midMBB);
12535 BB->addSuccessor(loop2MBB);
12536 BB->addSuccessor(midMBB);
12537
12538 BB = loop2MBB;
12539 BuildMI(BB, dl, TII->get(StoreMnemonic))
12540 .addReg(newval)
12541 .addReg(ptrA)
12542 .addReg(ptrB);
12543 BuildMI(BB, dl, TII->get(PPC::BCC))
12544 .addImm(PPC::PRED_NE)
12545 .addReg(PPC::CR0)
12546 .addMBB(loop1MBB);
12547 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
12548 BB->addSuccessor(loop1MBB);
12549 BB->addSuccessor(exitMBB);
12550
12551 BB = midMBB;
12552 BuildMI(BB, dl, TII->get(StoreMnemonic))
12553 .addReg(dest)
12554 .addReg(ptrA)
12555 .addReg(ptrB);
12556 BB->addSuccessor(exitMBB);
12557
12558 // exitMBB:
12559 // ...
12560 BB = exitMBB;
12561 } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
12562 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
12563 // We must use 64-bit registers for addresses when targeting 64-bit,
12564 // since we're actually doing arithmetic on them. Other registers
12565 // can be 32-bit.
12566 bool is64bit = Subtarget.isPPC64();
12567 bool isLittleEndian = Subtarget.isLittleEndian();
12568 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
12569
12570 Register dest = MI.getOperand(0).getReg();
12571 Register ptrA = MI.getOperand(1).getReg();
12572 Register ptrB = MI.getOperand(2).getReg();
12573 Register oldval = MI.getOperand(3).getReg();
12574 Register newval = MI.getOperand(4).getReg();
12575 DebugLoc dl = MI.getDebugLoc();
12576
12577 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
12578 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
12579 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
12580 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12581 F->insert(It, loop1MBB);
12582 F->insert(It, loop2MBB);
12583 F->insert(It, midMBB);
12584 F->insert(It, exitMBB);
12585 exitMBB->splice(exitMBB->begin(), BB,
12586 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12587 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12588
12589 MachineRegisterInfo &RegInfo = F->getRegInfo();
12590 const TargetRegisterClass *RC =
12591 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12592 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
12593
12594 Register PtrReg = RegInfo.createVirtualRegister(RC);
12595 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
12596 Register ShiftReg =
12597 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
12598 Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
12599 Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
12600 Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
12601 Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
12602 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
12603 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
12604 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
12605 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
12606 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
12607 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
12608 Register Ptr1Reg;
12609 Register TmpReg = RegInfo.createVirtualRegister(GPRC);
12610 Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
12611 // thisMBB:
12612 // ...
12613 // fallthrough --> loopMBB
12614 BB->addSuccessor(loop1MBB);
12615
12616 // The 4-byte load must be aligned, while a char or short may be
12617 // anywhere in the word. Hence all this nasty bookkeeping code.
12618 // add ptr1, ptrA, ptrB [copy if ptrA==0]
12619 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
12620 // xori shift, shift1, 24 [16]
12621 // rlwinm ptr, ptr1, 0, 0, 29
12622 // slw newval2, newval, shift
12623 // slw oldval2, oldval, shift
12624 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
12625 // slw mask, mask2, shift
12626 // and newval3, newval2, mask
12627 // and oldval3, oldval2, mask
12628 // loop1MBB:
12629 // lwarx tmpDest, ptr
12630 // and tmp, tmpDest, mask
12631 // cmpw tmp, oldval3
12632 // bne- midMBB
12633 // loop2MBB:
12634 // andc tmp2, tmpDest, mask
12635 // or tmp4, tmp2, newval3
12636 // stwcx. tmp4, ptr
12637 // bne- loop1MBB
12638 // b exitBB
12639 // midMBB:
12640 // stwcx. tmpDest, ptr
12641 // exitBB:
12642 // srw dest, tmpDest, shift
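// Note: the srw emitted into exitMBB below shifts the masked original value
// back down so that dest receives the old byte/halfword in its low-order
// bits; the failure path (midMBB) stores tmpDest back unchanged, which
// serves to release the reservation.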
12643 if (ptrA != ZeroReg) {
12644 Ptr1Reg = RegInfo.createVirtualRegister(RC);
12645 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
12646 .addReg(ptrA)
12647 .addReg(ptrB);
12648 } else {
12649 Ptr1Reg = ptrB;
12650 }
12651
12652 // We need to use a 32-bit subregister to avoid a register class mismatch in
12653 // 64-bit mode.
12654 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
12655 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
12656 .addImm(3)
12657 .addImm(27)
12658 .addImm(is8bit ? 28 : 27);
12659 if (!isLittleEndian)
12660 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
12661 .addReg(Shift1Reg)
12662 .addImm(is8bit ? 24 : 16);
12663 if (is64bit)
12664 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
12665 .addReg(Ptr1Reg)
12666 .addImm(0)
12667 .addImm(61);
12668 else
12669 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
12670 .addReg(Ptr1Reg)
12671 .addImm(0)
12672 .addImm(0)
12673 .addImm(29);
12674 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
12675 .addReg(newval)
12676 .addReg(ShiftReg);
12677 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
12678 .addReg(oldval)
12679 .addReg(ShiftReg);
12680 if (is8bit)
12681 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
12682 else {
12683 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
12684 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
12685 .addReg(Mask3Reg)
12686 .addImm(65535);
12687 }
12688 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
12689 .addReg(Mask2Reg)
12690 .addReg(ShiftReg);
12691 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
12692 .addReg(NewVal2Reg)
12693 .addReg(MaskReg);
12694 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
12695 .addReg(OldVal2Reg)
12696 .addReg(MaskReg);
12697
12698 BB = loop1MBB;
12699 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
12700 .addReg(ZeroReg)
12701 .addReg(PtrReg);
12702 BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
12703 .addReg(TmpDestReg)
12704 .addReg(MaskReg);
12705 BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
12706 .addReg(TmpReg)
12707 .addReg(OldVal3Reg);
12708 BuildMI(BB, dl, TII->get(PPC::BCC))
12709 .addImm(PPC::PRED_NE)
12710 .addReg(PPC::CR0)
12711 .addMBB(midMBB);
12712 BB->addSuccessor(loop2MBB);
12713 BB->addSuccessor(midMBB);
12714
12715 BB = loop2MBB;
12716 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
12717 .addReg(TmpDestReg)
12718 .addReg(MaskReg);
12719 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
12720 .addReg(Tmp2Reg)
12721 .addReg(NewVal3Reg);
12722 BuildMI(BB, dl, TII->get(PPC::STWCX))
12723 .addReg(Tmp4Reg)
12724 .addReg(ZeroReg)
12725 .addReg(PtrReg);
12726 BuildMI(BB, dl, TII->get(PPC::BCC))
12727 .addImm(PPC::PRED_NE)
12728 .addReg(PPC::CR0)
12729 .addMBB(loop1MBB);
12730 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
12731 BB->addSuccessor(loop1MBB);
12732 BB->addSuccessor(exitMBB);
12733
12734 BB = midMBB;
12735 BuildMI(BB, dl, TII->get(PPC::STWCX))
12736 .addReg(TmpDestReg)
12737 .addReg(ZeroReg)
12738 .addReg(PtrReg);
12739 BB->addSuccessor(exitMBB);
12740
12741 // exitMBB:
12742 // ...
12743 BB = exitMBB;
12744 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
12745 .addReg(TmpReg)
12746 .addReg(ShiftReg);
12747 } else if (MI.getOpcode() == PPC::FADDrtz) {
12748 // This pseudo performs an FADD with rounding mode temporarily forced
12749 // to round-to-zero. We emit this via custom inserter since the FPSCR
12750 // is not modeled at the SelectionDAG level.
12751 Register Dest = MI.getOperand(0).getReg();
12752 Register Src1 = MI.getOperand(1).getReg();
12753 Register Src2 = MI.getOperand(2).getReg();
12754 DebugLoc dl = MI.getDebugLoc();
12755
12756 MachineRegisterInfo &RegInfo = F->getRegInfo();
12757 Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
12758
12759 // Save FPSCR value.
12760 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
12761
12762 // Set rounding mode to round-to-zero.
12763 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
12764 .addImm(31)
12765 .addReg(PPC::RM, RegState::ImplicitDefine);
12766
12767 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
12768 .addImm(30)
12769 .addReg(PPC::RM, RegState::ImplicitDefine);
12770
12771 // Perform addition.
12772 auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
12773 .addReg(Src1)
12774 .addReg(Src2);
12775 if (MI.getFlag(MachineInstr::NoFPExcept))
12776 MIB.setMIFlag(MachineInstr::NoFPExcept);
12777
12778 // Restore FPSCR value.
12779 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
12780 } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
12781 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
12782 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
12783 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
12784 unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
12785 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
12786 ? PPC::ANDI8_rec
12787 : PPC::ANDI_rec;
12788 bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
12789 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
12790
12791 MachineRegisterInfo &RegInfo = F->getRegInfo();
12792 Register Dest = RegInfo.createVirtualRegister(
12793 Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
12794
12795 DebugLoc Dl = MI.getDebugLoc();
12796 BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
12797 .addReg(MI.getOperand(1).getReg())
12798 .addImm(1);
12799 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12800 MI.getOperand(0).getReg())
12801 .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
12802 } else if (MI.getOpcode() == PPC::TCHECK_RET) {
12803 DebugLoc Dl = MI.getDebugLoc();
12804 MachineRegisterInfo &RegInfo = F->getRegInfo();
12805 Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12806 BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
12807 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12808 MI.getOperand(0).getReg())
12809 .addReg(CRReg);
12810 } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
12811 DebugLoc Dl = MI.getDebugLoc();
12812 unsigned Imm = MI.getOperand(1).getImm();
12813 BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
12814 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12815 MI.getOperand(0).getReg())
12816 .addReg(PPC::CR0EQ);
12817 } else if (MI.getOpcode() == PPC::SETRNDi) {
12818 DebugLoc dl = MI.getDebugLoc();
12819 Register OldFPSCRReg = MI.getOperand(0).getReg();
12820
12821 // Save FPSCR value.
12822 if (MRI.use_empty(OldFPSCRReg))
12823 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
12824 else
12825 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
12826
12827 // The floating-point rounding mode is in bits 62:63 of the FPSCR, and has
12828 // the following settings:
12829 // 00 Round to nearest
12830 // 01 Round to 0
12831 // 10 Round to +inf
12832 // 11 Round to -inf
12833
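// For example, Mode = 1 (round to 0) emits MTFSB1 31 (set FPSCR bit 63) and
// MTFSB0 30 (clear FPSCR bit 62), while Mode = 2 (round to +inf) emits
// MTFSB0 31 and MTFSB1 30.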
12834 // When the operand is an immediate, use its two least significant bits to
12835 // set bits 62:63 of the FPSCR.
12836 unsigned Mode = MI.getOperand(1).getImm();
12837 BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
12838 .addImm(31)
12839 .addReg(PPC::RM, RegState::ImplicitDefine);
12840
12841 BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
12842 .addImm(30)
12843 .addReg(PPC::RM, RegState::ImplicitDefine);
12844 } else if (MI.getOpcode() == PPC::SETRND) {
12845 DebugLoc dl = MI.getDebugLoc();
12846
12847 // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
12848 // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
12849 // If the target doesn't have DirectMove, we should use the stack to do the
12850 // conversion, because the target lacks instructions like mtvsrd or mfvsrd
12851 // to do this conversion directly.
12852 auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
12853 if (Subtarget.hasDirectMove()) {
12854 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
12855 .addReg(SrcReg);
12856 } else {
12857 // Use stack to do the register copy.
12858 unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
12859 MachineRegisterInfo &RegInfo = F->getRegInfo();
12860 const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
12861 if (RC == &PPC::F8RCRegClass) {
12862 // Copy register from F8RCRegClass to G8RCRegclass.
12863 assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
12864 "Unsupported RegClass.");
12865
12866 StoreOp = PPC::STFD;
12867 LoadOp = PPC::LD;
12868 } else {
12869 // Copy register from G8RCRegClass to F8RCRegclass.
12870 assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
12871 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
12872 "Unsupported RegClass.");
12873 }
12874
12875 MachineFrameInfo &MFI = F->getFrameInfo();
12876 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
12877
12878 MachineMemOperand *MMOStore = F->getMachineMemOperand(
12879 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
12880 MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
12881 MFI.getObjectAlign(FrameIdx));
12882
12883 // Store the SrcReg into the stack.
12884 BuildMI(*BB, MI, dl, TII->get(StoreOp))
12885 .addReg(SrcReg)
12886 .addImm(0)
12887 .addFrameIndex(FrameIdx)
12888 .addMemOperand(MMOStore);
12889
12890 MachineMemOperand *MMOLoad = F->getMachineMemOperand(
12891 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
12892 MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
12893 MFI.getObjectAlign(FrameIdx));
12894
12895 // Load from the stack where SrcReg is stored, and save to DestReg,
12896 // so we have done the RegClass conversion from RegClass::SrcReg to
12897 // RegClass::DestReg.
12898 BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
12899 .addImm(0)
12900 .addFrameIndex(FrameIdx)
12901 .addMemOperand(MMOLoad);
12902 }
12903 };
12904
12905 Register OldFPSCRReg = MI.getOperand(0).getReg();
12906
12907 // Save FPSCR value.
12908 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
12909
12910 // When the operand is a gprc register, use its two least significant bits and
12911 // the mtfsf instruction to set bits 62:63 of the FPSCR.
12912 //
12913 // copy OldFPSCRTmpReg, OldFPSCRReg
12914 // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
12915 // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
12916 // copy NewFPSCRReg, NewFPSCRTmpReg
12917 // mtfsf 255, NewFPSCRReg
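// In other words, the rldimi with SH=0 and MB=62 inserts only bits 62:63 of
// ExtSrcReg (the requested rounding mode) into the old FPSCR image, leaving
// every other bit unchanged before the mtfsf writes the image back.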
12918 MachineOperand SrcOp = MI.getOperand(1);
12919 MachineRegisterInfo &RegInfo = F->getRegInfo();
12920 Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12921
12922 copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
12923
12924 Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12925 Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12926
12927 // The first operand of INSERT_SUBREG should be a register that has
12928 // subregisters. We only care about its RegClass, so we should use an
12929 // IMPLICIT_DEF register.
12930 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
12931 BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
12932 .addReg(ImDefReg)
12933 .add(SrcOp)
12934 .addImm(1);
12935
12936 Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12937 BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
12938 .addReg(OldFPSCRTmpReg)
12939 .addReg(ExtSrcReg)
12940 .addImm(0)
12941 .addImm(62);
12942
12943 Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
12944 copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
12945
12946 // The mask 255 means that bits 32:63 of NewFPSCRReg are put into bits 32:63
12947 // of the FPSCR.
12948 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
12949 .addImm(255)
12950 .addReg(NewFPSCRReg)
12951 .addImm(0)
12952 .addImm(0);
12953 } else if (MI.getOpcode() == PPC::SETFLM) {
12954 DebugLoc Dl = MI.getDebugLoc();
12955
12956 // The result of setflm is the previous FPSCR content, so we need to save it first.
12957 Register OldFPSCRReg = MI.getOperand(0).getReg();
12958 if (MRI.use_empty(OldFPSCRReg))
12959 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
12960 else
12961 BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
12962
12963 // Put bits 32:63 into the FPSCR.
12964 Register NewFPSCRReg = MI.getOperand(1).getReg();
12965 BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
12966 .addImm(255)
12967 .addReg(NewFPSCRReg)
12968 .addImm(0)
12969 .addImm(0);
12970 } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
12971 MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
12972 return emitProbedAlloca(MI, BB);
12973 } else if (MI.getOpcode() == PPC::SPLIT_QUADWORD) {
12974 DebugLoc DL = MI.getDebugLoc();
12975 Register Src = MI.getOperand(2).getReg();
12976 Register Lo = MI.getOperand(0).getReg();
12977 Register Hi = MI.getOperand(1).getReg();
12978 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
12979 .addDef(Lo)
12980 .addUse(Src, 0, PPC::sub_gp8_x1);
12981 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
12982 .addDef(Hi)
12983 .addUse(Src, 0, PPC::sub_gp8_x0);
12984 } else if (MI.getOpcode() == PPC::LQX_PSEUDO ||
12985 MI.getOpcode() == PPC::STQX_PSEUDO) {
12986 DebugLoc DL = MI.getDebugLoc();
12987 // Ptr is used as the ptr_rc_no_r0 part of LQ/STQ's memory operand and holds
12988 // the result of adding RA and RB,
12989 // so it has to be g8rc_and_g8rc_nox0.
12990 Register Ptr =
12991 F->getRegInfo().createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
12992 Register Val = MI.getOperand(0).getReg();
12993 Register RA = MI.getOperand(1).getReg();
12994 Register RB = MI.getOperand(2).getReg();
12995 BuildMI(*BB, MI, DL, TII->get(PPC::ADD8), Ptr).addReg(RA).addReg(RB);
12996 BuildMI(*BB, MI, DL,
12997 MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(PPC::LQ)
12998 : TII->get(PPC::STQ))
12999 .addReg(Val, MI.getOpcode() == PPC::LQX_PSEUDO ? RegState::Define : 0)
13000 .addImm(0)
13001 .addReg(Ptr);
13002 } else {
13003 llvm_unreachable("Unexpected instr type to insert");
13004 }
13005
13006 MI.eraseFromParent(); // The pseudo instruction is gone now.
13007 return BB;
13008}
13009
13010//===----------------------------------------------------------------------===//
13011// Target Optimization Hooks
13012//===----------------------------------------------------------------------===//
13013
13014static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
13015 // For the estimates, convergence is quadratic, so we essentially double the
13016 // number of digits correct after every iteration. For both FRE and FRSQRTE,
13017 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
13018 // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
13019 int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
13020 if (VT.getScalarType() == MVT::f64)
13021 RefinementSteps++;
13022 return RefinementSteps;
13023}
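// [Editor's note - illustrative sketch, not part of the original source.]
// A rough way to see where these step counts come from, assuming accuracy
// doubles with every Newton-Raphson iteration:
//
//   int neededSteps(int EstimateBits, int MantissaBits) {
//     int Steps = 0;
//     for (int Bits = EstimateBits; Bits < MantissaBits; Bits *= 2)
//       ++Steps;
//     return Steps;
//   }
//   // neededSteps(5, 24)  == 3   (no hasRecipPrec, f32)
//   // neededSteps(14, 24) == 1   (hasRecipPrec, f32)
//   // neededSteps(5, 53)  == 4   (no hasRecipPrec, f64: one extra step)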
13024
13025SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
13026 const DenormalMode &Mode) const {
13027 // We only have VSX Vector Test for software Square Root.
13028 EVT VT = Op.getValueType();
13029 if (!isTypeLegal(MVT::i1) ||
13030 (VT != MVT::f64 &&
13031 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
13032 return TargetLowering::getSqrtInputTest(Op, DAG, Mode);
13033
13034 SDLoc DL(Op);
13035 // The output register of FTSQRT is a CR field.
13036 SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op);
13037 // ftsqrt BF,FRB
13038 // Let e_b be the unbiased exponent of the double-precision
13039 // floating-point operand in register FRB.
13040 // fe_flag is set to 1 if either of the following conditions occurs.
13041 // - The double-precision floating-point operand in register FRB is a zero,
13042 // a NaN, an infinity, or a negative value.
13043 // - e_b is less than or equal to -970.
13044 // Otherwise fe_flag is set to 0.
13045 // Both VSX and non-VSX versions would set EQ bit in the CR if the number is
13046 // not eligible for iteration. (zero/negative/infinity/nan or unbiased
13047 // exponent is less than -970)
13048 SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
13049 return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
13050 FTSQRT, SRIdxVal),
13051 0);
13052}
13053
13054SDValue
13055PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
13056 SelectionDAG &DAG) const {
13057 // We only have VSX Vector Square Root.
13058 EVT VT = Op.getValueType();
13059 if (VT != MVT::f64 &&
13060 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
13061 return TargetLowering::getSqrtResultForDenormInput(Op, DAG);
13062
13063 return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
13064}
13065
13066SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
13067 int Enabled, int &RefinementSteps,
13068 bool &UseOneConstNR,
13069 bool Reciprocal) const {
13070 EVT VT = Operand.getValueType();
13071 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
13072 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
13073 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
13074 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
13075 if (RefinementSteps == ReciprocalEstimate::Unspecified)
13076 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
13077
13078 // The Newton-Raphson computation with a single constant does not provide
13079 // enough accuracy on some CPUs.
13080 UseOneConstNR = !Subtarget.needsTwoConstNR();
13081 return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
13082 }
13083 return SDValue();
13084}
13085
13086SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
13087 int Enabled,
13088 int &RefinementSteps) const {
13089 EVT VT = Operand.getValueType();
13090 if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
13091 (VT == MVT::f64 && Subtarget.hasFRE()) ||
13092 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
13093 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
13094 if (RefinementSteps == ReciprocalEstimate::Unspecified)
13095 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
13096 return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
13097 }
13098 return SDValue();
13099}
13100
13101unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
13102 // Note: This functionality is used only when unsafe-fp-math is enabled, and
13103 // on cores with reciprocal estimates (which are used when unsafe-fp-math is
13104 // enabled for division), this functionality is redundant with the default
13105 // combiner logic (once the division -> reciprocal/multiply transformation
13106 // has taken place). As a result, this matters more for older cores than for
13107 // newer ones.
13108
13109 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
13110 // reciprocal if there are two or more FDIVs (for embedded cores with only
13111 // one FP pipeline) or three or more FDIVs (for generic OOO cores).
13112 switch (Subtarget.getCPUDirective()) {
13113 default:
13114 return 3;
13115 case PPC::DIR_440:
13116 case PPC::DIR_A2:
13117 case PPC::DIR_E500:
13118 case PPC::DIR_E500mc:
13119 case PPC::DIR_E5500:
13120 return 2;
13121 }
13122}
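// [Editor's note - illustrative example, not part of the original source.]
// With a threshold of 2, IR such as
//   %x = fdiv fast double %a, %d
//   %y = fdiv fast double %b, %d
// can be rewritten by the combiner as
//   %r = fdiv fast double 1.0, %d
//   %x = fmul fast double %a, %r
//   %y = fmul fast double %b, %r
// trading the repeated divisions for one division plus cheap multiplies.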
13123
13124// isConsecutiveLSLoc needs to work even if all adds have not yet been
13125// collapsed, and so we need to look through chains of them.
13126static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
13127 int64_t& Offset, SelectionDAG &DAG) {
13128 if (DAG.isBaseWithConstantOffset(Loc)) {
13129 Base = Loc.getOperand(0);
13130 Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
13131
13132 // The base might itself be a base plus an offset, and if so, accumulate
13133 // that as well.
13134 getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
13135 }
13136}
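// [Editor's note - illustrative example, not part of the original source.]
// Given a not-yet-collapsed chain such as
//   Loc = (add (add %base, 16), 8)
// the recursion above accumulates Offset = 8 + 16 = 24 and leaves Base as the
// innermost operand %base, which is what isConsecutiveLSLoc then compares.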
13137
13138static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
13139 unsigned Bytes, int Dist,
13140 SelectionDAG &DAG) {
13141 if (VT.getSizeInBits() / 8 != Bytes)
13142 return false;
13143
13144 SDValue BaseLoc = Base->getBasePtr();
13145 if (Loc.getOpcode() == ISD::FrameIndex) {
13146 if (BaseLoc.getOpcode() != ISD::FrameIndex)
13147 return false;
13148 const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
13149 int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
13150 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
13151 int FS = MFI.getObjectSize(FI);
13152 int BFS = MFI.getObjectSize(BFI);
13153 if (FS != BFS || FS != (int)Bytes) return false;
13154 return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
13155 }
13156
13157 SDValue Base1 = Loc, Base2 = BaseLoc;
13158 int64_t Offset1 = 0, Offset2 = 0;
13159 getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
13160 getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
13161 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
13162 return true;
13163
13164 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13165 const GlobalValue *GV1 = nullptr;
13166 const GlobalValue *GV2 = nullptr;
13167 Offset1 = 0;
13168 Offset2 = 0;
13169 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
13170 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
13171 if (isGA1 && isGA2 && GV1 == GV2)
13172 return Offset1 == (Offset2 + Dist*Bytes);
13173 return false;
13174}
13175
13176// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
13177// not enforce equality of the chain operands.
13178static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
13179 unsigned Bytes, int Dist,
13180 SelectionDAG &DAG) {
13181 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
13182 EVT VT = LS->getMemoryVT();
13183 SDValue Loc = LS->getBasePtr();
13184 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
13185 }
13186
13187 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
13188 EVT VT;
13189 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
13190 default: return false;
13191 case Intrinsic::ppc_altivec_lvx:
13192 case Intrinsic::ppc_altivec_lvxl:
13193 case Intrinsic::ppc_vsx_lxvw4x:
13194 case Intrinsic::ppc_vsx_lxvw4x_be:
13195 VT = MVT::v4i32;
13196 break;
13197 case Intrinsic::ppc_vsx_lxvd2x:
13198 case Intrinsic::ppc_vsx_lxvd2x_be:
13199 VT = MVT::v2f64;
13200 break;
13201 case Intrinsic::ppc_altivec_lvebx:
13202 VT = MVT::i8;
13203 break;
13204 case Intrinsic::ppc_altivec_lvehx:
13205 VT = MVT::i16;
13206 break;
13207 case Intrinsic::ppc_altivec_lvewx:
13208 VT = MVT::i32;
13209 break;
13210 }
13211
13212 return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
13213 }
13214
13215 if (N->getOpcode() == ISD::INTRINSIC_VOID) {
13216 EVT VT;
13217 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
13218 default: return false;
13219 case Intrinsic::ppc_altivec_stvx:
13220 case Intrinsic::ppc_altivec_stvxl:
13221 case Intrinsic::ppc_vsx_stxvw4x:
13222 VT = MVT::v4i32;
13223 break;
13224 case Intrinsic::ppc_vsx_stxvd2x:
13225 VT = MVT::v2f64;
13226 break;
13227 case Intrinsic::ppc_vsx_stxvw4x_be:
13228 VT = MVT::v4i32;
13229 break;
13230 case Intrinsic::ppc_vsx_stxvd2x_be:
13231 VT = MVT::v2f64;
13232 break;
13233 case Intrinsic::ppc_altivec_stvebx:
13234 VT = MVT::i8;
13235 break;
13236 case Intrinsic::ppc_altivec_stvehx:
13237 VT = MVT::i16;
13238 break;
13239 case Intrinsic::ppc_altivec_stvewx:
13240 VT = MVT::i32;
13241 break;
13242 }
13243
13244 return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
13245 }
13246
13247 return false;
13248}
13249
13250 // Return true if there is a nearby consecutive load to the one provided
13251 // (regardless of alignment). We search up and down the chain, looking through
13252// token factors and other loads (but nothing else). As a result, a true result
13253// indicates that it is safe to create a new consecutive load adjacent to the
13254// load provided.
13255static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
13256 SDValue Chain = LD->getChain();
13257 EVT VT = LD->getMemoryVT();
13258
13259 SmallSet<SDNode *, 16> LoadRoots;
13260 SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
13261 SmallSet<SDNode *, 16> Visited;
13262
13263 // First, search up the chain, branching to follow all token-factor operands.
13264 // If we find a consecutive load, then we're done, otherwise, record all
13265 // nodes just above the top-level loads and token factors.
13266 while (!Queue.empty()) {
13267 SDNode *ChainNext = Queue.pop_back_val();
13268 if (!Visited.insert(ChainNext).second)
13269 continue;
13270
13271 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
13272 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
13273 return true;
13274
13275 if (!Visited.count(ChainLD->getChain().getNode()))
13276 Queue.push_back(ChainLD->getChain().getNode());
13277 } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
13278 for (const SDUse &O : ChainNext->ops())
13279 if (!Visited.count(O.getNode()))
13280 Queue.push_back(O.getNode());
13281 } else
13282 LoadRoots.insert(ChainNext);
13283 }
13284
13285 // Second, search down the chain, starting from the top-level nodes recorded
13286 // in the first phase. These top-level nodes are the nodes just above all
13287 // loads and token factors. Starting with their uses, recursively look through
13288 // all loads (just the chain uses) and token factors to find a consecutive
13289 // load.
13290 Visited.clear();
13291 Queue.clear();
13292
13293 for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
13294 IE = LoadRoots.end(); I != IE; ++I) {
13295 Queue.push_back(*I);
13296
13297 while (!Queue.empty()) {
13298 SDNode *LoadRoot = Queue.pop_back_val();
13299 if (!Visited.insert(LoadRoot).second)
13300 continue;
13301
13302 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
13303 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
13304 return true;
13305
13306 for (SDNode *U : LoadRoot->uses())
13307 if (((isa<MemSDNode>(U) &&
13308 cast<MemSDNode>(U)->getChain().getNode() == LoadRoot) ||
13309 U->getOpcode() == ISD::TokenFactor) &&
13310 !Visited.count(U))
13311 Queue.push_back(U);
13312 }
13313 }
13314
13315 return false;
13316}
13317
13318/// This function is called when we have proved that a SETCC node can be replaced
13319/// by subtraction (and other supporting instructions) so that the result of
13320 /// the comparison is kept in a GPR instead of a CR. This function is purely for
13321/// codegen purposes and has some flags to guide the codegen process.
13322static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
13323 bool Swap, SDLoc &DL, SelectionDAG &DAG) {
13324 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
13325
13326 // Zero extend the operands to the largest legal integer. Originally, they
13327 // must be of a strictly smaller size.
13328 auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
13329 DAG.getConstant(Size, DL, MVT::i32));
13330 auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
13331 DAG.getConstant(Size, DL, MVT::i32));
13332
13333 // Swap if needed. Depends on the condition code.
13334 if (Swap)
13335 std::swap(Op0, Op1);
13336
13337 // Subtract extended integers.
13338 auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
13339
13340 // Move the sign bit to the least significant position and zero out the rest.
13341 // Now the least significant bit carries the result of the original comparison.
13342 auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
13343 DAG.getConstant(Size - 1, DL, MVT::i32));
13344 auto Final = Shifted;
13345
13346 // Complement the result if needed. Based on the condition code.
13347 if (Complement)
13348 Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
13349 DAG.getConstant(1, DL, MVT::i64));
13350
13351 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
13352}
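// [Editor's note - worked example, not part of the original source.]
// For a SETULT of two i32 values on a 64-bit target (Size == 64):
//   zero extend both operands to i64 and compute Sub = Op0 - Op1;
//   if Op0 <u Op1 the subtraction borrows, so bit 63 of Sub is 1;
//   srl Sub, 63 moves that bit into the LSB, and the final truncate to i1
//   yields exactly the value of the original unsigned comparison.
// The Swap/Complement flags cover the SETULE/SETUGT/SETUGE variants.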
13353
13354SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
13355 DAGCombinerInfo &DCI) const {
13356 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
13357
13358 SelectionDAG &DAG = DCI.DAG;
13359 SDLoc DL(N);
13360
13361 // Size of integers being compared has a critical role in the following
13362 // analysis, so we prefer to do this when all types are legal.
13363 if (!DCI.isAfterLegalizeDAG())
13364 return SDValue();
13365
13366 // If all users of SETCC extend its value to a legal integer type,
13367 // then we replace SETCC with a subtraction.
13368 for (const SDNode *U : N->uses())
13369 if (U->getOpcode() != ISD::ZERO_EXTEND)
13370 return SDValue();
13371
13372 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13373 auto OpSize = N->getOperand(0).getValueSizeInBits();
13374
13375 unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
13376
13377 if (OpSize < Size) {
13378 switch (CC) {
13379 default: break;
13380 case ISD::SETULT:
13381 return generateEquivalentSub(N, Size, false, false, DL, DAG);
13382 case ISD::SETULE:
13383 return generateEquivalentSub(N, Size, true, true, DL, DAG);
13384 case ISD::SETUGT:
13385 return generateEquivalentSub(N, Size, false, true, DL, DAG);
13386 case ISD::SETUGE:
13387 return generateEquivalentSub(N, Size, true, false, DL, DAG);
13388 }
13389 }
13390
13391 return SDValue();
13392}
13393
13394SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
13395 DAGCombinerInfo &DCI) const {
13396 SelectionDAG &DAG = DCI.DAG;
13397 SDLoc dl(N);
13398
13399 assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
13400 // If we're tracking CR bits, we need to be careful that we don't have:
13401 // trunc(binary-ops(zext(x), zext(y)))
13402 // or
13403 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...))
13404 // such that we're unnecessarily moving things into GPRs when it would be
13405 // better to keep them in CR bits.
13406
13407 // Note that trunc here can be an actual i1 trunc, or can be the effective
13408 // truncation that comes from a setcc or select_cc.
13409 if (N->getOpcode() == ISD::TRUNCATE &&
13410 N->getValueType(0) != MVT::i1)
13411 return SDValue();
13412
13413 if (N->getOperand(0).getValueType() != MVT::i32 &&
13414 N->getOperand(0).getValueType() != MVT::i64)
13415 return SDValue();
13416
13417 if (N->getOpcode() == ISD::SETCC ||
13418 N->getOpcode() == ISD::SELECT_CC) {
13419 // If we're looking at a comparison, then we need to make sure that the
13420 // high bits (all except for the first) don't affect the result.
13421 ISD::CondCode CC =
13422 cast<CondCodeSDNode>(N->getOperand(
13423 N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
13424 unsigned OpBits = N->getOperand(0).getValueSizeInBits();
13425
13426 if (ISD::isSignedIntSetCC(CC)) {
13427 if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
13428 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
13429 return SDValue();
13430 } else if (ISD::isUnsignedIntSetCC(CC)) {
13431 if (!DAG.MaskedValueIsZero(N->getOperand(0),
13432 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
13433 !DAG.MaskedValueIsZero(N->getOperand(1),
13434 APInt::getHighBitsSet(OpBits, OpBits-1)))
13435 return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
13436 : SDValue());
13437 } else {
13438 // This is neither a signed nor an unsigned comparison, just make sure
13439 // that the high bits are equal.
13440 KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
13441 KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
13442
13443 // We don't really care about what is known about the first bit (if
13444 // anything), so pretend that it is known zero for both to ensure they can
13445 // be compared as constants.
13446 Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
13447 Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);
13448
13449 if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
13450 Op1Known.getConstant() != Op2Known.getConstant())
13451 return SDValue();
13452 }
13453 }
13454
13455 // We now know that the higher-order bits are irrelevant, we just need to
13456 // make sure that all of the intermediate operations are bit operations, and
13457 // all inputs are extensions.
13458 if (N->getOperand(0).getOpcode() != ISD::AND &&
13459 N->getOperand(0).getOpcode() != ISD::OR &&
13460 N->getOperand(0).getOpcode() != ISD::XOR &&
13461 N->getOperand(0).getOpcode() != ISD::SELECT &&
13462 N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
13463 N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
13464 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
13465 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
13466 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
13467 return SDValue();
13468
13469 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
13470 N->getOperand(1).getOpcode() != ISD::AND &&
13471 N->getOperand(1).getOpcode() != ISD::OR &&
13472 N->getOperand(1).getOpcode() != ISD::XOR &&
13473 N->getOperand(1).getOpcode() != ISD::SELECT &&
13474 N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
13475 N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
13476 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
13477 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
13478 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
13479 return SDValue();
13480
13481 SmallVector<SDValue, 4> Inputs;
13482 SmallVector<SDValue, 8> BinOps, PromOps;
13483 SmallPtrSet<SDNode *, 16> Visited;
13484
13485 for (unsigned i = 0; i < 2; ++i) {
13486 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
13487 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
13488 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
13489 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
13490 isa<ConstantSDNode>(N->getOperand(i)))
13491 Inputs.push_back(N->getOperand(i));
13492 else
13493 BinOps.push_back(N->getOperand(i));
13494
13495 if (N->getOpcode() == ISD::TRUNCATE)
13496 break;
13497 }
13498
13499 // Visit all inputs, collect all binary operations (and, or, xor and
13500 // select) that are all fed by extensions.
13501 while (!BinOps.empty()) {
13502 SDValue BinOp = BinOps.pop_back_val();
13503
13504 if (!Visited.insert(BinOp.getNode()).second)
13505 continue;
13506
13507 PromOps.push_back(BinOp);
13508
13509 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
13510 // The condition of the select is not promoted.
13511 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
13512 continue;
13513 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
13514 continue;
13515
13516 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
13517 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
13518 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
13519 BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
13520 isa<ConstantSDNode>(BinOp.getOperand(i))) {
13521 Inputs.push_back(BinOp.getOperand(i));
13522 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
13523 BinOp.getOperand(i).getOpcode() == ISD::OR ||
13524 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
13525 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
13526 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
13527 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
13528 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
13529 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
13530 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
13531 BinOps.push_back(BinOp.getOperand(i));
13532 } else {
13533 // We have an input that is not an extension or another binary
13534 // operation; we'll abort this transformation.
13535 return SDValue();
13536 }
13537 }
13538 }
13539
13540 // Make sure that this is a self-contained cluster of operations (which
13541 // is not quite the same thing as saying that everything has only one
13542 // use).
13543 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13544 if (isa<ConstantSDNode>(Inputs[i]))
13545 continue;
13546
13547 for (const SDNode *User : Inputs[i].getNode()->uses()) {
13548 if (User != N && !Visited.count(User))
13549 return SDValue();
13550
13551 // Make sure that we're not going to promote the non-output-value
13552 // operand(s) or SELECT or SELECT_CC.
13553 // FIXME: Although we could sometimes handle this, and it does occur in
13554 // practice that one of the condition inputs to the select is also one of
13555 // the outputs, we currently can't deal with this.
13556 if (User->getOpcode() == ISD::SELECT) {
13557 if (User->getOperand(0) == Inputs[i])
13558 return SDValue();
13559 } else if (User->getOpcode() == ISD::SELECT_CC) {
13560 if (User->getOperand(0) == Inputs[i] ||
13561 User->getOperand(1) == Inputs[i])
13562 return SDValue();
13563 }
13564 }
13565 }
13566
13567 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
13568 for (const SDNode *User : PromOps[i].getNode()->uses()) {
13569 if (User != N && !Visited.count(User))
13570 return SDValue();
13571
13572 // Make sure that we're not going to promote the non-output-value
13573 // operand(s) or SELECT or SELECT_CC.
13574 // FIXME: Although we could sometimes handle this, and it does occur in
13575 // practice that one of the condition inputs to the select is also one of
13576 // the outputs, we currently can't deal with this.
13577 if (User->getOpcode() == ISD::SELECT) {
13578 if (User->getOperand(0) == PromOps[i])
13579 return SDValue();
13580 } else if (User->getOpcode() == ISD::SELECT_CC) {
13581 if (User->getOperand(0) == PromOps[i] ||
13582 User->getOperand(1) == PromOps[i])
13583 return SDValue();
13584 }
13585 }
13586 }
13587
13588 // Replace all inputs with the extension operand.
13589 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13590 // Constants may have users outside the cluster of to-be-promoted nodes,
13591 // and so we need to replace those as we do the promotions.
13592 if (isa<ConstantSDNode>(Inputs[i]))
13593 continue;
13594 else
13595 DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
13596 }
13597
13598 std::list<HandleSDNode> PromOpHandles;
13599 for (auto &PromOp : PromOps)
13600 PromOpHandles.emplace_back(PromOp);
13601
13602 // Replace all operations (these are all the same, but have a different
13603 // (i1) return type). DAG.getNode will validate that the types of
13604 // a binary operator match, so go through the list in reverse so that
13605 // we've likely promoted both operands first. Any intermediate truncations or
13606 // extensions disappear.
13607 while (!PromOpHandles.empty()) {
13608 SDValue PromOp = PromOpHandles.back().getValue();
13609 PromOpHandles.pop_back();
13610
13611 if (PromOp.getOpcode() == ISD::TRUNCATE ||
13612 PromOp.getOpcode() == ISD::SIGN_EXTEND ||
13613 PromOp.getOpcode() == ISD::ZERO_EXTEND ||
13614 PromOp.getOpcode() == ISD::ANY_EXTEND) {
13615 if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
13616 PromOp.getOperand(0).getValueType() != MVT::i1) {
13617 // The operand is not yet ready (see comment below).
13618 PromOpHandles.emplace_front(PromOp);
13619 continue;
13620 }
13621
13622 SDValue RepValue = PromOp.getOperand(0);
13623 if (isa<ConstantSDNode>(RepValue))
13624 RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
13625
13626 DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
13627 continue;
13628 }
13629
13630 unsigned C;
13631 switch (PromOp.getOpcode()) {
13632 default: C = 0; break;
13633 case ISD::SELECT: C = 1; break;
13634 case ISD::SELECT_CC: C = 2; break;
13635 }
13636
13637 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
13638 PromOp.getOperand(C).getValueType() != MVT::i1) ||
13639 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
13640 PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
13641 // The to-be-promoted operands of this node have not yet been
13642 // promoted (this should be rare because we're going through the
13643 // list backward, but if one of the operands has several users in
13644 // this cluster of to-be-promoted nodes, it is possible).
13645 PromOpHandles.emplace_front(PromOp);
13646 continue;
13647 }
13648
13649 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
13650 PromOp.getNode()->op_end());
13651
13652 // If there are any constant inputs, make sure they're replaced now.
13653 for (unsigned i = 0; i < 2; ++i)
13654 if (isa<ConstantSDNode>(Ops[C+i]))
13655 Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
13656
13657 DAG.ReplaceAllUsesOfValueWith(PromOp,
13658 DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
13659 }
13660
13661 // Now we're left with the initial truncation itself.
13662 if (N->getOpcode() == ISD::TRUNCATE)
13663 return N->getOperand(0);
13664
13665 // Otherwise, this is a comparison. The operands to be compared have just
13666 // changed type (to i1), but everything else is the same.
13667 return SDValue(N, 0);
13668}
13669
13670SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
13671 DAGCombinerInfo &DCI) const {
13672 SelectionDAG &DAG = DCI.DAG;
13673 SDLoc dl(N);
13674
13675 // If we're tracking CR bits, we need to be careful that we don't have:
13676 // zext(binary-ops(trunc(x), trunc(y)))
13677 // or
13678 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...))
13679 // such that we're unnecessarily moving things into CR bits that can more
13680 // efficiently stay in GPRs. Note that if we're not certain that the high
13681 // bits are set as required by the final extension, we still may need to do
13682 // some masking to get the proper behavior.
13683
13684 // This same functionality is important on PPC64 when dealing with
13685 // 32-to-64-bit extensions; these occur often when 32-bit values are used as
13686 // the return values of functions. Because it is so similar, it is handled
13687 // here as well.
13688
13689 if (N->getValueType(0) != MVT::i32 &&
13690 N->getValueType(0) != MVT::i64)
13691 return SDValue();
13692
13693 if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
13694 (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
13695 return SDValue();
13696
13697 if (N->getOperand(0).getOpcode() != ISD::AND &&
13698 N->getOperand(0).getOpcode() != ISD::OR &&
13699 N->getOperand(0).getOpcode() != ISD::XOR &&
13700 N->getOperand(0).getOpcode() != ISD::SELECT &&
13701 N->getOperand(0).getOpcode() != ISD::SELECT_CC)
13702 return SDValue();
13703
13704 SmallVector<SDValue, 4> Inputs;
13705 SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
13706 SmallPtrSet<SDNode *, 16> Visited;
13707
13708 // Visit all inputs, collect all binary operations (and, or, xor and
13709 // select) that are all fed by truncations.
13710 while (!BinOps.empty()) {
13711 SDValue BinOp = BinOps.pop_back_val();
13712
13713 if (!Visited.insert(BinOp.getNode()).second)
13714 continue;
13715
13716 PromOps.push_back(BinOp);
13717
13718 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
13719 // The condition of the select is not promoted.
13720 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
13721 continue;
13722 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
13723 continue;
13724
13725 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
13726 isa<ConstantSDNode>(BinOp.getOperand(i))) {
13727 Inputs.push_back(BinOp.getOperand(i));
13728 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
13729 BinOp.getOperand(i).getOpcode() == ISD::OR ||
13730 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
13731 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
13732 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
13733 BinOps.push_back(BinOp.getOperand(i));
13734 } else {
13735 // We have an input that is not a truncation or another binary
13736 // operation; we'll abort this transformation.
13737 return SDValue();
13738 }
13739 }
13740 }
13741
13742 // The operands of a select that must be truncated when the select is
13743 // promoted because the operand is actually part of the to-be-promoted set.
13744 DenseMap<SDNode *, EVT> SelectTruncOp[2];
13745
13746 // Make sure that this is a self-contained cluster of operations (which
13747 // is not quite the same thing as saying that everything has only one
13748 // use).
13749 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13750 if (isa<ConstantSDNode>(Inputs[i]))
13751 continue;
13752
13753 for (SDNode *User : Inputs[i].getNode()->uses()) {
13754 if (User != N && !Visited.count(User))
13755 return SDValue();
13756
13757 // If we're going to promote the non-output-value operand(s) or SELECT or
13758 // SELECT_CC, record them for truncation.
13759 if (User->getOpcode() == ISD::SELECT) {
13760 if (User->getOperand(0) == Inputs[i])
13761 SelectTruncOp[0].insert(std::make_pair(User,
13762 User->getOperand(0).getValueType()));
13763 } else if (User->getOpcode() == ISD::SELECT_CC) {
13764 if (User->getOperand(0) == Inputs[i])
13765 SelectTruncOp[0].insert(std::make_pair(User,
13766 User->getOperand(0).getValueType()));
13767 if (User->getOperand(1) == Inputs[i])
13768 SelectTruncOp[1].insert(std::make_pair(User,
13769 User->getOperand(1).getValueType()));
13770 }
13771 }
13772 }
13773
13774 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
13775 for (SDNode *User : PromOps[i].getNode()->uses()) {
13776 if (User != N && !Visited.count(User))
13777 return SDValue();
13778
13779 // If we're going to promote the non-output-value operand(s) or SELECT or
13780 // SELECT_CC, record them for truncation.
13781 if (User->getOpcode() == ISD::SELECT) {
13782 if (User->getOperand(0) == PromOps[i])
13783 SelectTruncOp[0].insert(std::make_pair(User,
13784 User->getOperand(0).getValueType()));
13785 } else if (User->getOpcode() == ISD::SELECT_CC) {
13786 if (User->getOperand(0) == PromOps[i])
13787 SelectTruncOp[0].insert(std::make_pair(User,
13788 User->getOperand(0).getValueType()));
13789 if (User->getOperand(1) == PromOps[i])
13790 SelectTruncOp[1].insert(std::make_pair(User,
13791 User->getOperand(1).getValueType()));
13792 }
13793 }
13794 }
13795
13796 unsigned PromBits = N->getOperand(0).getValueSizeInBits();
13797 bool ReallyNeedsExt = false;
13798 if (N->getOpcode() != ISD::ANY_EXTEND) {
13799 // If all of the inputs are not already sign/zero extended, then
13800 // we'll still need to do that at the end.
13801 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13802 if (isa<ConstantSDNode>(Inputs[i]))
13803 continue;
13804
13805 unsigned OpBits =
13806 Inputs[i].getOperand(0).getValueSizeInBits();
13807 assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
13808
13809 if ((N->getOpcode() == ISD::ZERO_EXTEND &&
13810 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
13811 APInt::getHighBitsSet(OpBits,
13812 OpBits-PromBits))) ||
13813 (N->getOpcode() == ISD::SIGN_EXTEND &&
13814 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
13815 (OpBits-(PromBits-1)))) {
13816 ReallyNeedsExt = true;
13817 break;
13818 }
13819 }
13820 }
13821
13822 // Replace all inputs, either with the truncation operand, or a
13823 // truncation or extension to the final output type.
13824 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13825 // Constant inputs need to be replaced with the to-be-promoted nodes that
13826 // use them because they might have users outside of the cluster of
13827 // promoted nodes.
13828 if (isa<ConstantSDNode>(Inputs[i]))
13829 continue;
13830
13831 SDValue InSrc = Inputs[i].getOperand(0);
13832 if (Inputs[i].getValueType() == N->getValueType(0))
13833 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
13834 else if (N->getOpcode() == ISD::SIGN_EXTEND)
13835 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13836 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
13837 else if (N->getOpcode() == ISD::ZERO_EXTEND)
13838 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13839 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
13840 else
13841 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13842 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
13843 }
13844
13845 std::list<HandleSDNode> PromOpHandles;
13846 for (auto &PromOp : PromOps)
13847 PromOpHandles.emplace_back(PromOp);
13848
13849 // Replace all operations (these are all the same, but have a different
13850 // (promoted) return type). DAG.getNode will validate that the types of
13851 // a binary operator match, so go through the list in reverse so that
13852 // we've likely promoted both operands first.
13853 while (!PromOpHandles.empty()) {
13854 SDValue PromOp = PromOpHandles.back().getValue();
13855 PromOpHandles.pop_back();
13856
13857 unsigned C;
13858 switch (PromOp.getOpcode()) {
13859 default: C = 0; break;
13860 case ISD::SELECT: C = 1; break;
13861 case ISD::SELECT_CC: C = 2; break;
13862 }
13863
13864 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
13865 PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
13866 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
13867 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
13868 // The to-be-promoted operands of this node have not yet been
13869 // promoted (this should be rare because we're going through the
13870 // list backward, but if one of the operands has several users in
13871 // this cluster of to-be-promoted nodes, it is possible).
13872 PromOpHandles.emplace_front(PromOp);
13873 continue;
13874 }
13875
13876 // For SELECT and SELECT_CC nodes, we do a similar check for any
13877 // to-be-promoted comparison inputs.
13878 if (PromOp.getOpcode() == ISD::SELECT ||
13879 PromOp.getOpcode() == ISD::SELECT_CC) {
13880 if ((SelectTruncOp[0].count(PromOp.getNode()) &&
13881 PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
13882 (SelectTruncOp[1].count(PromOp.getNode()) &&
13883 PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
13884 PromOpHandles.emplace_front(PromOp);
13885 continue;
13886 }
13887 }
13888
13889 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
13890 PromOp.getNode()->op_end());
13891
13892 // If this node has constant inputs, then they'll need to be promoted here.
13893 for (unsigned i = 0; i < 2; ++i) {
13894 if (!isa<ConstantSDNode>(Ops[C+i]))
13895 continue;
13896 if (Ops[C+i].getValueType() == N->getValueType(0))
13897 continue;
13898
13899 if (N->getOpcode() == ISD::SIGN_EXTEND)
13900 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13901 else if (N->getOpcode() == ISD::ZERO_EXTEND)
13902 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13903 else
13904 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13905 }
13906
13907 // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
13908 // truncate them again to the original value type.
13909 if (PromOp.getOpcode() == ISD::SELECT ||
13910 PromOp.getOpcode() == ISD::SELECT_CC) {
13911 auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
13912 if (SI0 != SelectTruncOp[0].end())
13913 Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
13914 auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
13915 if (SI1 != SelectTruncOp[1].end())
13916 Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
13917 }
13918
13919 DAG.ReplaceAllUsesOfValueWith(PromOp,
13920 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
13921 }
13922
13923 // Now we're left with the initial extension itself.
13924 if (!ReallyNeedsExt)
13925 return N->getOperand(0);
13926
13927 // To zero extend, just mask off everything except for the first bit (in the
13928 // i1 case).
13929 if (N->getOpcode() == ISD::ZERO_EXTEND)
13930 return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
13931 DAG.getConstant(APInt::getLowBitsSet(
13932 N->getValueSizeInBits(0), PromBits),
13933 dl, N->getValueType(0)));
13934
13935 assert(N->getOpcode() == ISD::SIGN_EXTEND &&
13936 "Invalid extension type");
13937 EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
13938 SDValue ShiftCst =
13939 DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
13940 return DAG.getNode(
13941 ISD::SRA, dl, N->getValueType(0),
13942 DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
13943 ShiftCst);
13944}
13945
13946SDValue PPCTargetLowering::combineSetCC(SDNode *N,
13947 DAGCombinerInfo &DCI) const {
13948 assert(N->getOpcode() == ISD::SETCC &&
13949 "Should be called with a SETCC node");
13950
13951 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13952 if (CC == ISD::SETNE || CC == ISD::SETEQ) {
13953 SDValue LHS = N->getOperand(0);
13954 SDValue RHS = N->getOperand(1);
13955
13956 // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
13957 if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
13958 LHS.hasOneUse())
13959 std::swap(LHS, RHS);
13960
13961 // x == 0-y --> x+y == 0
13962 // x != 0-y --> x+y != 0
13963 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
13964 RHS.hasOneUse()) {
13965 SDLoc DL(N);
13966 SelectionDAG &DAG = DCI.DAG;
13967 EVT VT = N->getValueType(0);
13968 EVT OpVT = LHS.getValueType();
13969 SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
13970 return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
13971 }
13972 }
13973
13974 return DAGCombineTruncBoolExt(N, DCI);
13975}
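// [Editor's note - illustrative example, not part of the original source.]
// The canonicalization above turns, e.g.,
//   setcc eq, %x, (sub 0, %y)
// into
//   setcc eq, (add %x, %y), 0
// which exposes the add to other combines and avoids materializing the
// negation when only the comparison result is needed.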
13976
13977// Is this an extending load from an f32 to an f64?
13978static bool isFPExtLoad(SDValue Op) {
13979 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
13980 return LD->getExtensionType() == ISD::EXTLOAD &&
13981 Op.getValueType() == MVT::f64;
13982 return false;
13983}
13984
13985/// Reduces the number of fp-to-int conversion when building a vector.
13986///
13987/// If this vector is built out of floating to integer conversions,
13988/// transform it to a vector built out of floating point values followed by a
13989/// single floating to integer conversion of the vector.
13990/// Namely (build_vector (fptosi $A), (fptosi $B), ...)
13991/// becomes (fptosi (build_vector ($A, $B, ...)))
13992SDValue PPCTargetLowering::
13993combineElementTruncationToVectorTruncation(SDNode *N,
13994 DAGCombinerInfo &DCI) const {
13995 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
13996 "Should be called with a BUILD_VECTOR node");
13997
13998 SelectionDAG &DAG = DCI.DAG;
13999 SDLoc dl(N);
14000
14001 SDValue FirstInput = N->getOperand(0);
14002 assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
14003 "The input operand must be an fp-to-int conversion.");
14004
14005 // This combine happens after legalization so the fp_to_[su]i nodes are
14006 // already converted to PPCISD nodes.
14007 unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
14008 if (FirstConversion == PPCISD::FCTIDZ ||
14009 FirstConversion == PPCISD::FCTIDUZ ||
14010 FirstConversion == PPCISD::FCTIWZ ||
14011 FirstConversion == PPCISD::FCTIWUZ) {
14012 bool IsSplat = true;
14013 bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
14014 FirstConversion == PPCISD::FCTIWUZ;
14015 EVT SrcVT = FirstInput.getOperand(0).getValueType();
14016 SmallVector<SDValue, 4> Ops;
14017 EVT TargetVT = N->getValueType(0);
14018 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
14019 SDValue NextOp = N->getOperand(i);
14020 if (NextOp.getOpcode() != PPCISD::MFVSR)
14021 return SDValue();
14022 unsigned NextConversion = NextOp.getOperand(0).getOpcode();
14023 if (NextConversion != FirstConversion)
14024 return SDValue();
14025 // If we are converting to 32-bit integers, we need to add an FP_ROUND.
14026 // This is not valid if the input was originally double precision. It is
14027 // also not profitable to do unless this is an extending load, in which
14028 // case doing this combine will allow us to combine consecutive loads.
14029 if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
14030 return SDValue();
14031 if (N->getOperand(i) != FirstInput)
14032 IsSplat = false;
14033 }
14034
14035 // If this is a splat, we leave it as-is since there will be only a single
14036 // fp-to-int conversion followed by a splat of the integer. This is better
14037 // for 32-bit and smaller ints and neutral for 64-bit ints.
14038 if (IsSplat)
14039 return SDValue();
14040
14041 // Now that we know we have the right type of node, get its operands
14042 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
14043 SDValue In = N->getOperand(i).getOperand(0);
14044 if (Is32Bit) {
14045 // For 32-bit values, we need to add an FP_ROUND node (if we made it
14046 // here, we know that all inputs are extending loads so this is safe).
14047 if (In.isUndef())
14048 Ops.push_back(DAG.getUNDEF(SrcVT));
14049 else {
14050 SDValue Trunc = DAG.getNode(ISD::FP_ROUND, dl,
14051 MVT::f32, In.getOperand(0),
14052 DAG.getIntPtrConstant(1, dl));
14053 Ops.push_back(Trunc);
14054 }
14055 } else
14056 Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
14057 }
14058
14059 unsigned Opcode;
14060 if (FirstConversion == PPCISD::FCTIDZ ||
14061 FirstConversion == PPCISD::FCTIWZ)
14062 Opcode = ISD::FP_TO_SINT;
14063 else
14064 Opcode = ISD::FP_TO_UINT;
14065
14066 EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
14067 SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
14068 return DAG.getNode(Opcode, dl, TargetVT, BV);
14069 }
14070 return SDValue();
14071}
14072
14073/// Reduce the number of loads when building a vector.
14074///
14075/// Building a vector out of multiple loads can be converted to a load
14076/// of the vector type if the loads are consecutive. If the loads are
14077/// consecutive but in descending order, a shuffle is added at the end
14078/// to reorder the vector.
14079static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
14080 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14081 "Should be called with a BUILD_VECTOR node");
14082
14083 SDLoc dl(N);
14084
14085 // Return early for non-byte-sized types, as they can't be consecutive.
14086 if (!N->getValueType(0).getVectorElementType().isByteSized())
14087 return SDValue();
14088
14089 bool InputsAreConsecutiveLoads = true;
14090 bool InputsAreReverseConsecutive = true;
14091 unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
14092 SDValue FirstInput = N->getOperand(0);
14093 bool IsRoundOfExtLoad = false;
14094
14095 if (FirstInput.getOpcode() == ISD::FP_ROUND &&
14096 FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
14097 LoadSDNode *LD = dyn_cast<LoadSDNode>(FirstInput.getOperand(0));
14098 IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD;
14099 }
14100 // Not a build vector of (possibly fp_rounded) loads.
14101 if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
14102 N->getNumOperands() == 1)
14103 return SDValue();
14104
14105 for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
14106 // If any inputs are fp_round(extload), they all must be.
14107 if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
14108 return SDValue();
14109
14110 SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
14111 N->getOperand(i);
14112 if (NextInput.getOpcode() != ISD::LOAD)
14113 return SDValue();
14114
14115 SDValue PreviousInput =
14116 IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
14117 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput);
14118 LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput);
14119
14120 // If any inputs are fp_round(extload), they all must be.
14121 if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
14122 return SDValue();
14123
14124 if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG))
14125 InputsAreConsecutiveLoads = false;
14126 if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG))
14127 InputsAreReverseConsecutive = false;
14128
14129 // Exit early if the loads are neither consecutive nor reverse consecutive.
14130 if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
14131 return SDValue();
14132 }
14133
14134 assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
14135 "The loads cannot be both consecutive and reverse consecutive.");
14136
14137 SDValue FirstLoadOp =
14138 IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput;
14139 SDValue LastLoadOp =
14140 IsRoundOfExtLoad ? N->getOperand(N->getNumOperands()-1).getOperand(0) :
14141 N->getOperand(N->getNumOperands()-1);
14142
14143 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp);
14144 LoadSDNode *LDL = dyn_cast<LoadSDNode>(LastLoadOp);
14145 if (InputsAreConsecutiveLoads) {
14146 assert(LD1 && "Input needs to be a LoadSDNode.");
14147 return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(),
14148 LD1->getBasePtr(), LD1->getPointerInfo(),
14149 LD1->getAlignment());
14150 }
14151 if (InputsAreReverseConsecutive) {
14152 assert(LDL && "Input needs to be a LoadSDNode.");
14153 SDValue Load = DAG.getLoad(N->getValueType(0), dl, LDL->getChain(),
14154 LDL->getBasePtr(), LDL->getPointerInfo(),
14155 LDL->getAlignment());
14156 SmallVector<int, 16> Ops;
14157 for (int i = N->getNumOperands() - 1; i >= 0; i--)
14158 Ops.push_back(i);
14159
14160 return DAG.getVectorShuffle(N->getValueType(0), dl, Load,
14161 DAG.getUNDEF(N->getValueType(0)), Ops);
14162 }
14163 return SDValue();
14164}
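// [Editor's note - illustrative example, not part of the original source.]
// For a v4i32 build_vector whose operands are loads of a[3], a[2], a[1], a[0]
// (reverse consecutive), the code above emits one v4i32 load from &a[0] and a
// vector_shuffle with mask <3,2,1,0> to restore the requested element order.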
14165
14166 // This function adds the vector_shuffle needed to get the elements of
14167 // the vector extract into the correct positions, as specified by the
14168 // CorrectElems encoding.
14169static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
14170 SDValue Input, uint64_t Elems,
14171 uint64_t CorrectElems) {
14172 SDLoc dl(N);
14173
14174 unsigned NumElems = Input.getValueType().getVectorNumElements();
14175 SmallVector<int, 16> ShuffleMask(NumElems, -1);
14176
14177 // Knowing the element indices being extracted from the original
14178 // vector and the order in which they're being inserted, just put
14179 // them at element indices required for the instruction.
14180 for (unsigned i = 0; i < N->getNumOperands(); i++) {
14181 if (DAG.getDataLayout().isLittleEndian())
14182 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
14183 else
14184 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
14185 CorrectElems = CorrectElems >> 8;
14186 Elems = Elems >> 8;
14187 }
14188
14189 SDValue Shuffle =
14190 DAG.getVectorShuffle(Input.getValueType(), dl, Input,
14191 DAG.getUNDEF(Input.getValueType()), ShuffleMask);
14192
14193 EVT VT = N->getValueType(0);
14194 SDValue Conv = DAG.getBitcast(VT, Shuffle);
14195
14196 EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
14197 Input.getValueType().getVectorElementType(),
14198 VT.getVectorNumElements());
14199 return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
14200 DAG.getValueType(ExtVT));
14201}
14202
14203// Look for build vector patterns where input operands come from sign
14204// extended vector_extract elements of specific indices. If the correct indices
14205 // aren't used, add a vector shuffle to fix up the indices and create a
14206 // SIGN_EXTEND_INREG node, which selects the vector sign extend instructions
14207// during instruction selection.
14208static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
14209 // This array encodes the indices that the vector sign extend instructions
14210 // extract from when extending from one type to another for both BE and LE.
14211 // The right nibble of each byte corresponds to the LE indices,
14212 // and the left nibble of each byte corresponds to the BE indices.
14213 // For example: 0x3074B8FC byte->word
14214 // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
14215 // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
14216 // For example: 0x000070F8 byte->double word
14217 // For LE: the allowed indices are: 0x0,0x8
14218 // For BE: the allowed indices are: 0x7,0xF
14219 uint64_t TargetElems[] = {
14220 0x3074B8FC, // b->w
14221 0x000070F8, // b->d
14222 0x10325476, // h->w
14223 0x00003074, // h->d
14224 0x00001032, // w->d
14225 };
14226
14227 uint64_t Elems = 0;
14228 int Index;
14229 SDValue Input;
14230
14231 auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
14232 if (!Op)
14233 return false;
14234 if (Op.getOpcode() != ISD::SIGN_EXTEND &&
14235 Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
14236 return false;
14237
14238 // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
14239 // of the right width.
14240 SDValue Extract = Op.getOperand(0);
14241 if (Extract.getOpcode() == ISD::ANY_EXTEND)
14242 Extract = Extract.getOperand(0);
14243 if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
14244 return false;
14245
14246 ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
14247 if (!ExtOp)
14248 return false;
14249
14250 Index = ExtOp->getZExtValue();
14251 if (Input && Input != Extract.getOperand(0))
14252 return false;
14253
14254 if (!Input)
14255 Input = Extract.getOperand(0);
14256
14257 Elems = Elems << 8;
14258 Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
14259 Elems |= Index;
14260
14261 return true;
14262 };
14263
14264 // If the build vector operands aren't sign-extended vector extracts
14265 // of the same input vector, then return.
14266 for (unsigned i = 0; i < N->getNumOperands(); i++) {
14267 if (!isSExtOfVecExtract(N->getOperand(i))) {
14268 return SDValue();
14269 }
14270 }
14271
14272 // If the vector extract indices are not correct, add the appropriate
14273 // vector_shuffle.
14274 int TgtElemArrayIdx;
14275 int InputSize = Input.getValueType().getScalarSizeInBits();
14276 int OutputSize = N->getValueType(0).getScalarSizeInBits();
14277 if (InputSize + OutputSize == 40)
14278 TgtElemArrayIdx = 0;
14279 else if (InputSize + OutputSize == 72)
14280 TgtElemArrayIdx = 1;
14281 else if (InputSize + OutputSize == 48)
14282 TgtElemArrayIdx = 2;
14283 else if (InputSize + OutputSize == 80)
14284 TgtElemArrayIdx = 3;
14285 else if (InputSize + OutputSize == 96)
14286 TgtElemArrayIdx = 4;
14287 else
14288 return SDValue();
14289
14290 uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
14291 CorrectElems = DAG.getDataLayout().isLittleEndian()
14292 ? CorrectElems & 0x0F0F0F0F0F0F0F0F
14293 : CorrectElems & 0xF0F0F0F0F0F0F0F0;
14294 if (Elems != CorrectElems) {
14295 return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
14296 }
14297
14298 // Regular lowering will catch cases where a shuffle is not needed.
14299 return SDValue();
14300}
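// Editor's note: a minimal standalone sketch (not part of this file) of the
// nibble-encoding check used above, under the assumptions described in the
// comments: each extract index is packed into one byte of Elems, with the LE
// indices in the low nibble and the BE indices in the high nibble, so the
// reference constant is masked with 0x0F0F... (LE) or 0xF0F0... (BE) before
// the comparison.
//
//   #include <cstdint>
//   static bool indicesMatchTarget(uint64_t Elems, uint64_t Target,
//                                  bool IsLittleEndian) {
//     uint64_t Mask = IsLittleEndian ? 0x0F0F0F0F0F0F0F0FULL
//                                    : 0xF0F0F0F0F0F0F0F0ULL;
//     return Elems == (Target & Mask);
//   }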
14301
14302// Look for the pattern of a load from a narrow width to i128, feeding
14303// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
14304// (LXVRZX). This node represents a zero extending load that will be matched
14305// to the Load VSX Vector Rightmost instructions.
14306static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
14307 SDLoc DL(N);
14308
14309 // This combine is only eligible for a BUILD_VECTOR of v1i128.
14310 if (N->getValueType(0) != MVT::v1i128)
14311 return SDValue();
14312
14313 SDValue Operand = N->getOperand(0);
14314 // Proceed with the transformation if the operand to the BUILD_VECTOR
14315 // is a load instruction.
14316 if (Operand.getOpcode() != ISD::LOAD)
14317 return SDValue();
14318
14319 auto *LD = cast<LoadSDNode>(Operand);
14320 EVT MemoryType = LD->getMemoryVT();
14321
14322 // This transformation is only valid if we are loading either a byte,
14323 // halfword, word, or doubleword.
14324 bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
14325 MemoryType == MVT::i32 || MemoryType == MVT::i64;
14326
14327 // Ensure that the load from the narrow width is being zero extended to i128.
14328 if (!ValidLDType ||
14329 (LD->getExtensionType() != ISD::ZEXTLOAD &&
14330 LD->getExtensionType() != ISD::EXTLOAD))
14331 return SDValue();
14332
14333 SDValue LoadOps[] = {
14334 LD->getChain(), LD->getBasePtr(),
14335 DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
14336
14337 return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
14338 DAG.getVTList(MVT::v1i128, MVT::Other),
14339 LoadOps, MemoryType, LD->getMemOperand());
14340}
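// Editor's note: a hypothetical input for the combine above. For IR along the
// lines of
//   %w = load i32, ptr %p
//   %z = zext i32 %w to i128
//   %v = insertelement <1 x i128> poison, i128 %z, i64 0
// the BUILD_VECTOR of v1i128 fed by the zero-extending i32 load becomes a
// single PPCISD::LXVRZX node, which is then matched to one of the Load VSX
// Vector Rightmost instructions (lxvrwx for a word-sized load) on Power10.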
14341
14342SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
14343 DAGCombinerInfo &DCI) const {
14344 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14345 "Should be called with a BUILD_VECTOR node");
14346
14347 SelectionDAG &DAG = DCI.DAG;
14348 SDLoc dl(N);
14349
14350 if (!Subtarget.hasVSX())
14351 return SDValue();
14352
14353 // The target independent DAG combiner will leave a build_vector of
14354 // float-to-int conversions intact. We can generate MUCH better code for
14355 // a float-to-int conversion of a vector of floats.
14356 SDValue FirstInput = N->getOperand(0);
14357 if (FirstInput.getOpcode() == PPCISD::MFVSR) {
14358 SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
14359 if (Reduced)
14360 return Reduced;
14361 }
14362
14363 // If we're building a vector out of consecutive loads, just load that
14364 // vector type.
14365 SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
14366 if (Reduced)
14367 return Reduced;
14368
14369 // If we're building a vector out of extended elements from another vector
14370 // we have P9 vector integer extend instructions. The code assumes legal
14371 // input types (i.e. it can't handle things like v4i16) so do not run before
14372 // legalization.
14373 if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
14374 Reduced = combineBVOfVecSExt(N, DAG);
14375 if (Reduced)
14376 return Reduced;
14377 }
14378
14379 // On Power10, the Load VSX Vector Rightmost instructions can be utilized
14380 // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
14381 // is a load from <valid narrow width> to i128.
14382 if (Subtarget.isISA3_1()) {
14383 SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
14384 if (BVOfZLoad)
14385 return BVOfZLoad;
14386 }
14387
14388 if (N->getValueType(0) != MVT::v2f64)
14389 return SDValue();
14390
14391 // Looking for:
14392 // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
14393 if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
14394 FirstInput.getOpcode() != ISD::UINT_TO_FP)
14395 return SDValue();
14396 if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
14397 N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
14398 return SDValue();
14399 if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
14400 return SDValue();
14401
14402 SDValue Ext1 = FirstInput.getOperand(0);
14403 SDValue Ext2 = N->getOperand(1).getOperand(0);
14404 if (Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
14405 Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
14406 return SDValue();
14407
14408 ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
14409 ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
14410 if (!Ext1Op || !Ext2Op)
14411 return SDValue();
14412 if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
14413 Ext1.getOperand(0) != Ext2.getOperand(0))
14414 return SDValue();
14415
14416 int FirstElem = Ext1Op->getZExtValue();
14417 int SecondElem = Ext2Op->getZExtValue();
14418 int SubvecIdx;
14419 if (FirstElem == 0 && SecondElem == 1)
14420 SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
14421 else if (FirstElem == 2 && SecondElem == 3)
14422 SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
14423 else
14424 return SDValue();
14425
14426 SDValue SrcVec = Ext1.getOperand(0);
14427 auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
14428 PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
14429 return DAG.getNode(NodeType, dl, MVT::v2f64,
14430 SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
14431}
14432
14433SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
14434 DAGCombinerInfo &DCI) const {
14435 assert((N->getOpcode() == ISD::SINT_TO_FP ||
14436 N->getOpcode() == ISD::UINT_TO_FP) &&
14437 "Need an int -> FP conversion node here");
14438
14439 if (useSoftFloat() || !Subtarget.has64BitSupport())
14440 return SDValue();
14441
14442 SelectionDAG &DAG = DCI.DAG;
14443 SDLoc dl(N);
14444 SDValue Op(N, 0);
14445
14446 // Don't handle ppc_fp128 here or conversions that are out-of-range capable
14447 // from the hardware.
14448 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
14449 return SDValue();
14450 if (!Op.getOperand(0).getValueType().isSimple())
14451 return SDValue();
14452 if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
14453 Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
14454 return SDValue();
14455
14456 SDValue FirstOperand(Op.getOperand(0));
14457 bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
14458 (FirstOperand.getValueType() == MVT::i8 ||
14459 FirstOperand.getValueType() == MVT::i16);
14460 if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
14461 bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
14462 bool DstDouble = Op.getValueType() == MVT::f64;
14463 unsigned ConvOp = Signed ?
14464 (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
14465 (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
14466 SDValue WidthConst =
14467 DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
14468 dl, false);
14469 LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
14470 SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
14471 SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
14472 DAG.getVTList(MVT::f64, MVT::Other),
14473 Ops, MVT::i8, LDN->getMemOperand());
14474
14475 // For signed conversion, we need to sign-extend the value in the VSR
14476 if (Signed) {
14477 SDValue ExtOps[] = { Ld, WidthConst };
14478 SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
14479 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
14480 } else
14481 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
14482 }
14483
14484
14485 // For i32 intermediate values, unfortunately, the conversion functions
14486 // leave the upper 32 bits of the value undefined. Within the set of
14487 // scalar instructions, we have no method for zero- or sign-extending the
14488 // value. Thus, we cannot handle i32 intermediate values here.
14489 if (Op.getOperand(0).getValueType() == MVT::i32)
14490 return SDValue();
14491
14492 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
14493 "UINT_TO_FP is supported only with FPCVT");
14494
14495 // If we have FCFIDS, then use it when converting to single-precision.
14496 // Otherwise, convert to double-precision and then round.
14497 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
14498 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
14499 : PPCISD::FCFIDS)
14500 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
14501 : PPCISD::FCFID);
14502 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
14503 ? MVT::f32
14504 : MVT::f64;
14505
14506 // If we're converting from a float to an int and back to a float again,
14507 // then we don't need the store/load pair at all.
14508 if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
14509 Subtarget.hasFPCVT()) ||
14510 (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
14511 SDValue Src = Op.getOperand(0).getOperand(0);
14512 if (Src.getValueType() == MVT::f32) {
14513 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
14514 DCI.AddToWorklist(Src.getNode());
14515 } else if (Src.getValueType() != MVT::f64) {
14516 // Make sure that we don't pick up a ppc_fp128 source value.
14517 return SDValue();
14518 }
14519
14520 unsigned FCTOp =
14521 Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
14522 PPCISD::FCTIDUZ;
14523
14524 SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
14525 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
14526
14527 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
14528 FP = DAG.getNode(ISD::FP_ROUND, dl,
14529 MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
14530 DCI.AddToWorklist(FP.getNode());
14531 }
14532
14533 return FP;
14534 }
14535
14536 return SDValue();
14537}
14538
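// Editor's note: a hypothetical C-level example of the round-trip pattern
// eliminated above. For
//   double trunc_toward_zero(double x) { return (double)(long long)x; }
// the DAG contains (sint_to_fp (fp_to_sint x)); the combine rewrites it as
// (FCFID (FCTIDZ x)), keeping the value in a floating-point register instead
// of going through the store/load pair the default lowering would emit.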
14539// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
14540// builtins) into loads with swaps.
14541SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
14542 DAGCombinerInfo &DCI) const {
14543 SelectionDAG &DAG = DCI.DAG;
14544 SDLoc dl(N);
14545 SDValue Chain;
14546 SDValue Base;
14547 MachineMemOperand *MMO;
14548
14549 switch (N->getOpcode()) {
14550 default:
14551 llvm_unreachable("Unexpected opcode for little endian VSX load");
14552 case ISD::LOAD: {
14553 LoadSDNode *LD = cast<LoadSDNode>(N);
14554 Chain = LD->getChain();
14555 Base = LD->getBasePtr();
14556 MMO = LD->getMemOperand();
14557 // If the MMO suggests this isn't a load of a full vector, leave
14558 // things alone. For a built-in, we have to make the change for
14559 // correctness, so if there is a size problem that will be a bug.
14560 if (MMO->getSize() < 16)
14561 return SDValue();
14562 break;
14563 }
14564 case ISD::INTRINSIC_W_CHAIN: {
14565 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
14566 Chain = Intrin->getChain();
14567 // Similarly to the store case below, Intrin->getBasePtr() doesn't get
14568 // us what we want. Get operand 2 instead.
14569 Base = Intrin->getOperand(2);
14570 MMO = Intrin->getMemOperand();
14571 break;
14572 }
14573 }
14574
14575 MVT VecTy = N->getValueType(0).getSimpleVT();
14576
14577 // Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is
14578 // aligned and the type is a vector with elements up to 4 bytes
14579 if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&
14580 VecTy.getScalarSizeInBits() <= 32) {
14581 return SDValue();
14582 }
14583
14584 SDValue LoadOps[] = { Chain, Base };
14585 SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
14586 DAG.getVTList(MVT::v2f64, MVT::Other),
14587 LoadOps, MVT::v2f64, MMO);
14588
14589 DCI.AddToWorklist(Load.getNode());
14590 Chain = Load.getValue(1);
14591 SDValue Swap = DAG.getNode(
14592 PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
14593 DCI.AddToWorklist(Swap.getNode());
14594
14595 // Add a bitcast if the resulting load type doesn't match v2f64.
14596 if (VecTy != MVT::v2f64) {
14597 SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
14598 DCI.AddToWorklist(N.getNode());
14599 // Package {bitcast value, swap's chain} to match Load's shape.
14600 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
14601 N, Swap.getValue(1));
14602 }
14603
14604 return Swap;
14605}
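// Editor's note: a brief illustration (editor-added) of why the swap is
// emitted above. On a little endian subtarget, lxvd2x loads the two
// doublewords in big-endian element order, so a v2f64 value <a, b> in memory
// arrives in the register as <b, a>; the following XXSWAPD restores <a, b>.
// As the call-site comments note, ISA 3.0 based CPUs have a non-permuting
// load and skip this expansion entirely.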
14606
14607// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
14608// builtins) into stores with swaps.
14609SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
14610 DAGCombinerInfo &DCI) const {
14611 SelectionDAG &DAG = DCI.DAG;
14612 SDLoc dl(N);
14613 SDValue Chain;
14614 SDValue Base;
14615 unsigned SrcOpnd;
14616 MachineMemOperand *MMO;
14617
14618 switch (N->getOpcode()) {
14619 default:
14620 llvm_unreachable("Unexpected opcode for little endian VSX store");
14621 case ISD::STORE: {
14622 StoreSDNode *ST = cast<StoreSDNode>(N);
14623 Chain = ST->getChain();
14624 Base = ST->getBasePtr();
14625 MMO = ST->getMemOperand();
14626 SrcOpnd = 1;
14627 // If the MMO suggests this isn't a store of a full vector, leave
14628 // things alone. For a built-in, we have to make the change for
14629 // correctness, so if there is a size problem that will be a bug.
14630 if (MMO->getSize() < 16)
14631 return SDValue();
14632 break;
14633 }
14634 case ISD::INTRINSIC_VOID: {
14635 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
14636 Chain = Intrin->getChain();
14637 // Intrin->getBasePtr() oddly does not get what we want.
14638 Base = Intrin->getOperand(3);
14639 MMO = Intrin->getMemOperand();
14640 SrcOpnd = 2;
14641 break;
14642 }
14643 }
14644
14645 SDValue Src = N->getOperand(SrcOpnd);
14646 MVT VecTy = Src.getValueType().getSimpleVT();
14647
14648 // Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the store is
14649 // aligned and the type is a vector with elements up to 4 bytes
14650 if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&
14651 VecTy.getScalarSizeInBits() <= 32) {
14652 return SDValue();
14653 }
14654
14655 // All stores are done as v2f64 and possible bit cast.
14656 if (VecTy != MVT::v2f64) {
14657 Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
14658 DCI.AddToWorklist(Src.getNode());
14659 }
14660
14661 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
14662 DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
14663 DCI.AddToWorklist(Swap.getNode());
14664 Chain = Swap.getValue(1);
14665 SDValue StoreOps[] = { Chain, Swap, Base };
14666 SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
14667 DAG.getVTList(MVT::Other),
14668 StoreOps, VecTy, MMO);
14669 DCI.AddToWorklist(Store.getNode());
14670 return Store;
14671}
14672
14673// Handle DAG combine for STORE (FP_TO_INT F).
14674SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
14675 DAGCombinerInfo &DCI) const {
14676
14677 SelectionDAG &DAG = DCI.DAG;
14678 SDLoc dl(N);
14679 unsigned Opcode = N->getOperand(1).getOpcode();
14680
14681 assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT)
14682 && "Not a FP_TO_INT Instruction!");
14683
14684 SDValue Val = N->getOperand(1).getOperand(0);
14685 EVT Op1VT = N->getOperand(1).getValueType();
14686 EVT ResVT = Val.getValueType();
14687
14688 if (!isTypeLegal(ResVT))
14689 return SDValue();
14690
14691 // Only perform the combine for conversions to i64/i32, or to i16/i8 on Power9.
14692 bool ValidTypeForStoreFltAsInt =
14693 (Op1VT == MVT::i32 || Op1VT == MVT::i64 ||
14694 (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
14695
14696 if (ResVT == MVT::f128 && !Subtarget.hasP9Vector())
14697 return SDValue();
14698
14699 if (ResVT == MVT::ppcf128 || !Subtarget.hasP8Vector() ||
14700 cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
14701 return SDValue();
14702
14703 // Extend f32 values to f64
14704 if (ResVT.getScalarSizeInBits() == 32) {
14705 Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
14706 DCI.AddToWorklist(Val.getNode());
14707 }
14708
14709 // Set signed or unsigned conversion opcode.
14710 unsigned ConvOpcode = (Opcode == ISD::FP_TO_SINT) ?
14711 PPCISD::FP_TO_SINT_IN_VSR :
14712 PPCISD::FP_TO_UINT_IN_VSR;
14713
14714 Val = DAG.getNode(ConvOpcode,
14715 dl, ResVT == MVT::f128 ? MVT::f128 : MVT::f64, Val);
14716 DCI.AddToWorklist(Val.getNode());
14717
14718 // Set number of bytes being converted.
14719 unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
14720 SDValue Ops[] = { N->getOperand(0), Val, N->getOperand(2),
14721 DAG.getIntPtrConstant(ByteSize, dl, false),
14722 DAG.getValueType(Op1VT) };
14723
14724 Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
14725 DAG.getVTList(MVT::Other), Ops,
14726 cast<StoreSDNode>(N)->getMemoryVT(),
14727 cast<StoreSDNode>(N)->getMemOperand());
14728
14729 DCI.AddToWorklist(Val.getNode());
14730 return Val;
14731}
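// Editor's note: a hypothetical C-level example of the pattern handled above.
// For
//   void f(float x, int *p) { *p = (int)x; }
// the (store (fp_to_sint x)) sequence is rewritten to convert inside a VSR
// (PPCISD::FP_TO_SINT_IN_VSR) and store the result directly with
// PPCISD::ST_VSR_SCAL_INT, which keeps the value in a vector-scalar register
// rather than first moving it to a GPR.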
14732
14733static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
14734 // Check that the source of the element keeps flipping
14735 // (i.e. Mask[i] < NumElts -> Mask[i+1] >= NumElts).
14736 bool PrevElemFromFirstVec = Mask[0] < NumElts;
14737 for (int i = 1, e = Mask.size(); i < e; i++) {
14738 if (PrevElemFromFirstVec && Mask[i] < NumElts)
14739 return false;
14740 if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
14741 return false;
14742 PrevElemFromFirstVec = !PrevElemFromFirstVec;
14743 }
14744 return true;
14745}
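// Editor's note: an illustrative (hypothetical) use of the predicate above.
// For NumElts == 8, a mask that alternates between the first vector
// (index < 8) and the second (index >= 8) on every element passes; taking two
// elements in a row from the same source fails.
//
//   SmallVector<int, 8> Good = {0, 9, 1, 10, 2, 11, 3, 12};  // -> true
//   SmallVector<int, 8> Bad  = {0, 1, 9, 10, 2, 3, 11, 12};  // -> false
//   isAlternatingShuffMask(Good, 8);
//   isAlternatingShuffMask(Bad, 8);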
14746
14747static bool isSplatBV(SDValue Op) {
14748 if (Op.getOpcode() != ISD::BUILD_VECTOR)
14749 return false;
14750 SDValue FirstOp;
14751
14752 // Find first non-undef input.
14753 for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
14754 FirstOp = Op.getOperand(i);
14755 if (!FirstOp.isUndef())
14756 break;
14757 }
14758
14759 // All inputs are undef or the same as the first non-undef input.
14760 for (int i = 1, e = Op.getNumOperands(); i < e; i++)
14761 if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
14762 return false;
14763 return true;
14764}
14765
14766static SDValue isScalarToVec(SDValue Op) {
14767 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
14768 return Op;
14769 if (Op.getOpcode() != ISD::BITCAST)
14770 return SDValue();
14771 Op = Op.getOperand(0);
14772 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
14773 return Op;
14774 return SDValue();
14775}
14776
14777// Fix up the shuffle mask to account for the fact that the result of
14778// scalar_to_vector is not in lane zero. This just takes all values in
14779// the ranges specified by the min/max indices and adds the number of
14780// elements required to ensure each element comes from the respective
14781// position in the valid lane.
14782// On little endian, that's just the corresponding element in the other
14783// half of the vector. On big endian, it is in the same half but right
14784// justified rather than left justified in that half.
14785static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
14786 int LHSMaxIdx, int RHSMinIdx,
14787 int RHSMaxIdx, int HalfVec,
14788 unsigned ValidLaneWidth,
14789 const PPCSubtarget &Subtarget) {
14790 for (int i = 0, e = ShuffV.size(); i < e; i++) {
14791 int Idx = ShuffV[i];
14792 if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
14793 ShuffV[i] +=
14794 Subtarget.isLittleEndian() ? HalfVec : HalfVec - ValidLaneWidth;
14795 }
14796}
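// Editor's note: a small worked example (values assumed) of the fixup above.
// Suppose the LHS of a v8i16 shuffle came from a scalar_to_vector of an i16,
// so HalfVec == 4, ValidLaneWidth == 1 and only mask values below
// LHSMaxIdx == 1 refer to the scalar; the RHS is not permuted, so RHSMinIdx
// and RHSMaxIdx stay at -1:
//
//   SmallVector<int, 8> ShuffV = {0, 8, 1, 9, 2, 10, 3, 11};
//   fixupShuffleMaskForPermutedSToV(ShuffV, /*LHSMaxIdx=*/1, /*RHSMinIdx=*/-1,
//                                   /*RHSMaxIdx=*/-1, /*HalfVec=*/4,
//                                   /*ValidLaneWidth=*/1, Subtarget);
//
// Only the entry with value 0 is adjusted: it becomes 4 on little endian
// (the corresponding element in the other half) and 4 - 1 == 3 on big endian
// (right justified within the same half).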
14797
14798// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
14799// the original is:
14800// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
14801// In such a case, just change the shuffle mask to extract the element
14802// from the permuted index.
14803static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
14804 const PPCSubtarget &Subtarget) {
14805 SDLoc dl(OrigSToV);
14806 EVT VT = OrigSToV.getValueType();
14807 assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
14808 "Expecting a SCALAR_TO_VECTOR here");
14809 SDValue Input = OrigSToV.getOperand(0);
14810
14811 if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
14812 ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
14813 SDValue OrigVector = Input.getOperand(0);
14814
14815 // Can't handle non-const element indices or different vector types
14816 // for the input to the extract and the output of the scalar_to_vector.
14817 if (Idx && VT == OrigVector.getValueType()) {
14818 unsigned NumElts = VT.getVectorNumElements();
14819 assert(
14820 NumElts > 1 &&
14821 "Cannot produce a permuted scalar_to_vector for one element vector");
14822 SmallVector<int, 16> NewMask(NumElts, -1);
14823 unsigned ResultInElt = NumElts / 2;
14824 ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1;
14825 NewMask[ResultInElt] = Idx->getZExtValue();
14826 return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
14827 }
14828 }
14829 return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
14830 OrigSToV.getOperand(0));
14831}
14832
14833// On little endian subtargets, combine shuffles such as:
14834// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
14835// into:
14836// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
14837// because the latter can be matched to a single instruction merge.
14838// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
14839// to put the value into element zero. Adjust the shuffle mask so that the
14840// vector can remain in permuted form (to prevent a swap prior to a shuffle).
14841// On big endian targets, this is still useful for SCALAR_TO_VECTOR
14842// nodes with elements smaller than doubleword because all the ways
14843// of getting scalar data into a vector register put the value in the
14844// rightmost element of the left half of the vector.
14845SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
14846 SelectionDAG &DAG) const {
14847 SDValue LHS = SVN->getOperand(0);
14848 SDValue RHS = SVN->getOperand(1);
14849 auto Mask = SVN->getMask();
14850 int NumElts = LHS.getValueType().getVectorNumElements();
14851 SDValue Res(SVN, 0);
14852 SDLoc dl(SVN);
14853 bool IsLittleEndian = Subtarget.isLittleEndian();
14854
14855 // On big endian targets this is only useful for subtargets with direct moves.
14856 // On little endian targets it would be useful for all subtargets with VSX.
14857 // However adding special handling for LE subtargets without direct moves
14858 // would be wasted effort since the minimum arch for LE is ISA 2.07 (Power8)
14859 // which includes direct moves.
14860 if (!Subtarget.hasDirectMove())
14861 return Res;
14862
14863 // If this is not a shuffle of a shuffle and the first element comes from
14864 // the second vector, canonicalize to the commuted form. This will make it
14865 // more likely to match one of the single instruction patterns.
14866 if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
14867 RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
14868 std::swap(LHS, RHS);
14869 Res = DAG.getCommutedVectorShuffle(*SVN);
14870 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
14871 }
14872
14873 // Adjust the shuffle mask if either input vector comes from a
14874 // SCALAR_TO_VECTOR and keep the respective input vector in permuted
14875 // form (to prevent the need for a swap).
14876 SmallVector<int, 16> ShuffV(Mask.begin(), Mask.end());
14877 SDValue SToVLHS = isScalarToVec(LHS);
14878 SDValue SToVRHS = isScalarToVec(RHS);
14879 if (SToVLHS || SToVRHS) {
14880 int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
14881 : SToVRHS.getValueType().getVectorNumElements();
14882 int NumEltsOut = ShuffV.size();
14883 // The width of the "valid lane" (i.e. the lane that contains the value that
14884 // is vectorized) needs to be expressed in terms of the number of elements
14885 // of the shuffle. It is therefore the ratio of the scalar sizes before
14886 // and after any bitcast.
14887 unsigned ValidLaneWidth =
14888 SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits() /
14889 LHS.getValueType().getScalarSizeInBits()
14890 : SToVRHS.getValueType().getScalarSizeInBits() /
14891 RHS.getValueType().getScalarSizeInBits();
14892
14893 // Initially assume that neither input is permuted. These will be adjusted
14894 // accordingly if either input is.
14895 int LHSMaxIdx = -1;
14896 int RHSMinIdx = -1;
14897 int RHSMaxIdx = -1;
14898 int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
14899
14900 // Get the permuted scalar to vector nodes for the source(s) that come from
14901 // ISD::SCALAR_TO_VECTOR.
14902 // On big endian systems, this only makes sense for element sizes smaller
14903 // than 64 bits since for 64-bit elements, all instructions already put
14904 // the value into element zero. Since scalar size of LHS and RHS may differ
14905 // after isScalarToVec, this should be checked using their own sizes.
14906 if (SToVLHS) {
14907 if (!IsLittleEndian && SToVLHS.getValueType().getScalarSizeInBits() >= 64)
14908 return Res;
14909 // Set up the values for the shuffle vector fixup.
14910 LHSMaxIdx = NumEltsOut / NumEltsIn;
14911 SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget);
14912 if (SToVLHS.getValueType() != LHS.getValueType())
14913 SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
14914 LHS = SToVLHS;
14915 }
14916 if (SToVRHS) {
14917 if (!IsLittleEndian && SToVRHS.getValueType().getScalarSizeInBits() >= 64)
14918 return Res;
14919 RHSMinIdx = NumEltsOut;
14920 RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
14921 SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget);
14922 if (SToVRHS.getValueType() != RHS.getValueType())
14923 SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
14924 RHS = SToVRHS;
14925 }
14926
14927 // Fix up the shuffle mask to reflect where the desired element actually is.
14928 // The minimum and maximum indices that correspond to element zero for both
14929 // the LHS and RHS are computed and will control which shuffle mask entries
14930 // are to be changed. For example, if the RHS is permuted, any shuffle mask
14931 // entries in the range [RHSMinIdx,RHSMaxIdx) will be adjusted.
14932 fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
14933 HalfVec, ValidLaneWidth, Subtarget);
14934 Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
14935
14936 // We may have simplified away the shuffle. We won't be able to do anything
14937 // further with it here.
14938 if (!isa<ShuffleVectorSDNode>(Res))
14939 return Res;
14940 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
14941 }
14942
14943 SDValue TheSplat = IsLittleEndian ? RHS : LHS;
14944 // The common case after we commuted the shuffle is that the RHS is a splat
14945 // and we have elements coming in from the splat at indices that are not
14946 // conducive to using a merge.
14947 // Example:
14948 // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
14949 if (!isSplatBV(TheSplat))
14950 return Res;
14951
14952 // We are looking for a mask such that all even elements are from
14953 // one vector and all odd elements from the other.
14954 if (!isAlternatingShuffMask(Mask, NumElts))
14955 return Res;
14956
14957 // Adjust the mask so we are pulling in the same index from the splat
14958 // as the index from the interesting vector in consecutive elements.
14959 if (IsLittleEndian) {
14960 // Example (even elements from first vector):
14961 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
14962 if (Mask[0] < NumElts)
14963 for (int i = 1, e = Mask.size(); i < e; i += 2)
14964 ShuffV[i] = (ShuffV[i - 1] + NumElts);
14965 // Example (odd elements from first vector):
14966 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
14967 else
14968 for (int i = 0, e = Mask.size(); i < e; i += 2)
14969 ShuffV[i] = (ShuffV[i + 1] + NumElts);
14970 } else {
14971 // Example (even elements from first vector):
14972 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
14973 if (Mask[0] < NumElts)
14974 for (int i = 0, e = Mask.size(); i < e; i += 2)
14975 ShuffV[i] = ShuffV[i + 1] - NumElts;
14976 // Example (odd elements from first vector):
14977 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
14978 else
14979 for (int i = 1, e = Mask.size(); i < e; i += 2)
14980 ShuffV[i] = ShuffV[i - 1] - NumElts;
14981 }
14982
14983 // If the RHS has undefs, we need to remove them since we may have created
14984 // a shuffle that adds those instead of the splat value.
14985 SDValue SplatVal =
14986 cast<BuildVectorSDNode>(TheSplat.getNode())->getSplatValue();
14987 TheSplat = DAG.getSplatBuildVector(TheSplat.getValueType(), dl, SplatVal);
14988
14989 if (IsLittleEndian)
14990 RHS = TheSplat;
14991 else
14992 LHS = TheSplat;
14993 return DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
14994}
14995
14996SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
14997 LSBaseSDNode *LSBase,
14998 DAGCombinerInfo &DCI) const {
14999 assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
15000 "Not a reverse memop pattern!");
15001
15002 auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
15003 auto Mask = SVN->getMask();
15004 int i = 0;
15005 auto I = Mask.rbegin();
15006 auto E = Mask.rend();
15007
15008 for (; I != E; ++I) {
15009 if (*I != i)
15010 return false;
15011 i++;
15012 }
15013 return true;
15014 };
15015
15016 SelectionDAG &DAG = DCI.DAG;
15017 EVT VT = SVN->getValueType(0);
15018
15019 if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
15020 return SDValue();
15021
15022 // Before P9, the PPCVSXSwapRemoval pass adjusts the element order.
15023 // See the comment in PPCVSXSwapRemoval.cpp.
15024 // This transformation conflicts with that pass, so we don't do it here.
15025 if (!Subtarget.hasP9Vector())
15026 return SDValue();
15027
15028 if (!IsElementReverse(SVN))
15029 return SDValue();
15030
15031 if (LSBase->getOpcode() == ISD::LOAD) {
15032 // If the load return value 0 has more than one user except the
15033 // shufflevector instruction, it is not profitable to replace the
15034 // shufflevector with a reverse load.
15035 for (SDNode::use_iterator UI = LSBase->use_begin(), UE = LSBase->use_end();
15036 UI != UE; ++UI)
15037 if (UI.getUse().getResNo() == 0 && UI->getOpcode() != ISD::VECTOR_SHUFFLE)
15038 return SDValue();
15039
15040 SDLoc dl(LSBase);
15041 SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
15042 return DAG.getMemIntrinsicNode(
15043 PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
15044 LSBase->getMemoryVT(), LSBase->getMemOperand());
15045 }
15046
15047 if (LSBase->getOpcode() == ISD::STORE) {
15048 // If there are other uses of the shuffle, the swap cannot be avoided.
15049 // Forcing the use of an X-Form (since swapped stores only have
15050 // X-Forms) without removing the swap is unprofitable.
15051 if (!SVN->hasOneUse())
15052 return SDValue();
15053
15054 SDLoc dl(LSBase);
15055 SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
15056 LSBase->getBasePtr()};
15057 return DAG.getMemIntrinsicNode(
15058 PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
15059 LSBase->getMemoryVT(), LSBase->getMemOperand());
15060 }
15061
15062 llvm_unreachable("Expected a load or store node here");
15063}
15064
15065SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
15066 DAGCombinerInfo &DCI) const {
15067 SelectionDAG &DAG = DCI.DAG;
15068 SDLoc dl(N);
15069 switch (N->getOpcode()) {
15070 default: break;
15071 case ISD::ADD:
15072 return combineADD(N, DCI);
15073 case ISD::SHL:
15074 return combineSHL(N, DCI);
15075 case ISD::SRA:
15076 return combineSRA(N, DCI);
15077 case ISD::SRL:
15078 return combineSRL(N, DCI);
15079 case ISD::MUL:
15080 return combineMUL(N, DCI);
15081 case ISD::FMA:
15082 case PPCISD::FNMSUB:
15083 return combineFMALike(N, DCI);
15084 case PPCISD::SHL:
15085 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
15086 return N->getOperand(0);
15087 break;
15088 case PPCISD::SRL:
15089 if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
15090 return N->getOperand(0);
15091 break;
15092 case PPCISD::SRA:
15093 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
15094 if (C->isZero() || // 0 >>s V -> 0.
15095 C->isAllOnes()) // -1 >>s V -> -1.
15096 return N->getOperand(0);
15097 }
15098 break;
15099 case ISD::SIGN_EXTEND:
15100 case ISD::ZERO_EXTEND:
15101 case ISD::ANY_EXTEND:
15102 return DAGCombineExtBoolTrunc(N, DCI);
15103 case ISD::TRUNCATE:
15104 return combineTRUNCATE(N, DCI);
15105 case ISD::SETCC:
15106 if (SDValue CSCC = combineSetCC(N, DCI))
15107 return CSCC;
15108 LLVM_FALLTHROUGH;
15109 case ISD::SELECT_CC:
15110 return DAGCombineTruncBoolExt(N, DCI);
15111 case ISD::SINT_TO_FP:
15112 case ISD::UINT_TO_FP:
15113 return combineFPToIntToFP(N, DCI);
15114 case ISD::VECTOR_SHUFFLE:
15115 if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
15116 LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
15117 return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
15118 }
15119 return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
15120 case ISD::STORE: {
15121
15122 EVT Op1VT = N->getOperand(1).getValueType();
15123 unsigned Opcode = N->getOperand(1).getOpcode();
15124
15125 if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT) {
15126 SDValue Val= combineStoreFPToInt(N, DCI);
15127 if (Val)
15128 return Val;
15129 }
15130
15131 if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
15132 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
15133 SDValue Val= combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
15134 if (Val)
15135 return Val;
15136 }
15137
15138 // Turn STORE (BSWAP) -> sthbrx/stwbrx.
15139 if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
15140 N->getOperand(1).getNode()->hasOneUse() &&
15141 (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
15142 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
15143
15144 // STBRX can only handle simple types, and it makes no sense to store fewer
15145 // than two bytes in byte-reversed order.
15146 EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
15147 if (mVT.isExtended() || mVT.getSizeInBits() < 16)
15148 break;
15149
15150 SDValue BSwapOp = N->getOperand(1).getOperand(0);
15151 // Do an any-extend to 32-bits if this is a half-word input.
15152 if (BSwapOp.getValueType() == MVT::i16)
15153 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
15154
15155 // If the type of the BSWAP operand is wider than the stored memory width,
15156 // it needs to be shifted to the right side before STBRX.
15157 if (Op1VT.bitsGT(mVT)) {
15158 int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
15159 BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
15160 DAG.getConstant(Shift, dl, MVT::i32));
15161 // Need to truncate if this is a bswap of i64 stored as i32/i16.
15162 if (Op1VT == MVT::i64)
15163 BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
15164 }
15165
15166 SDValue Ops[] = {
15167 N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
15168 };
15169 return
15170 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
15171 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
15172 cast<StoreSDNode>(N)->getMemOperand());
15173 }
15174
15175 // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
15176 // This increases the chance of CSE'ing the constant construction.
15177 if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
15178 isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
15179 // Need to sign-extend to 64 bits to handle negative values.
15180 EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
15181 uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
15182 MemVT.getSizeInBits());
15183 SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
15184
15185 // DAG.getTruncStore() can't be used here because it doesn't accept
15186 // the general (base + offset) addressing mode.
15187 // So we use UpdateNodeOperands and setTruncatingStore instead.
15188 DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
15189 N->getOperand(3));
15190 cast<StoreSDNode>(N)->setTruncatingStore(true);
15191 return SDValue(N, 0);
15192 }
15193
15194 // For little endian, VSX stores require generating xxswapd/stxvd2x.
15195 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
15196 if (Op1VT.isSimple()) {
15197 MVT StoreVT = Op1VT.getSimpleVT();
15198 if (Subtarget.needsSwapsForVSXMemOps() &&
15199 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
15200 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
15201 return expandVSXStoreForLE(N, DCI);
15202 }
15203 break;
15204 }
15205 case ISD::LOAD: {
15206 LoadSDNode *LD = cast<LoadSDNode>(N);
15207 EVT VT = LD->getValueType(0);
15208
15209 // For little endian, VSX loads require generating lxvd2x/xxswapd.
15210 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
15211 if (VT.isSimple()) {
15212 MVT LoadVT = VT.getSimpleVT();
15213 if (Subtarget.needsSwapsForVSXMemOps() &&
15214 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
15215 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
15216 return expandVSXLoadForLE(N, DCI);
15217 }
15218
15219 // We sometimes end up with a 64-bit integer load, from which we extract
15220 // two single-precision floating-point numbers. This happens with
15221 // std::complex<float>, and other similar structures, because of the way we
15222 // canonicalize structure copies. However, if we lack direct moves,
15223 // then the final bitcasts from the extracted integer values to the
15224 // floating-point numbers turn into store/load pairs. Even with direct moves,
15225 // just loading the two floating-point numbers is likely better.
15226 auto ReplaceTwoFloatLoad = [&]() {
15227 if (VT != MVT::i64)
15228 return false;
15229
15230 if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
15231 LD->isVolatile())
15232 return false;
15233
15234 // We're looking for a sequence like this:
15235 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
15236 // t16: i64 = srl t13, Constant:i32<32>
15237 // t17: i32 = truncate t16
15238 // t18: f32 = bitcast t17
15239 // t19: i32 = truncate t13
15240 // t20: f32 = bitcast t19
15241
15242 if (!LD->hasNUsesOfValue(2, 0))
15243 return false;
15244
15245 auto UI = LD->use_begin();
15246 while (UI.getUse().getResNo() != 0) ++UI;
15247 SDNode *Trunc = *UI++;
15248 while (UI.getUse().getResNo() != 0) ++UI;
15249 SDNode *RightShift = *UI;
15250 if (Trunc->getOpcode() != ISD::TRUNCATE)
15251 std::swap(Trunc, RightShift);
15252
15253 if (Trunc->getOpcode() != ISD::TRUNCATE ||
15254 Trunc->getValueType(0) != MVT::i32 ||
15255 !Trunc->hasOneUse())
15256 return false;
15257 if (RightShift->getOpcode() != ISD::SRL ||
15258 !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
15259 RightShift->getConstantOperandVal(1) != 32 ||
15260 !RightShift->hasOneUse())
15261 return false;
15262
15263 SDNode *Trunc2 = *RightShift->use_begin();
15264 if (Trunc2->getOpcode() != ISD::TRUNCATE ||
15265 Trunc2->getValueType(0) != MVT::i32 ||
15266 !Trunc2->hasOneUse())
15267 return false;
15268
15269 SDNode *Bitcast = *Trunc->use_begin();
15270 SDNode *Bitcast2 = *Trunc2->use_begin();
15271
15272 if (Bitcast->getOpcode() != ISD::BITCAST ||
15273 Bitcast->getValueType(0) != MVT::f32)
15274 return false;
15275 if (Bitcast2->getOpcode() != ISD::BITCAST ||
15276 Bitcast2->getValueType(0) != MVT::f32)
15277 return false;
15278
15279 if (Subtarget.isLittleEndian())
15280 std::swap(Bitcast, Bitcast2);
15281
15282 // Bitcast has the second float (in memory-layout order) and Bitcast2
15283 // has the first one.
15284
15285 SDValue BasePtr = LD->getBasePtr();
15286 if (LD->isIndexed()) {
15287 assert(LD->getAddressingMode() == ISD::PRE_INC &&
15288 "Non-pre-inc AM on PPC?");
15289 BasePtr =
15290 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
15291 LD->getOffset());
15292 }
15293
15294 auto MMOFlags =
15295 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
15296 SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
15297 LD->getPointerInfo(), LD->getAlignment(),
15298 MMOFlags, LD->getAAInfo());
15299 SDValue AddPtr =
15300 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
15301 BasePtr, DAG.getIntPtrConstant(4, dl));
15302 SDValue FloatLoad2 = DAG.getLoad(
15303 MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
15304 LD->getPointerInfo().getWithOffset(4),
15305 MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo());
15306
15307 if (LD->isIndexed()) {
15308 // Note that DAGCombine should re-form any pre-increment load(s) from
15309 // what is produced here if that makes sense.
15310 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
15311 }
15312
15313 DCI.CombineTo(Bitcast2, FloatLoad);
15314 DCI.CombineTo(Bitcast, FloatLoad2);
15315
15316 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
15317 SDValue(FloatLoad2.getNode(), 1));
15318 return true;
15319 };
15320
15321 if (ReplaceTwoFloatLoad())
15322 return SDValue(N, 0);
15323
15324 EVT MemVT = LD->getMemoryVT();
15325 Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
15326 Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
15327 if (LD->isUnindexed() && VT.isVector() &&
15328 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
15329 // P8 and later hardware should just use LOAD.
15330 !Subtarget.hasP8Vector() &&
15331 (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
15332 VT == MVT::v4f32))) &&
15333 LD->getAlign() < ABIAlignment) {
15334 // This is a type-legal unaligned Altivec load.
15335 SDValue Chain = LD->getChain();
15336 SDValue Ptr = LD->getBasePtr();
15337 bool isLittleEndian = Subtarget.isLittleEndian();
15338
15339 // This implements the loading of unaligned vectors as described in
15340 // the venerable Apple Velocity Engine overview. Specifically:
15341 // https://developer.apple.com/hardwaredrivers/ve/alignment.html
15342 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
15343 //
15344 // The general idea is to expand a sequence of one or more unaligned
15345 // loads into an alignment-based permutation-control instruction (lvsl
15346 // or lvsr), a series of regular vector loads (which always truncate
15347 // their input address to an aligned address), and a series of
15348 // permutations. The results of these permutations are the requested
15349 // loaded values. The trick is that the last "extra" load is not taken
15350 // from the address you might suspect (sizeof(vector) bytes after the
15351 // last requested load), but rather sizeof(vector) - 1 bytes after the
15352 // last requested vector. The point of this is to avoid a page fault if
15353 // the base address happened to be aligned. This works because if the
15354 // base address is aligned, then adding less than a full vector length
15355 // will cause the last vector in the sequence to be (re)loaded.
15356 // Otherwise, the next vector will be fetched as you might suspect was
15357 // necessary.
15358
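// Editor's note: a hedged numeric illustration of the scheme described above
// (addresses assumed for the example). Loading 16 bytes from the unaligned
// address 0x1008: lvx on Ptr truncates to 0x1000, lvx on Ptr + 15 truncates
// to 0x1010, and vperm with the lvsl/lvsr control vector stitches bytes
// 8..23 together. If Ptr had been aligned at 0x1000, Ptr + 15 would still
// truncate back to 0x1000, so the "extra" load re-reads the same vector and
// never touches the next page.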
15359 // We might be able to reuse the permutation generation from
15360 // a different base address offset from this one by an aligned amount.
15361 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
15362 // optimization later.
15363 Intrinsic::ID Intr, IntrLD, IntrPerm;
15364 MVT PermCntlTy, PermTy, LDTy;
15365 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
15366 : Intrinsic::ppc_altivec_lvsl;
15367 IntrLD = Intrinsic::ppc_altivec_lvx;
15368 IntrPerm = Intrinsic::ppc_altivec_vperm;
15369 PermCntlTy = MVT::v16i8;
15370 PermTy = MVT::v4i32;
15371 LDTy = MVT::v4i32;
15372
15373 SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
15374
15375 // Create the new MMO for the new base load. It is like the original MMO,
15376 // but represents an area in memory almost twice the vector size centered
15377 // on the original address. If the address is unaligned, we might start
15378 // reading up to (sizeof(vector)-1) bytes below the address of the
15379 // original unaligned load.
15380 MachineFunction &MF = DAG.getMachineFunction();
15381 MachineMemOperand *BaseMMO =
15382 MF.getMachineMemOperand(LD->getMemOperand(),
15383 -(long)MemVT.getStoreSize()+1,
15384 2*MemVT.getStoreSize()-1);
15385
15386 // Create the new base load.
15387 SDValue LDXIntID =
15388 DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
15389 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
15390 SDValue BaseLoad =
15391 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
15392 DAG.getVTList(PermTy, MVT::Other),
15393 BaseLoadOps, LDTy, BaseMMO);
15394
15395 // Note that the value of IncOffset (which is provided to the next
15396 // load's pointer info offset value, and thus used to calculate the
15397 // alignment), and the value of IncValue (which is actually used to
15398 // increment the pointer value) are different! This is because we
15399 // require the next load to appear to be aligned, even though it
15400 // is actually offset from the base pointer by a lesser amount.
15401 int IncOffset = VT.getSizeInBits() / 8;
15402 int IncValue = IncOffset;
15403
15404 // Walk (both up and down) the chain looking for another load at the real
15405 // (aligned) offset (the alignment of the other load does not matter in
15406 // this case). If found, then do not use the offset reduction trick, as
15407 // that will prevent the loads from being later combined (as they would
15408 // otherwise be duplicates).
15409 if (!findConsecutiveLoad(LD, DAG))
15410 --IncValue;
15411
15412 SDValue Increment =
15413 DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
15414 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
15415
15416 MachineMemOperand *ExtraMMO =
15417 MF.getMachineMemOperand(LD->getMemOperand(),
15418 1, 2*MemVT.getStoreSize()-1);
15419 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
15420 SDValue ExtraLoad =
15421 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
15422 DAG.getVTList(PermTy, MVT::Other),
15423 ExtraLoadOps, LDTy, ExtraMMO);
15424
15425 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
15426 BaseLoad.getValue(1), ExtraLoad.getValue(1));
15427
15428 // Because vperm has a big-endian bias, we must reverse the order
15429 // of the input vectors and complement the permute control vector
15430 // when generating little endian code. We have already handled the
15431 // latter by using lvsr instead of lvsl, so just reverse BaseLoad
15432 // and ExtraLoad here.
15433 SDValue Perm;
15434 if (isLittleEndian)
15435 Perm = BuildIntrinsicOp(IntrPerm,
15436 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
15437 else
15438 Perm = BuildIntrinsicOp(IntrPerm,
15439 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
15440
15441 if (VT != PermTy)
15442 Perm = Subtarget.hasAltivec()
15443 ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
15444 : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
15445 DAG.getTargetConstant(1, dl, MVT::i64));
15446 // second argument is 1 because this rounding
15447 // is always exact.
15448
15449 // The output of the permutation is our loaded result, the TokenFactor is
15450 // our new chain.
15451 DCI.CombineTo(N, Perm, TF);
15452 return SDValue(N, 0);
15453 }
15454 }
15455 break;
15456 case ISD::INTRINSIC_WO_CHAIN: {
15457 bool isLittleEndian = Subtarget.isLittleEndian();
15458 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
15459 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
15460 : Intrinsic::ppc_altivec_lvsl);
15461 if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
15462 SDValue Add = N->getOperand(1);
15463
15464 int Bits = 4 /* 16 byte alignment */;
15465
15466 if (DAG.MaskedValueIsZero(Add->getOperand(1),
15467 APInt::getAllOnes(Bits /* alignment */)
15468 .zext(Add.getScalarValueSizeInBits()))) {
15469 SDNode *BasePtr = Add->getOperand(0).getNode();
15470 for (SDNode *U : BasePtr->uses()) {
15471 if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
15472 cast<ConstantSDNode>(U->getOperand(0))->getZExtValue() == IID) {
15473 // We've found another LVSL/LVSR, and this address is an aligned
15474 // multiple of that one. The results will be the same, so use the
15475 // one we've just found instead.
15476
15477 return SDValue(U, 0);
15478 }
15479 }
15480 }
15481
15482 if (isa<ConstantSDNode>(Add->getOperand(1))) {
15483 SDNode *BasePtr = Add->getOperand(0).getNode();
15484 for (SDNode *U : BasePtr->uses()) {
15485 if (U->getOpcode() == ISD::ADD &&
15486 isa<ConstantSDNode>(U->getOperand(1)) &&
15487 (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
15488 cast<ConstantSDNode>(U->getOperand(1))->getZExtValue()) %
15489 (1ULL << Bits) ==
15490 0) {
15491 SDNode *OtherAdd = U;
15492 for (SDNode *V : OtherAdd->uses()) {
15493 if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
15494 cast<ConstantSDNode>(V->getOperand(0))->getZExtValue() ==
15495 IID) {
15496 return SDValue(V, 0);
15497 }
15498 }
15499 }
15500 }
15501 }
15502 }
15503
15504 // Combine vmaxsw/h/b(a, a's negation) to abs(a).
15505 // This exposes the vabsduw/h/b opportunity downstream.
15506 if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
15507 (IID == Intrinsic::ppc_altivec_vmaxsw ||
15508 IID == Intrinsic::ppc_altivec_vmaxsh ||
15509 IID == Intrinsic::ppc_altivec_vmaxsb)) {
15510 SDValue V1 = N->getOperand(1);
15511 SDValue V2 = N->getOperand(2);
15512 if ((V1.getSimpleValueType() == MVT::v4i32 ||
15513 V1.getSimpleValueType() == MVT::v8i16 ||
15514 V1.getSimpleValueType() == MVT::v16i8) &&
15515 V1.getSimpleValueType() == V2.getSimpleValueType()) {
15516 // (0-a, a)
15517 if (V1.getOpcode() == ISD::SUB &&
15518 ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
15519 V1.getOperand(1) == V2) {
15520 return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
15521 }
15522 // (a, 0-a)
15523 if (V2.getOpcode() == ISD::SUB &&
15524 ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
15525 V2.getOperand(1) == V1) {
15526 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
15527 }
15528 // (x-y, y-x)
15529 if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
15530 V1.getOperand(0) == V2.getOperand(1) &&
15531 V1.getOperand(1) == V2.getOperand(0)) {
15532 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
15533 }
15534 }
15535 }
15536 }
15537
15538 break;
15539 case ISD::INTRINSIC_W_CHAIN:
15540 // For little endian, VSX loads require generating lxvd2x/xxswapd.
15541 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
15542 if (Subtarget.needsSwapsForVSXMemOps()) {
15543 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
15544 default:
15545 break;
15546 case Intrinsic::ppc_vsx_lxvw4x:
15547 case Intrinsic::ppc_vsx_lxvd2x:
15548 return expandVSXLoadForLE(N, DCI);
15549 }
15550 }
15551 break;
15552 case ISD::INTRINSIC_VOID:
15553 // For little endian, VSX stores require generating xxswapd/stxvd2x.
15554 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
15555 if (Subtarget.needsSwapsForVSXMemOps()) {
15556 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
15557 default:
15558 break;
15559 case Intrinsic::ppc_vsx_stxvw4x:
15560 case Intrinsic::ppc_vsx_stxvd2x:
15561 return expandVSXStoreForLE(N, DCI);
15562 }
15563 }
15564 break;
15565 case ISD::BSWAP: {
15566 // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
15567 // For subtargets without LDBRX, we can still do better than the default
15568 // expansion even for 64-bit BSWAP (LOAD).
15569 bool Is64BitBswapOn64BitTgt =
15570 Subtarget.isPPC64() && N->getValueType(0) == MVT::i64;
15571 bool IsSingleUseNormalLd = ISD::isNormalLoad(N->getOperand(0).getNode()) &&
15572 N->getOperand(0).hasOneUse();
15573 if (IsSingleUseNormalLd &&
15574 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
15575 (Subtarget.hasLDBRX() && Is64BitBswapOn64BitTgt))) {
15576 SDValue Load = N->getOperand(0);
15577 LoadSDNode *LD = cast<LoadSDNode>(Load);
15578 // Create the byte-swapping load.
15579 SDValue Ops[] = {
15580 LD->getChain(), // Chain
15581 LD->getBasePtr(), // Ptr
15582 DAG.getValueType(N->getValueType(0)) // VT
15583 };
15584 SDValue BSLoad =
15585 DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
15586 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
15587 MVT::i64 : MVT::i32, MVT::Other),
15588 Ops, LD->getMemoryVT(), LD->getMemOperand());
15589
15590 // If this is an i16 load, insert the truncate.
15591 SDValue ResVal = BSLoad;
15592 if (N->getValueType(0) == MVT::i16)
15593 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
15594
15595 // First, combine the bswap away. This makes the value produced by the
15596 // load dead.
15597 DCI.CombineTo(N, ResVal);
15598
15599 // Next, combine the load away, we give it a bogus result value but a real
15600 // chain result. The result value is dead because the bswap is dead.
15601 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
15602
15603 // Return N so it doesn't get rechecked!
15604 return SDValue(N, 0);
15605 }
15606 // Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only
15607 // before legalization so that the BUILD_PAIR is handled correctly.
15608 if (!DCI.isBeforeLegalize() || !Is64BitBswapOn64BitTgt ||
15609 !IsSingleUseNormalLd)
15610 return SDValue();
15611 LoadSDNode *LD = cast<LoadSDNode>(N->getOperand(0));
15612
15613 // Can't split volatile or atomic loads.
15614 if (!LD->isSimple())
15615 return SDValue();
15616 SDValue BasePtr = LD->getBasePtr();
15617 SDValue Lo = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr,
15618 LD->getPointerInfo(), LD->getAlignment());
15619 Lo = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Lo);
15620 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
15621 DAG.getIntPtrConstant(4, dl));
15622 MachineMemOperand *NewMMO = DAG.getMachineFunction().getMachineMemOperand(
15623 LD->getMemOperand(), 4, 4);
15624 SDValue Hi = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr, NewMMO);
15625 Hi = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Hi);
15626 SDValue Res;
15627 if (Subtarget.isLittleEndian())
15628 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Hi, Lo);
15629 else
15630 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
15631 SDValue TF =
15632 DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
15633 Hi.getOperand(0).getValue(1), Lo.getOperand(0).getValue(1));
15634 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), TF);
15635 return Res;
15636 }
15637 case PPCISD::VCMP:
15638 // If a VCMP_rec node already exists with exactly the same operands as this
15639 // node, use its result instead of this node (VCMP_rec computes both a CR6
15640 // and a normal output).
15641 //
15642 if (!N->getOperand(0).hasOneUse() &&
15643 !N->getOperand(1).hasOneUse() &&
15644 !N->getOperand(2).hasOneUse()) {
15645
15646 // Scan all of the users of the LHS, looking for VCMP_rec's that match.
15647 SDNode *VCMPrecNode = nullptr;
15648
15649 SDNode *LHSN = N->getOperand(0).getNode();
15650 for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
15651 UI != E; ++UI)
15652 if (UI->getOpcode() == PPCISD::VCMP_rec &&
15653 UI->getOperand(1) == N->getOperand(1) &&
15654 UI->getOperand(2) == N->getOperand(2) &&
15655 UI->getOperand(0) == N->getOperand(0)) {
15656 VCMPrecNode = *UI;
15657 break;
15658 }
15659
15660 // If there is no VCMP_rec node, or if the flag value has a single use,
15661 // don't transform this.
15662 if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
15663 break;
15664
15665 // Look at the (necessarily single) use of the flag value. If it has a
15666 // chain, this transformation is more complex. Note that multiple things
15667 // could use the value result, which we should ignore.
15668 SDNode *FlagUser = nullptr;
15669 for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
15670 FlagUser == nullptr; ++UI) {
15671 assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
15672 SDNode *User = *UI;
15673 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
15674 if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
15675 FlagUser = User;
15676 break;
15677 }
15678 }
15679 }
15680
15681 // If the user is a MFOCRF instruction, we know this is safe.
15682 // Otherwise we give up for right now.
15683 if (FlagUser->getOpcode() == PPCISD::MFOCRF)
15684 return SDValue(VCMPrecNode, 0);
15685 }
15686 break;
15687 case ISD::BRCOND: {
15688 SDValue Cond = N->getOperand(1);
15689 SDValue Target = N->getOperand(2);
15690
15691 if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
15692 cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
15693 Intrinsic::loop_decrement) {
15694
15695 // We now need to make the intrinsic dead (it cannot be instruction
15696 // selected).
15697 DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
15698 assert(Cond.getNode()->hasOneUse() &&
15699 "Counter decrement has more than one use");
15700
15701 return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
15702 N->getOperand(0), Target);
15703 }
15704 }
15705 break;
15706 case ISD::BR_CC: {
15707 // If this is a branch on an altivec predicate comparison, lower this so
15708 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
15709 // lowering is done pre-legalize, because the legalizer lowers the predicate
15710 // compare down to code that is difficult to reassemble.
15711 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
15712 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
15713
15714 // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
15715 // value. If so, pass-through the AND to get to the intrinsic.
15716 if (LHS.getOpcode() == ISD::AND &&
15717 LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
15718 cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
15719 Intrinsic::loop_decrement &&
15720 isa<ConstantSDNode>(LHS.getOperand(1)) &&
15721 !isNullConstant(LHS.getOperand(1)))
15722 LHS = LHS.getOperand(0);
15723
15724 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
15725 cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
15726 Intrinsic::loop_decrement &&
15727 isa<ConstantSDNode>(RHS)) {
15728 assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
15729 "Counter decrement comparison is not EQ or NE");
15730
15731 unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
15732 bool isBDNZ = (CC == ISD::SETEQ && Val) ||
15733 (CC == ISD::SETNE && !Val);
15734
15735 // We now need to make the intrinsic dead (it cannot be instruction
15736 // selected).
15737 DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
15738 assert(LHS.getNode()->hasOneUse() &&
15739 "Counter decrement has more than one use");
15740
15741 return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
15742 N->getOperand(0), N->getOperand(4));
15743 }
15744
15745 int CompareOpc;
15746 bool isDot;
15747
15748 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
15749 isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
15750 getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
15751 assert(isDot && "Can't compare against a vector result!");
15752
15753 // If this is a comparison against something other than 0/1, then we know
15754 // that the condition is never/always true.
15755 unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
15756 if (Val != 0 && Val != 1) {
15757 if (CC == ISD::SETEQ) // Cond never true, remove branch.
15758 return N->getOperand(0);
15759 // Always !=, turn it into an unconditional branch.
15760 return DAG.getNode(ISD::BR, dl, MVT::Other,
15761 N->getOperand(0), N->getOperand(4));
15762 }
15763
15764 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
15765
15766 // Create the PPCISD altivec 'dot' comparison node.
15767 SDValue Ops[] = {
15768 LHS.getOperand(2), // LHS of compare
15769 LHS.getOperand(3), // RHS of compare
15770 DAG.getConstant(CompareOpc, dl, MVT::i32)
15771 };
15772 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
15773 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
15774
15775 // Unpack the result based on how the target uses it.
15776 PPC::Predicate CompOpc;
15777 switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
15778 default: // Can't happen, don't crash on invalid number though.
15779 case 0: // Branch on the value of the EQ bit of CR6.
15780 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
15781 break;
15782 case 1: // Branch on the inverted value of the EQ bit of CR6.
15783 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
15784 break;
15785 case 2: // Branch on the value of the LT bit of CR6.
15786 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
15787 break;
15788 case 3: // Branch on the inverted value of the LT bit of CR6.
15789 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
15790 break;
15791 }
15792
15793 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
15794 DAG.getConstant(CompOpc, dl, MVT::i32),
15795 DAG.getRegister(PPC::CR6, MVT::i32),
15796 N->getOperand(4), CompNode.getValue(1));
15797 }
15798 break;
15799 }
15800 case ISD::BUILD_VECTOR:
15801 return DAGCombineBuildVector(N, DCI);
15802 case ISD::ABS:
15803 return combineABS(N, DCI);
15804 case ISD::VSELECT:
15805 return combineVSelect(N, DCI);
15806 }
15807
15808 return SDValue();
15809}
15810
15811SDValue
15812PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
15813 SelectionDAG &DAG,
15814 SmallVectorImpl<SDNode *> &Created) const {
15815 // fold (sdiv X, pow2)
15816 EVT VT = N->getValueType(0);
15817 if (VT == MVT::i64 && !Subtarget.isPPC64())
15818 return SDValue();
15819 if ((VT != MVT::i32 && VT != MVT::i64) ||
15820 !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
15821 return SDValue();
15822
15823 SDLoc DL(N);
15824 SDValue N0 = N->getOperand(0);
15825
15826 bool IsNegPow2 = Divisor.isNegatedPowerOf2();
15827 unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
15828 SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
15829
15830 SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
15831 Created.push_back(Op.getNode());
15832
15833 if (IsNegPow2) {
15834 Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
15835 Created.push_back(Op.getNode());
15836 }
15837
15838 return Op;
15839}
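
The fold above turns a signed divide by plus or minus 2^k into PPCISD::SRA_ADDZE and, for a negative divisor, a final subtract from zero. A minimal scalar sketch of what the underlying srawi/addze pair computes (illustrative only; the helper name and scalar framing are assumptions, not code from this file):

  #include <cstdint>

  // Arithmetic shift rounds toward negative infinity; adding the carry bit
  // (set when a negative value shifted out nonzero low bits) corrects the
  // quotient so it rounds toward zero, matching C signed division by 1 << Lg2.
  static int32_t sdivByPow2(int32_t X, unsigned Lg2) {
    int32_t Shifted = X >> Lg2;                                     // srawi
    bool Carry = X < 0 && ((uint32_t)X & ((1u << Lg2) - 1)) != 0;   // CA bit
    return Shifted + (Carry ? 1 : 0);                               // addze
  }

For the negated-power-of-two case the code above additionally emits 0 - Op, mirroring the final ISD::SUB.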
15840
15841//===----------------------------------------------------------------------===//
15842// Inline Assembly Support
15843//===----------------------------------------------------------------------===//
15844
15845void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
15846 KnownBits &Known,
15847 const APInt &DemandedElts,
15848 const SelectionDAG &DAG,
15849 unsigned Depth) const {
15850 Known.resetAll();
15851 switch (Op.getOpcode()) {
15852 default: break;
15853 case PPCISD::LBRX: {
15854 // lhbrx is known to have the top bits cleared out.
15855 if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
15856 Known.Zero = 0xFFFF0000;
15857 break;
15858 }
15859 case ISD::INTRINSIC_WO_CHAIN: {
15860 switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
15861 default: break;
15862 case Intrinsic::ppc_altivec_vcmpbfp_p:
15863 case Intrinsic::ppc_altivec_vcmpeqfp_p:
15864 case Intrinsic::ppc_altivec_vcmpequb_p:
15865 case Intrinsic::ppc_altivec_vcmpequh_p:
15866 case Intrinsic::ppc_altivec_vcmpequw_p:
15867 case Intrinsic::ppc_altivec_vcmpequd_p:
15868 case Intrinsic::ppc_altivec_vcmpequq_p:
15869 case Intrinsic::ppc_altivec_vcmpgefp_p:
15870 case Intrinsic::ppc_altivec_vcmpgtfp_p:
15871 case Intrinsic::ppc_altivec_vcmpgtsb_p:
15872 case Intrinsic::ppc_altivec_vcmpgtsh_p:
15873 case Intrinsic::ppc_altivec_vcmpgtsw_p:
15874 case Intrinsic::ppc_altivec_vcmpgtsd_p:
15875 case Intrinsic::ppc_altivec_vcmpgtsq_p:
15876 case Intrinsic::ppc_altivec_vcmpgtub_p:
15877 case Intrinsic::ppc_altivec_vcmpgtuh_p:
15878 case Intrinsic::ppc_altivec_vcmpgtuw_p:
15879 case Intrinsic::ppc_altivec_vcmpgtud_p:
15880 case Intrinsic::ppc_altivec_vcmpgtuq_p:
15881 Known.Zero = ~1U; // All bits but the low one are known to be zero.
15882 break;
15883 }
15884 break;
15885 }
15886 case ISD::INTRINSIC_W_CHAIN: {
15887 switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
15888 default:
15889 break;
15890 case Intrinsic::ppc_load2r:
15891 // Top bits are cleared for load2r (which is the same as lhbrx).
15892 Known.Zero = 0xFFFF0000;
15893 break;
15894 }
15895 break;
15896 }
15897 }
15898}
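
Reporting Known.Zero = ~1U for the predicate intrinsics tells generic DAG combines that only bit 0 of the result can ever be set, so redundant masking or zero-extension of the predicate result folds away. A small user-level illustration (an assumed example, not code from this file; requires AltiVec):

  #include <altivec.h>

  // vec_all_eq lowers to one of the vcmp*_p intrinsics handled above, so the
  // "& 1" below is provably a no-op once known bits are computed.
  int allEqual(vector unsigned int A, vector unsigned int B) {
    return vec_all_eq(A, B) & 1;
  }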
15899
15900Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
15901 switch (Subtarget.getCPUDirective()) {
15902 default: break;
15903 case PPC::DIR_970:
15904 case PPC::DIR_PWR4:
15905 case PPC::DIR_PWR5:
15906 case PPC::DIR_PWR5X:
15907 case PPC::DIR_PWR6:
15908 case PPC::DIR_PWR6X:
15909 case PPC::DIR_PWR7:
15910 case PPC::DIR_PWR8:
15911 case PPC::DIR_PWR9:
15912 case PPC::DIR_PWR10:
15913 case PPC::DIR_PWR_FUTURE: {
15914 if (!ML)
15915 break;
15916
15917 if (!DisableInnermostLoopAlign32) {
15918 // If the nested loop is an innermost loop, prefer a 32-byte alignment,
15919 // so that we can decrease cache misses and branch-prediction misses.
15920 // Actual alignment of the loop will depend on the hotness check and other
15921 // logic in alignBlocks.
15922 if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
15923 return Align(32);
15924 }
15925
15926 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
15927
15928 // For small loops (between 5 and 8 instructions), align to a 32-byte
15929 // boundary so that the entire loop fits in one instruction-cache line.
15930 uint64_t LoopSize = 0;
15931 for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
15932 for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) {
15933 LoopSize += TII->getInstSizeInBytes(*J);
15934 if (LoopSize > 32)
15935 break;
15936 }
15937
15938 if (LoopSize > 16 && LoopSize <= 32)
15939 return Align(32);
15940
15941 break;
15942 }
15943 }
15944
15945 return TargetLowering::getPrefLoopAlignment(ML);
15946}
15947
15948/// getConstraintType - Given a constraint, return the type of
15949/// constraint it is for this target.
15950PPCTargetLowering::ConstraintType
15951PPCTargetLowering::getConstraintType(StringRef Constraint) const {
15952 if (Constraint.size() == 1) {
15953 switch (Constraint[0]) {
15954 default: break;
15955 case 'b':
15956 case 'r':
15957 case 'f':
15958 case 'd':
15959 case 'v':
15960 case 'y':
15961 return C_RegisterClass;
15962 case 'Z':
15963 // FIXME: While Z does indicate a memory constraint, it specifically
15964 // indicates an r+r address (used in conjunction with the 'y' modifier
15965 // in the replacement string). Currently, we're forcing the base
15966 // register to be r0 in the asm printer (which is interpreted as zero)
15967 // and forming the complete address in the second register. This is
15968 // suboptimal.
15969 return C_Memory;
15970 }
15971 } else if (Constraint == "wc") { // individual CR bits.
15972 return C_RegisterClass;
15973 } else if (Constraint == "wa" || Constraint == "wd" ||
15974 Constraint == "wf" || Constraint == "ws" ||
15975 Constraint == "wi" || Constraint == "ww") {
15976 return C_RegisterClass; // VSX registers.
15977 }
15978 return TargetLowering::getConstraintType(Constraint);
15979}
15980
15981/// Examine constraint type and operand type and determine a weight value.
15982/// This object must already have been set up with the operand type
15983/// and the current alternative constraint selected.
15984TargetLowering::ConstraintWeight
15985PPCTargetLowering::getSingleConstraintMatchWeight(
15986 AsmOperandInfo &info, const char *constraint) const {
15987 ConstraintWeight weight = CW_Invalid;
15988 Value *CallOperandVal = info.CallOperandVal;
15989 // If we don't have a value, we can't do a match,
15990 // but allow it at the lowest weight.
15991 if (!CallOperandVal)
15992 return CW_Default;
15993 Type *type = CallOperandVal->getType();
15994
15995 // Look at the constraint type.
15996 if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
15997 return CW_Register; // an individual CR bit.
15998 else if ((StringRef(constraint) == "wa" ||
15999 StringRef(constraint) == "wd" ||
16000 StringRef(constraint) == "wf") &&
16001 type->isVectorTy())
16002 return CW_Register;
16003 else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
16004 return CW_Register; // holds 64-bit integer data.
16005 else if (StringRef(constraint) == "ws" && type->isDoubleTy())
16006 return CW_Register;
16007 else if (StringRef(constraint) == "ww" && type->isFloatTy())
16008 return CW_Register;
16009
16010 switch (*constraint) {
16011 default:
16012 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
16013 break;
16014 case 'b':
16015 if (type->isIntegerTy())
16016 weight = CW_Register;
16017 break;
16018 case 'f':
16019 if (type->isFloatTy())
16020 weight = CW_Register;
16021 break;
16022 case 'd':
16023 if (type->isDoubleTy())
16024 weight = CW_Register;
16025 break;
16026 case 'v':
16027 if (type->isVectorTy())
16028 weight = CW_Register;
16029 break;
16030 case 'y':
16031 weight = CW_Register;
16032 break;
16033 case 'Z':
16034 weight = CW_Memory;
16035 break;
16036 }
16037 return weight;
16038}
16039
16040std::pair<unsigned, const TargetRegisterClass *>
16041PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
16042 StringRef Constraint,
16043 MVT VT) const {
16044 if (Constraint.size() == 1) {
16045 // GCC RS6000 Constraint Letters
16046 switch (Constraint[0]) {
16047 case 'b': // R1-R31
16048 if (VT == MVT::i64 && Subtarget.isPPC64())
16049 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
16050 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
16051 case 'r': // R0-R31
16052 if (VT == MVT::i64 && Subtarget.isPPC64())
16053 return std::make_pair(0U, &PPC::G8RCRegClass);
16054 return std::make_pair(0U, &PPC::GPRCRegClass);
16055 // 'd' and 'f' constraints are both defined to be "the floating point
16056 // registers", where one is for 32-bit and the other for 64-bit. We don't
16057 // really care overly much here so just give them all the same reg classes.
16058 case 'd':
16059 case 'f':
16060 if (Subtarget.hasSPE()) {
16061 if (VT == MVT::f32 || VT == MVT::i32)
16062 return std::make_pair(0U, &PPC::GPRCRegClass);
16063 if (VT == MVT::f64 || VT == MVT::i64)
16064 return std::make_pair(0U, &PPC::SPERCRegClass);
16065 } else {
16066 if (VT == MVT::f32 || VT == MVT::i32)
16067 return std::make_pair(0U, &PPC::F4RCRegClass);
16068 if (VT == MVT::f64 || VT == MVT::i64)
16069 return std::make_pair(0U, &PPC::F8RCRegClass);
16070 }
16071 break;
16072 case 'v':
16073 if (Subtarget.hasAltivec() && VT.isVector())
16074 return std::make_pair(0U, &PPC::VRRCRegClass);
16075 else if (Subtarget.hasVSX())
16076 // Scalars in Altivec registers only make sense with VSX.
16077 return std::make_pair(0U, &PPC::VFRCRegClass);
16078 break;
16079 case 'y': // crrc
16080 return std::make_pair(0U, &PPC::CRRCRegClass);
16081 }
16082 } else if (Constraint == "wc" && Subtarget.useCRBits()) {
16083 // An individual CR bit.
16084 return std::make_pair(0U, &PPC::CRBITRCRegClass);
16085 } else if ((Constraint == "wa" || Constraint == "wd" ||
16086 Constraint == "wf" || Constraint == "wi") &&
16087 Subtarget.hasVSX()) {
16088 // A VSX register for either a scalar (FP) or vector. There is no
16089 // support for single precision scalars on subtargets prior to Power8.
16090 if (VT.isVector())
16091 return std::make_pair(0U, &PPC::VSRCRegClass);
16092 if (VT == MVT::f32 && Subtarget.hasP8Vector())
16093 return std::make_pair(0U, &PPC::VSSRCRegClass);
16094 return std::make_pair(0U, &PPC::VSFRCRegClass);
16095 } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
16096 if (VT == MVT::f32 && Subtarget.hasP8Vector())
16097 return std::make_pair(0U, &PPC::VSSRCRegClass);
16098 else
16099 return std::make_pair(0U, &PPC::VSFRCRegClass);
16100 } else if (Constraint == "lr") {
16101 if (VT == MVT::i64)
16102 return std::make_pair(0U, &PPC::LR8RCRegClass);
16103 else
16104 return std::make_pair(0U, &PPC::LRRCRegClass);
16105 }
16106
16107 // Handle special cases of physical registers that are not properly handled
16108 // by the base class.
16109 if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
16110 // If we name a VSX register, we can't defer to the base class because it
16111 // will not recognize the correct register (their names will be VSL{0-31}
16112 // and V{0-31} so they won't match). So we match them here.
16113 if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
16114 int VSNum = atoi(Constraint.data() + 3);
16115 assert(VSNum >= 0 && VSNum <= 63 &&
16116 "Attempted to access a vsr out of range");
16117 if (VSNum < 32)
16118 return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
16119 return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
16120 }
16121
16122 // For float registers, we can't defer to the base class as it will match
16123 // the SPILLTOVSRRC class.
16124 if (Constraint.size() > 3 && Constraint[1] == 'f') {
16125 int RegNum = atoi(Constraint.data() + 2);
16126 if (RegNum > 31 || RegNum < 0)
16127 report_fatal_error("Invalid floating point register number");
16128 if (VT == MVT::f32 || VT == MVT::i32)
16129 return Subtarget.hasSPE()
16130 ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
16131 : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
16132 if (VT == MVT::f64 || VT == MVT::i64)
16133 return Subtarget.hasSPE()
16134 ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
16135 : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);
16136 }
16137 }
16138
16139 std::pair<unsigned, const TargetRegisterClass *> R =
16140 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
16141
16142 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
16143 // (which we call X[0-9]+). If a 64-bit value has been requested, and a
16144 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
16145 // register.
16146 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
16147 // the AsmName field from *RegisterInfo.td, then this would not be necessary.
16148 if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
16149 PPC::GPRCRegClass.contains(R.first))
16150 return std::make_pair(TRI->getMatchingSuperReg(R.first,
16151 PPC::sub_32, &PPC::G8RCRegClass),
16152 &PPC::G8RCRegClass);
16153
16154 // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
16155 if (!R.second && StringRef("{cc}").equals_insensitive(Constraint)) {
16156 R.first = PPC::CR0;
16157 R.second = &PPC::CRRCRegClass;
16158 }
16159 // FIXME: This warning should ideally be emitted in the front end.
16160 const auto &TM = getTargetMachine();
16161 if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) {
16162 if (((R.first >= PPC::V20 && R.first <= PPC::V31) ||
16163 (R.first >= PPC::VF20 && R.first <= PPC::VF31)) &&
16164 (R.second == &PPC::VSRCRegClass || R.second == &PPC::VSFRCRegClass))
16165 errs() << "warning: vector registers 20 to 32 are reserved in the "
16166 "default AIX AltiVec ABI and cannot be used\n";
16167 }
16168
16169 return R;
16170}
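
For reference, a user-level inline-asm fragment that exercises the single-letter constraints resolved above (illustrative only, not code from this file): "r" operands map to the GPRC/G8RC classes, while "b" excludes r0 so the base register of an indexed load is never interpreted as the constant zero.

  // Assumes a 64-bit target; ldx RT, RA, RB loads from RA + RB.
  unsigned long loadIndexed(unsigned long *Base, unsigned long Off) {
    unsigned long V;
    asm("ldx %0, %1, %2" : "=r"(V) : "b"(Base), "r"(Off) : "memory");
    return V;
  }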
16171
16172/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
16173/// vector. If it is invalid, don't add anything to Ops.
16174void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
16175 std::string &Constraint,
16176 std::vector<SDValue>&Ops,
16177 SelectionDAG &DAG) const {
16178 SDValue Result;
16179
16180 // Only support length 1 constraints.
16181 if (Constraint.length() > 1) return;
16182
16183 char Letter = Constraint[0];
16184 switch (Letter) {
16185 default: break;
16186 case 'I':
16187 case 'J':
16188 case 'K':
16189 case 'L':
16190 case 'M':
16191 case 'N':
16192 case 'O':
16193 case 'P': {
16194 ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
16195 if (!CST) return; // Must be an immediate to match.
16196 SDLoc dl(Op);
16197 int64_t Value = CST->getSExtValue();
16198 EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
16199 // numbers are printed as such.
16200 switch (Letter) {
16201 default: llvm_unreachable("Unknown constraint letter!");
16202 case 'I': // "I" is a signed 16-bit constant.
16203 if (isInt<16>(Value))
16204 Result = DAG.getTargetConstant(Value, dl, TCVT);
16205 break;
16206 case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
16207 if (isShiftedUInt<16, 16>(Value))
16208 Result = DAG.getTargetConstant(Value, dl, TCVT);
16209 break;
16210 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
16211 if (isShiftedInt<16, 16>(Value))
16212 Result = DAG.getTargetConstant(Value, dl, TCVT);
16213 break;
16214 case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
16215 if (isUInt<16>(Value))
16216 Result = DAG.getTargetConstant(Value, dl, TCVT);
16217 break;
16218 case 'M': // "M" is a constant that is greater than 31.
16219 if (Value > 31)
16220 Result = DAG.getTargetConstant(Value, dl, TCVT);
16221 break;
16222 case 'N': // "N" is a positive constant that is an exact power of two.
16223 if (Value > 0 && isPowerOf2_64(Value))
16224 Result = DAG.getTargetConstant(Value, dl, TCVT);
16225 break;
16226 case 'O': // "O" is the constant zero.
16227 if (Value == 0)
16228 Result = DAG.getTargetConstant(Value, dl, TCVT);
16229 break;
16230 case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
16231 if (isInt<16>(-Value))
16232 Result = DAG.getTargetConstant(Value, dl, TCVT);
16233 break;
16234 }
16235 break;
16236 }
16237 }
16238
16239 if (Result.getNode()) {
16240 Ops.push_back(Result);
16241 return;
16242 }
16243
16244 // Handle standard constraint letters.
16245 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
16246}
16247
16248// isLegalAddressingMode - Return true if the addressing mode represented
16249// by AM is legal for this target, for a load/store of the specified type.
16250bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
16251 const AddrMode &AM, Type *Ty,
16252 unsigned AS,
16253 Instruction *I) const {
16254 // Vector type r+i form is supported since power9 as DQ form. We don't check
16255 // the offset matching DQ form requirement(off % 16 == 0), because on PowerPC,
16256 // imm form is preferred and the offset can be adjusted to use imm form later
16257 // in pass PPCLoopInstrFormPrep. Also in LSR, for one LSRUse, it uses min and
16258 // max offset to check legal addressing mode, we should be a little aggressive
16259 // to contain other offsets for that LSRUse.
16260 if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
16261 return false;
16262
16263 // PPC allows a sign-extended 16-bit immediate field.
16264 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
16265 return false;
16266
16267 // No global is ever allowed as a base.
16268 if (AM.BaseGV)
16269 return false;
16270
16271 // PPC only supports r+r,
16272 switch (AM.Scale) {
16273 case 0: // "r+i" or just "i", depending on HasBaseReg.
16274 break;
16275 case 1:
16276 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
16277 return false;
16278 // Otherwise we have r+r or r+i.
16279 break;
16280 case 2:
16281 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
16282 return false;
16283 // Allow 2*r as r+r.
16284 break;
16285 default:
16286 // No other scales are supported.
16287 return false;
16288 }
16289
16290 return true;
16291}
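
A few concrete byte-element accesses and how they land under these rules (illustrative only; the helper names are assumptions, not code from this file):

  char f1(char *P, long I) { return P[1000];   } // base reg + imm: Scale 0     -> legal
  char f2(char *P, long I) { return P[I];      } // base reg + reg: Scale 1     -> legal
  char f3(char *P, long I) { return P[I + 16]; } // reg + reg + imm in one mode -> rejected

A bare 2*reg mode (Scale 2, no base register, no offset) is also accepted because it can be materialized as reg + reg; larger scales are rejected and must be reduced before they reach this hook.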
16292
16293SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
16294 SelectionDAG &DAG) const {
16295 MachineFunction &MF = DAG.getMachineFunction();
16296 MachineFrameInfo &MFI = MF.getFrameInfo();
16297 MFI.setReturnAddressIsTaken(true);
16298
16299 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
16300 return SDValue();
16301
16302 SDLoc dl(Op);
16303 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
16304
16305 // Make sure the function does not optimize away the store of the RA to
16306 // the stack.
16307 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
16308 FuncInfo->setLRStoreRequired();
16309 bool isPPC64 = Subtarget.isPPC64();
16310 auto PtrVT = getPointerTy(MF.getDataLayout());
16311
16312 if (Depth > 0) {
16313 // The link register (return address) is saved in the caller's frame
16314 // not the callee's stack frame. So we must get the caller's frame
16315 // address and load the return address at the LR offset from there.
16316 SDValue FrameAddr =
16317 DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
16318 LowerFRAMEADDR(Op, DAG), MachinePointerInfo());
16319 SDValue Offset =
16320 DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
16321 isPPC64 ? MVT::i64 : MVT::i32);
16322 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
16323 DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
16324 MachinePointerInfo());
16325 }
16326
16327 // Just load the return address off the stack.
16328 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
16329 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
16330 MachinePointerInfo());
16331}
16332
16333SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
16334 SelectionDAG &DAG) const {
16335 SDLoc dl(Op);
16336 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
16337
16338 MachineFunction &MF = DAG.getMachineFunction();
16339 MachineFrameInfo &MFI = MF.getFrameInfo();
16340 MFI.setFrameAddressIsTaken(true);
16341
16342 EVT PtrVT = getPointerTy(MF.getDataLayout());
16343 bool isPPC64 = PtrVT == MVT::i64;
16344
16345 // Naked functions never have a frame pointer, and so we use r1. For all
16346 // other functions, this decision must be delayed until during PEI.
16347 unsigned FrameReg;
16348 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
16349 FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
16350 else
16351 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
16352
16353 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
16354 PtrVT);
16355 while (Depth--)
16356 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
16357 FrameAddr, MachinePointerInfo());
16358 return FrameAddr;
16359}
16360
16361// FIXME? Maybe this could be a TableGen attribute on some registers and
16362// this table could be generated automatically from RegInfo.
16363Register PPCTargetLowering::getRegisterByName(const char* RegName, LLT VT,
16364 const MachineFunction &MF) const {
16365 bool isPPC64 = Subtarget.isPPC64();
16366
16367 bool is64Bit = isPPC64 && VT == LLT::scalar(64);
16368 if (!is64Bit && VT != LLT::scalar(32))
16369 report_fatal_error("Invalid register global variable type");
16370
16371 Register Reg = StringSwitch<Register>(RegName)
16372 .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
16373 .Case("r2", isPPC64 ? Register() : PPC::R2)
16374 .Case("r13", (is64Bit ? PPC::X13 : PPC::R13))
16375 .Default(Register());
16376
16377 if (Reg)
16378 return Reg;
16379 report_fatal_error("Invalid register name global variable");
16380}
16381
16382bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
16383 // The 32-bit SVR4 ABI accesses everything as got-indirect.
16384 if (Subtarget.is32BitELFABI())
16385 return true;
16386
16387 // AIX accesses everything indirectly through the TOC, which is similar to
16388 // the GOT.
16389 if (Subtarget.isAIXABI())
16390 return true;
16391
16392 CodeModel::Model CModel = getTargetMachine().getCodeModel();
16393 // If it is small or large code model, module locals are accessed
16394 // indirectly by loading their address from .toc/.got.
16395 if (CModel == CodeModel::Small || CModel == CodeModel::Large)
16396 return true;
16397
16398 // JumpTable and BlockAddress are accessed as got-indirect.
16399 if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
16400 return true;
16401
16402 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
16403 return Subtarget.isGVIndirectSymbol(G->getGlobal());
16404
16405 return false;
16406}
16407
16408bool
16409PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
16410 // The PowerPC target isn't yet aware of offsets.
16411 return false;
16412}
16413
16414bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
16415 const CallInst &I,
16416 MachineFunction &MF,
16417 unsigned Intrinsic) const {
16418 switch (Intrinsic) {
16419 case Intrinsic::ppc_atomicrmw_xchg_i128:
16420 case Intrinsic::ppc_atomicrmw_add_i128:
16421 case Intrinsic::ppc_atomicrmw_sub_i128:
16422 case Intrinsic::ppc_atomicrmw_nand_i128:
16423 case Intrinsic::ppc_atomicrmw_and_i128:
16424 case Intrinsic::ppc_atomicrmw_or_i128:
16425 case Intrinsic::ppc_atomicrmw_xor_i128:
16426 case Intrinsic::ppc_cmpxchg_i128:
16427 Info.opc = ISD::INTRINSIC_W_CHAIN;
16428 Info.memVT = MVT::i128;
16429 Info.ptrVal = I.getArgOperand(0);
16430 Info.offset = 0;
16431 Info.align = Align(16);
16432 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
16433 MachineMemOperand::MOVolatile;
16434 return true;
16435 case Intrinsic::ppc_atomic_load_i128:
16436 Info.opc = ISD::INTRINSIC_W_CHAIN;
16437 Info.memVT = MVT::i128;
16438 Info.ptrVal = I.getArgOperand(0);
16439 Info.offset = 0;
16440 Info.align = Align(16);
16441 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
16442 return true;
16443 case Intrinsic::ppc_atomic_store_i128:
16444 Info.opc = ISD::INTRINSIC_VOID;
16445 Info.memVT = MVT::i128;
16446 Info.ptrVal = I.getArgOperand(2);
16447 Info.offset = 0;
16448 Info.align = Align(16);
16449 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
16450 return true;
16451 case Intrinsic::ppc_altivec_lvx:
16452 case Intrinsic::ppc_altivec_lvxl:
16453 case Intrinsic::ppc_altivec_lvebx:
16454 case Intrinsic::ppc_altivec_lvehx:
16455 case Intrinsic::ppc_altivec_lvewx:
16456 case Intrinsic::ppc_vsx_lxvd2x:
16457 case Intrinsic::ppc_vsx_lxvw4x:
16458 case Intrinsic::ppc_vsx_lxvd2x_be:
16459 case Intrinsic::ppc_vsx_lxvw4x_be:
16460 case Intrinsic::ppc_vsx_lxvl:
16461 case Intrinsic::ppc_vsx_lxvll: {
16462 EVT VT;
16463 switch (Intrinsic) {
16464 case Intrinsic::ppc_altivec_lvebx:
16465 VT = MVT::i8;
16466 break;
16467 case Intrinsic::ppc_altivec_lvehx:
16468 VT = MVT::i16;
16469 break;
16470 case Intrinsic::ppc_altivec_lvewx:
16471 VT = MVT::i32;
16472 break;
16473 case Intrinsic::ppc_vsx_lxvd2x:
16474 case Intrinsic::ppc_vsx_lxvd2x_be:
16475 VT = MVT::v2f64;
16476 break;
16477 default:
16478 VT = MVT::v4i32;
16479 break;
16480 }
16481
16482 Info.opc = ISD::INTRINSIC_W_CHAIN;
16483 Info.memVT = VT;
16484 Info.ptrVal = I.getArgOperand(0);
16485 Info.offset = -VT.getStoreSize()+1;
16486 Info.size = 2*VT.getStoreSize()-1;
16487 Info.align = Align(1);
16488 Info.flags = MachineMemOperand::MOLoad;
16489 return true;
16490 }
16491 case Intrinsic::ppc_altivec_stvx:
16492 case Intrinsic::ppc_altivec_stvxl:
16493 case Intrinsic::ppc_altivec_stvebx:
16494 case Intrinsic::ppc_altivec_stvehx:
16495 case Intrinsic::ppc_altivec_stvewx:
16496 case Intrinsic::ppc_vsx_stxvd2x:
16497 case Intrinsic::ppc_vsx_stxvw4x:
16498 case Intrinsic::ppc_vsx_stxvd2x_be:
16499 case Intrinsic::ppc_vsx_stxvw4x_be:
16500 case Intrinsic::ppc_vsx_stxvl:
16501 case Intrinsic::ppc_vsx_stxvll: {
16502 EVT VT;
16503 switch (Intrinsic) {
16504 case Intrinsic::ppc_altivec_stvebx:
16505 VT = MVT::i8;
16506 break;
16507 case Intrinsic::ppc_altivec_stvehx:
16508 VT = MVT::i16;
16509 break;
16510 case Intrinsic::ppc_altivec_stvewx:
16511 VT = MVT::i32;
16512 break;
16513 case Intrinsic::ppc_vsx_stxvd2x:
16514 case Intrinsic::ppc_vsx_stxvd2x_be:
16515 VT = MVT::v2f64;
16516 break;
16517 default:
16518 VT = MVT::v4i32;
16519 break;
16520 }
16521
16522 Info.opc = ISD::INTRINSIC_VOID;
16523 Info.memVT = VT;
16524 Info.ptrVal = I.getArgOperand(1);
16525 Info.offset = -VT.getStoreSize()+1;
16526 Info.size = 2*VT.getStoreSize()-1;
16527 Info.align = Align(1);
16528 Info.flags = MachineMemOperand::MOStore;
16529 return true;
16530 }
16531 default:
16532 break;
16533 }
16534
16535 return false;
16536}
16537
16538/// It returns EVT::Other if the type should be determined using generic
16539/// target-independent logic.
16540EVT PPCTargetLowering::getOptimalMemOpType(
16541 const MemOp &Op, const AttributeList &FuncAttributes) const {
16542 if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
16543 // We should use Altivec/VSX loads and stores when available. For unaligned
16544 // addresses, unaligned VSX loads are only fast starting with the P8.
16545 if (Subtarget.hasAltivec() && Op.size() >= 16 &&
16546 (Op.isAligned(Align(16)) ||
16547 ((Op.isMemset() && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
16548 return MVT::v4i32;
16549 }
16550
16551 if (Subtarget.isPPC64()) {
16552 return MVT::i64;
16553 }
16554
16555 return MVT::i32;
16556}
16557
16558/// Returns true if it is beneficial to convert a load of a constant
16559/// to just the constant itself.
16560bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
16561 Type *Ty) const {
16562 assert(Ty->isIntegerTy());
16563
16564 unsigned BitSize = Ty->getPrimitiveSizeInBits();
16565 return !(BitSize == 0 || BitSize > 64);
16566}
16567
16568bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
16569 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
16570 return false;
16571 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
16572 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
16573 return NumBits1 == 64 && NumBits2 == 32;
16574}
16575
16576bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
16577 if (!VT1.isInteger() || !VT2.isInteger())
16578 return false;
16579 unsigned NumBits1 = VT1.getSizeInBits();
16580 unsigned NumBits2 = VT2.getSizeInBits();
16581 return NumBits1 == 64 && NumBits2 == 32;
16582}
16583
16584bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
16585 // Generally speaking, zexts are not free, but they are free when they can be
16586 // folded with other operations.
16587 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
16588 EVT MemVT = LD->getMemoryVT();
16589 if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
16590 (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
16591 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
16592 LD->getExtensionType() == ISD::ZEXTLOAD))
16593 return true;
16594 }
16595
16596 // FIXME: Add other cases...
16597 // - 32-bit shifts with a zext to i64
16598 // - zext after ctlz, bswap, etc.
16599 // - zext after and by a constant mask
16600
16601 return TargetLowering::isZExtFree(Val, VT2);
16602}
16603
16604bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
16605 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
16606 "invalid fpext types");
16607 // Extending to float128 is not free.
16608 if (DestVT == MVT::f128)
16609 return false;
16610 return true;
16611}
16612
16613bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
16614 return isInt<16>(Imm) || isUInt<16>(Imm);
16615}
16616
16617bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
16618 return isInt<16>(Imm) || isUInt<16>(Imm);
16619}
16620
16621bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, Align,
16622 MachineMemOperand::Flags,
16623 bool *Fast) const {
16624 if (DisablePPCUnaligned)
16625 return false;
16626
16627 // PowerPC supports unaligned memory access for simple non-vector types.
16628 // Although accessing unaligned addresses is not as efficient as accessing
16629 // aligned addresses, it is generally more efficient than manual expansion,
16630 // and generally only traps for software emulation when crossing page
16631 // boundaries.
16632
16633 if (!VT.isSimple())
16634 return false;
16635
16636 if (VT.isFloatingPoint() && !VT.isVector() &&
16637 !Subtarget.allowsUnalignedFPAccess())
16638 return false;
16639
16640 if (VT.getSimpleVT().isVector()) {
16641 if (Subtarget.hasVSX()) {
16642 if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
16643 VT != MVT::v4f32 && VT != MVT::v4i32)
16644 return false;
16645 } else {
16646 return false;
16647 }
16648 }
16649
16650 if (VT == MVT::ppcf128)
16651 return false;
16652
16653 if (Fast)
16654 *Fast = true;
16655
16656 return true;
16657}
16658
16659bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
16660 SDValue C) const {
16661 // Check integral scalar types.
16662 if (!VT.isScalarInteger())
[1] Assuming the condition is false
[2] Taking false branch
16663 return false;
16664 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
[3] Assuming the object is a 'ConstantSDNode'
[3.1] 'ConstNode' is non-null
[4] Taking true branch
16665 if (!ConstNode->getAPIntValue().isSignedIntN(64))
[5] Taking false branch
16666 return false;
16667 // This transformation will generate >= 2 operations. But the following
16668 // cases will generate <= 2 instructions during ISEL. So exclude them.
16669 // 1. If the constant multiplier fits 16 bits, it can be handled by one
16670 // HW instruction, ie. MULLI
16671 // 2. If the multiplier fits 16 bits after shifting out its trailing zeros, one
16672 // extra shift instruction is needed compared to case 1, ie. MULLI and RLDICR
16673 int64_t Imm = ConstNode->getSExtValue();
16674 unsigned Shift = countTrailingZeros<uint64_t>(Imm);
[6] Calling 'countTrailingZeros<unsigned long>'
[13] Returning from 'countTrailingZeros<unsigned long>'
[14] 'Shift' initialized to 64
16675 Imm >>= Shift;
[15] Assigned value is garbage or undefined
16676 if (isInt<16>(Imm))
16677 return false;
16678 uint64_t UImm = static_cast<uint64_t>(Imm);
16679 if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
16680 isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
16681 return true;
16682 }
16683 return false;
16684}
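
This is the function the report flags: when the multiplier is zero, countTrailingZeros<uint64_t>(0) returns the bit width (64, the "'Shift' initialized to 64" note above), and the following Imm >>= Shift shifts a 64-bit value by 64, which is undefined behaviour and the source of the "Assigned value is garbage or undefined" warning at line 16675. A minimal standalone reproduction of that path (illustrative only, not code from this file):

  #include <cstdint>
  #include "llvm/Support/MathExtras.h"

  int main() {
    int64_t Imm = 0;                                           // multiplier of 0
    unsigned Shift = llvm::countTrailingZeros<uint64_t>(Imm);  // returns 64 for 0
    Imm >>= Shift;                                             // UB: shift >= bit width
    return (int)Imm;
  }

One possible hardening, offered only as an assumption about a fix rather than what upstream actually did, is to return false before computing the shift when Imm is 0, since a zero multiplier gains nothing from this decomposition.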
16685
16686bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
16687 EVT VT) const {
16688 return isFMAFasterThanFMulAndFAdd(
16689 MF.getFunction(), VT.getTypeForEVT(MF.getFunction().getContext()));
16690}
16691
16692bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
16693 Type *Ty) const {
16694 switch (Ty->getScalarType()->getTypeID()) {
16695 case Type::FloatTyID:
16696 case Type::DoubleTyID:
16697 return true;
16698 case Type::FP128TyID:
16699 return Subtarget.hasP9Vector();
16700 default:
16701 return false;
16702 }
16703}
16704
16705// FIXME: add more patterns which are not profitable to hoist.
16706bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
16707 if (!I->hasOneUse())
16708 return true;
16709
16710 Instruction *User = I->user_back();
16711 assert(User && "A single use instruction with no uses.");
16712
16713 switch (I->getOpcode()) {
16714 case Instruction::FMul: {
16715 // Don't break FMA, PowerPC prefers FMA.
16716 if (User->getOpcode() != Instruction::FSub &&
16717 User->getOpcode() != Instruction::FAdd)
16718 return true;
16719
16720 const TargetOptions &Options = getTargetMachine().Options;
16721 const Function *F = I->getFunction();
16722 const DataLayout &DL = F->getParent()->getDataLayout();
16723 Type *Ty = User->getOperand(0)->getType();
16724
16725 return !(
16726 isFMAFasterThanFMulAndFAdd(*F, Ty) &&
16727 isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
16728 (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
16729 }
16730 case Instruction::Load: {
16731 // Don't break "store (load float*)" pattern, this pattern will be combined
16732 // to "store (load int32)" in later InstCombine pass. See function
16733 // combineLoadToOperationType. On PowerPC, loading a floating-point value takes more
16734 // cycles than loading a 32 bit integer.
16735 LoadInst *LI = cast<LoadInst>(I);
16736 // For the loads that combineLoadToOperationType does nothing, like
16737 // ordered load, it should be profitable to hoist them.
16738 // For swifterror load, it can only be used for pointer to pointer type, so
16739 // later type check should get rid of this case.
16740 if (!LI->isUnordered())
16741 return true;
16742
16743 if (User->getOpcode() != Instruction::Store)
16744 return true;
16745
16746 if (I->getType()->getTypeID() != Type::FloatTyID)
16747 return true;
16748
16749 return false;
16750 }
16751 default:
16752 return true;
16753 }
16754 return true;
16755}
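
The FMul case above exists to protect FMA formation: if the multiply and its single fadd/fsub user stay in one block, instruction selection can emit a single fused multiply-add. A small illustration (assumed example, not code from this file):

  // With FP contraction allowed, the body of the branch becomes one fmadd.
  // Hoisting only the multiply out of the branch (e.g. when speculating it)
  // would separate it from its single fadd user and lose the fusion, which is
  // why the hook reports hoisting as unprofitable for this pattern.
  double f(double A, double B, double C, bool Flag) {
    if (Flag)
      return A * B + C;
    return C;
  }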
16756
16757const MCPhysReg *
16758PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
16759 // LR is a callee-save register, but we must treat it as clobbered by any call
16760 // site. Hence we include LR in the scratch registers, which are in turn added
16761 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
16762 // to CTR, which is used by any indirect call.
16763 static const MCPhysReg ScratchRegs[] = {
16764 PPC::X12, PPC::LR8, PPC::CTR8, 0
16765 };
16766
16767 return ScratchRegs;
16768}
16769
16770Register PPCTargetLowering::getExceptionPointerRegister(
16771 const Constant *PersonalityFn) const {
16772 return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
16773}
16774
16775Register PPCTargetLowering::getExceptionSelectorRegister(
16776 const Constant *PersonalityFn) const {
16777 return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
16778}
16779
16780bool
16781PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
16782 EVT VT , unsigned DefinedValues) const {
16783 if (VT == MVT::v2i64)
16784 return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
16785
16786 if (Subtarget.hasVSX())
16787 return true;
16788
16789 return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
16790}
16791
16792Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
16793 if (DisableILPPref || Subtarget.enableMachineScheduler())
16794 return TargetLowering::getSchedulingPreference(N);
16795
16796 return Sched::ILP;
16797}
16798
16799// Create a fast isel object.
16800FastISel *
16801PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
16802 const TargetLibraryInfo *LibInfo) const {
16803 return PPC::createFastISel(FuncInfo, LibInfo);
16804}
16805
16806// 'Inverted' means the FMA opcode after negating one multiplicand.
16807// For example, (fma -a b c) = (fnmsub a b c)
16808static unsigned invertFMAOpcode(unsigned Opc) {
16809 switch (Opc) {
16810 default:
16811 llvm_unreachable("Invalid FMA opcode for PowerPC!")::llvm::llvm_unreachable_internal("Invalid FMA opcode for PowerPC!"
, "llvm/lib/Target/PowerPC/PPCISelLowering.cpp", 16811)
;
16812 case ISD::FMA:
16813 return PPCISD::FNMSUB;
16814 case PPCISD::FNMSUB:
16815 return ISD::FMA;
16816 }
16817}
16818
16819SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
16820 bool LegalOps, bool OptForSize,
16821 NegatibleCost &Cost,
16822 unsigned Depth) const {
16823 if (Depth > SelectionDAG::MaxRecursionDepth)
16824 return SDValue();
16825
16826 unsigned Opc = Op.getOpcode();
16827 EVT VT = Op.getValueType();
16828 SDNodeFlags Flags = Op.getNode()->getFlags();
16829
16830 switch (Opc) {
16831 case PPCISD::FNMSUB:
16832 if (!Op.hasOneUse() || !isTypeLegal(VT))
16833 break;
16834
16835 const TargetOptions &Options = getTargetMachine().Options;
16836 SDValue N0 = Op.getOperand(0);
16837 SDValue N1 = Op.getOperand(1);
16838 SDValue N2 = Op.getOperand(2);
16839 SDLoc Loc(Op);
16840
16841 NegatibleCost N2Cost = NegatibleCost::Expensive;
16842 SDValue NegN2 =
16843 getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
16844
16845 if (!NegN2)
16846 return SDValue();
16847
16848 // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
16849 // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
16850 // These transformations may change sign of zeroes. For example,
16851 // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
16852 if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
16853 // Try and choose the cheaper one to negate.
16854 NegatibleCost N0Cost = NegatibleCost::Expensive;
16855 SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
16856 N0Cost, Depth + 1);
16857
16858 NegatibleCost N1Cost = NegatibleCost::Expensive;
16859 SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
16860 N1Cost, Depth + 1);
16861
16862 if (NegN0 && N0Cost <= N1Cost) {
16863 Cost = std::min(N0Cost, N2Cost);
16864 return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
16865 } else if (NegN1) {
16866 Cost = std::min(N1Cost, N2Cost);
16867 return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
16868 }
16869 }
16870
16871 // (fneg (fnmsub a b c)) => (fma a b (fneg c))
16872 if (isOperationLegal(ISD::FMA, VT)) {
16873 Cost = N2Cost;
16874 return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
16875 }
16876
16877 break;
16878 }
16879
16880 return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
16881 Cost, Depth);
16882}
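
As a quick check of the rewrites in the comments above (illustrative): using the PPC semantics fnmsub(a, b, c) = -(a*b - c), negating it gives a*b - c, which equals fnmsub(-a, b, -c) = -((-a)*b - (-c)) = a*b - c, equals fnmsub(a, -b, -c) by the same algebra, and equals fma(a, b, -c) = a*b + (-c). The first two forms can flip the sign of a zero result when a*b == c, which is why they are only used under no-signed-zeros, while the fma form is used unguarded as the fallback in the code above.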
16883
16884// Override to enable LOAD_STACK_GUARD lowering on Linux.
16885bool PPCTargetLowering::useLoadStackGuardNode() const {
16886 if (!Subtarget.isTargetLinux())
16887 return TargetLowering::useLoadStackGuardNode();
16888 return true;
16889}
16890
16891// Override to disable global variable loading on Linux and insert AIX canary
16892// word declaration.
16893void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
16894 if (Subtarget.isAIXABI()) {
16895 M.getOrInsertGlobal(AIXSSPCanaryWordName,
16896 Type::getInt8PtrTy(M.getContext()));
16897 return;
16898 }
16899 if (!Subtarget.isTargetLinux())
16900 return TargetLowering::insertSSPDeclarations(M);
16901}
16902
16903Value *PPCTargetLowering::getSDagStackGuard(const Module &M) const {
16904 if (Subtarget.isAIXABI())
16905 return M.getGlobalVariable(AIXSSPCanaryWordName);
16906 return TargetLowering::getSDagStackGuard(M);
16907}
16908
16909bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
16910 bool ForCodeSize) const {
16911 if (!VT.isSimple() || !Subtarget.hasVSX())
16912 return false;
16913
16914 switch(VT.getSimpleVT().SimpleTy) {
16915 default:
16916 // For FP types that are currently not supported by PPC backend, return
16917 // false. Examples: f16, f80.
16918 return false;
16919 case MVT::f32:
16920 case MVT::f64:
16921 if (Subtarget.hasPrefixInstrs()) {
16922 // we can materialize all immediates via XXSPLTI32DX and XXSPLTIDP.
16923 return true;
16924 }
16925 LLVM_FALLTHROUGH[[gnu::fallthrough]];
16926 case MVT::ppcf128:
16927 return Imm.isPosZero();
16928 }
16929}
16930
16931// For vector shift operation op, fold
16932// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
16933static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
16934 SelectionDAG &DAG) {
16935 SDValue N0 = N->getOperand(0);
16936 SDValue N1 = N->getOperand(1);
16937 EVT VT = N0.getValueType();
16938 unsigned OpSizeInBits = VT.getScalarSizeInBits();
16939 unsigned Opcode = N->getOpcode();
16940 unsigned TargetOpcode;
16941
16942 switch (Opcode) {
16943 default:
16944 llvm_unreachable("Unexpected shift operation")::llvm::llvm_unreachable_internal("Unexpected shift operation"
, "llvm/lib/Target/PowerPC/PPCISelLowering.cpp", 16944)
;
16945 case ISD::SHL:
16946 TargetOpcode = PPCISD::SHL;
16947 break;
16948 case ISD::SRL:
16949 TargetOpcode = PPCISD::SRL;
16950 break;
16951 case ISD::SRA:
16952 TargetOpcode = PPCISD::SRA;
16953 break;
16954 }
16955
16956 if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
16957 N1->getOpcode() == ISD::AND)
16958 if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
16959 if (Mask->getZExtValue() == OpSizeInBits - 1)
16960 return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
16961
16962 return SDValue();
16963}
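// For illustration (assuming a legal v4i32 shift): a DAG such as
//   (shl v4i32:x, (and v4i32:y, splat(31)))
// is rewritten above to (PPCISD::SHL x, y); the AND is redundant because the
// mask is exactly numbits(element) - 1 and the hardware shift already ignores
// the higher bits of the shift amount.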
16964
16965SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
16966 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
16967 return Value;
16968
16969 SDValue N0 = N->getOperand(0);
16970 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
16971 if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
16972 N0.getOpcode() != ISD::SIGN_EXTEND ||
16973 N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
16974 N->getValueType(0) != MVT::i64)
16975 return SDValue();
16976
16977 // We can't save an operation here if the value is already extended, and
16978 // the existing shift is easier to combine.
16979 SDValue ExtsSrc = N0.getOperand(0);
16980 if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
16981 ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
16982 return SDValue();
16983
16984 SDLoc DL(N0);
16985 SDValue ShiftBy = SDValue(CN1, 0);
16986 // We want the shift amount to be i32 on the extswsli, but the shift amount
16987 // could be an i64 constant.
16988 if (ShiftBy.getValueType() == MVT::i64)
16989 ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
16990
16991 return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
16992 ShiftBy);
16993}
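// For illustration (hypothetical shift amount): on a 64-bit ISA 3.0 subtarget,
// (shl (sign_extend i32:a to i64), 5) is matched above as
// (PPCISD::EXTSWSLI a, 5), combining the sign extension and the shift into a
// single extend-then-shift node.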
16994
16995SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
16996 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
16997 return Value;
16998
16999 return SDValue();
17000}
17001
17002SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
17003 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17004 return Value;
17005
17006 return SDValue();
17007}
17008
17009// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
17010// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
17011 // When C is zero, the expression (addi Z, -C) can be simplified to Z
17012// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
17013static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
17014 const PPCSubtarget &Subtarget) {
17015 if (!Subtarget.isPPC64())
17016 return SDValue();
17017
17018 SDValue LHS = N->getOperand(0);
17019 SDValue RHS = N->getOperand(1);
17020
17021 auto isZextOfCompareWithConstant = [](SDValue Op) {
17022 if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
17023 Op.getValueType() != MVT::i64)
17024 return false;
17025
17026 SDValue Cmp = Op.getOperand(0);
17027 if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
17028 Cmp.getOperand(0).getValueType() != MVT::i64)
17029 return false;
17030
17031 if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
17032 int64_t NegConstant = 0 - Constant->getSExtValue();
17033 // Due to the limitations of the addi instruction,
17034 // -C is required to be in [-32768, 32767].
17035 return isInt<16>(NegConstant);
17036 }
17037
17038 return false;
17039 };
17040
17041 bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
17042 bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
17043
17044 // If there is a pattern, canonicalize a zext operand to the RHS.
17045 if (LHSHasPattern && !RHSHasPattern)
17046 std::swap(LHS, RHS);
17047 else if (!LHSHasPattern && !RHSHasPattern)
17048 return SDValue();
17049
17050 SDLoc DL(N);
17051 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
17052 SDValue Cmp = RHS.getOperand(0);
17053 SDValue Z = Cmp.getOperand(0);
17054 auto *Constant = cast<ConstantSDNode>(Cmp.getOperand(1));
17055 int64_t NegConstant = 0 - Constant->getSExtValue();
17056
17057 switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
17058 default: break;
17059 case ISD::SETNE: {
17060 //                                 when C == 0
17061 //                             --> addze X, (addic Z, -1).carry
17062 //                            /
17063 // add X, (zext(setne Z, C))--
17064 //                            \    when -32768 <= -C <= 32767 && C != 0
17065 //                             --> addze X, (addic (addi Z, -C), -1).carry
17066 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
17067 DAG.getConstant(NegConstant, DL, MVT::i64));
17068 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
17069 SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
17070 AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
17071 return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
17072 SDValue(Addc.getNode(), 1));
17073 }
17074 case ISD::SETEQ: {
17075 //                                 when C == 0
17076 //                             --> addze X, (subfic Z, 0).carry
17077 //                            /
17078 // add X, (zext(sete  Z, C))--
17079 //                            \    when -32768 <= -C <= 32767 && C != 0
17080 //                             --> addze X, (subfic (addi Z, -C), 0).carry
17081 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
17082 DAG.getConstant(NegConstant, DL, MVT::i64));
17083 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
17084 SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
17085 DAG.getConstant(0, DL, MVT::i64), AddOrZ);
17086 return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
17087 SDValue(Subc.getNode(), 1));
17088 }
17089 }
17090
17091 return SDValue();
17092}
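// For illustration (hypothetical constant C = 7): (add X, (zext (setne Z, 7)))
// is rewritten above to addze X, (addic (addi Z, -7), -1).carry, where the
// addic produces a carry exactly when Z != 7, so the carry stands in for the
// zero-extended compare result.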
17093
17094// Transform
17095// (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
17096// (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
17097// In this case both C1 and C2 must be known constants.
17098// C1+C2 must fit into a 34 bit signed integer.
17099static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
17100 const PPCSubtarget &Subtarget) {
17101 if (!Subtarget.isUsingPCRelativeCalls())
17102 return SDValue();
17103
17104 // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
17105 // If we find that node try to cast the Global Address and the Constant.
17106 SDValue LHS = N->getOperand(0);
17107 SDValue RHS = N->getOperand(1);
17108
17109 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
17110 std::swap(LHS, RHS);
17111
17112 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
17113 return SDValue();
17114
17115 // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
17116 GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
17117 ConstantSDNode* ConstNode = dyn_cast<ConstantSDNode>(RHS);
17118
17119 // Check that both casts succeeded.
17120 if (!GSDN || !ConstNode)
17121 return SDValue();
17122
17123 int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
17124 SDLoc DL(GSDN);
17125
17126 // The signed int offset needs to fit in 34 bits.
17127 if (!isInt<34>(NewOffset))
17128 return SDValue();
17129
17130 // The new global address is a copy of the old global address except
17131 // that it has the updated Offset.
17132 SDValue GA =
17133 DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
17134 NewOffset, GSDN->getTargetFlags());
17135 SDValue MatPCRel =
17136 DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
17137 return MatPCRel;
17138}
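// For illustration (hypothetical offsets): (add (MAT_PCREL_ADDR GlobalAddr+8), 16)
// is folded above into (MAT_PCREL_ADDR GlobalAddr+24), provided the combined
// offset still fits in a signed 34-bit immediate.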
17139
17140SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
17141 if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
17142 return Value;
17143
17144 if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
17145 return Value;
17146
17147 return SDValue();
17148}
17149
17150// Detect TRUNCATE operations on bitcasts of float128 values.
17151 // What we are looking for here is the situation where we extract a subset
17152 // of bits from a 128-bit float.
17153// This can be of two forms:
17154// 1) BITCAST of f128 feeding TRUNCATE
17155// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
17156 // This is required because we do not have a legal i128 type
17157// and so we want to prevent having to store the f128 and then reload part
17158// of it.
17159SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
17160 DAGCombinerInfo &DCI) const {
17161 // If we are using CRBits then try that first.
17162 if (Subtarget.useCRBits()) {
17163 // Check if CRBits did anything and return that if it did.
17164 if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
17165 return CRTruncValue;
17166 }
17167
17168 SDLoc dl(N);
17169 SDValue Op0 = N->getOperand(0);
17170
17171 // fold (truncate (abs (sub (zext a), (zext b)))) -> (vabsd a, b)
17172 if (Subtarget.hasP9Altivec() && Op0.getOpcode() == ISD::ABS) {
17173 EVT VT = N->getValueType(0);
17174 if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
17175 return SDValue();
17176 SDValue Sub = Op0.getOperand(0);
17177 if (Sub.getOpcode() == ISD::SUB) {
17178 SDValue SubOp0 = Sub.getOperand(0);
17179 SDValue SubOp1 = Sub.getOperand(1);
17180 if ((SubOp0.getOpcode() == ISD::ZERO_EXTEND) &&
17181 (SubOp1.getOpcode() == ISD::ZERO_EXTEND)) {
17182 return DCI.DAG.getNode(PPCISD::VABSD, dl, VT, SubOp0.getOperand(0),
17183 SubOp1.getOperand(0),
17184 DCI.DAG.getTargetConstant(0, dl, MVT::i32));
17185 }
17186 }
17187 }
17188
17189 // Looking for a truncate of i128 to i64.
17190 if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
17191 return SDValue();
17192
17193 int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
17194
17195 // SRL feeding TRUNCATE.
17196 if (Op0.getOpcode() == ISD::SRL) {
17197 ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
17198 // The right shift has to be by 64 bits.
17199 if (!ConstNode || ConstNode->getZExtValue() != 64)
17200 return SDValue();
17201
17202 // Switch the element number to extract.
17203 EltToExtract = EltToExtract ? 0 : 1;
17204 // Update Op0 past the SRL.
17205 Op0 = Op0.getOperand(0);
17206 }
17207
17208 // BITCAST feeding a TRUNCATE possibly via SRL.
17209 if (Op0.getOpcode() == ISD::BITCAST &&
17210 Op0.getValueType() == MVT::i128 &&
17211 Op0.getOperand(0).getValueType() == MVT::f128) {
17212 SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
17213 return DCI.DAG.getNode(
17214 ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
17215 DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
17216 }
17217 return SDValue();
17218}
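// For illustration: on a little-endian target, the f128 case above turns
// (truncate (srl (bitcast f128:v to i128), 64) to i64) into
// (extract_vector_elt (bitcast v to v2i64), 1), reading the upper half of the
// f128 straight out of a vector register instead of storing and reloading it.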
17219
17220SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
17221 SelectionDAG &DAG = DCI.DAG;
17222
17223 ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
17224 if (!ConstOpOrElement)
17225 return SDValue();
17226
17227 // An imul is usually smaller than the alternative sequence for a legal type.
17228 if (DAG.getMachineFunction().getFunction().hasMinSize() &&
17229 isOperationLegal(ISD::MUL, N->getValueType(0)))
17230 return SDValue();
17231
17232 auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
17233 switch (this->Subtarget.getCPUDirective()) {
17234 default:
17235 // TODO: enhance the condition for subtarget before pwr8
17236 return false;
17237 case PPC::DIR_PWR8:
17238 //           type     mul     add     shl
17239 //           scalar    4       1       1
17240 //           vector    7       2       2
17241 return true;
17242 case PPC::DIR_PWR9:
17243 case PPC::DIR_PWR10:
17244 case PPC::DIR_PWR_FUTURE:
17245 //           type     mul     add     shl
17246 //           scalar    5       2       2
17247 //           vector    7       2       2
17248
17249 // The cycle ratios of the related operations are shown in the table above.
17250 // Because mul costs 5 (scalar) / 7 (vector) while add/sub/shl all cost 2
17251 // for both scalar and vector types, the 2-instruction patterns
17252 // (add/sub + shl) cost 4 and are always profitable; the 3-instruction
17253 // pattern (mul x, -(2^N + 1)) => -(add (shl x, N), x) costs 6 (sub + add + shl),
17254 // so we should only do it for vector types.
17255 return IsAddOne && IsNeg ? VT.isVector() : true;
17256 }
17257 };
17258
17259 EVT VT = N->getValueType(0);
17260 SDLoc DL(N);
17261
17262 const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
17263 bool IsNeg = MulAmt.isNegative();
17264 APInt MulAmtAbs = MulAmt.abs();
17265
17266 if ((MulAmtAbs - 1).isPowerOf2()) {
17267 // (mul x, 2^N + 1) => (add (shl x, N), x)
17268 // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
17269
17270 if (!IsProfitable(IsNeg, true, VT))
17271 return SDValue();
17272
17273 SDValue Op0 = N->getOperand(0);
17274 SDValue Op1 =
17275 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
17276 DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
17277 SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
17278
17279 if (!IsNeg)
17280 return Res;
17281
17282 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
17283 } else if ((MulAmtAbs + 1).isPowerOf2()) {
17284 // (mul x, 2^N - 1) => (sub (shl x, N), x)
17285 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
17286
17287 if (!IsProfitable(IsNeg, false, VT))
17288 return SDValue();
17289
17290 SDValue Op0 = N->getOperand(0);
17291 SDValue Op1 =
17292 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
17293 DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
17294
17295 if (!IsNeg)
17296 return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
17297 else
17298 return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
17299
17300 } else {
17301 return SDValue();
17302 }
17303}
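// For illustration (hypothetical constants, assuming IsProfitable is true):
// (mul x, 9) becomes (add (shl x, 3), x) and (mul x, 7) becomes
// (sub (shl x, 3), x); the negated variants additionally negate the result or
// swap the sub operands as described in the comments above.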
17304
17305 // Combine an fma-like op (such as fnmsub) with fnegs into the appropriate op. Do
17306 // this in the combiner since we need to check SD flags and other subtarget features.
17307SDValue PPCTargetLowering::combineFMALike(SDNode *N,
17308 DAGCombinerInfo &DCI) const {
17309 SDValue N0 = N->getOperand(0);
17310 SDValue N1 = N->getOperand(1);
17311 SDValue N2 = N->getOperand(2);
17312 SDNodeFlags Flags = N->getFlags();
17313 EVT VT = N->getValueType(0);
17314 SelectionDAG &DAG = DCI.DAG;
17315 const TargetOptions &Options = getTargetMachine().Options;
17316 unsigned Opc = N->getOpcode();
17317 bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
17318 bool LegalOps = !DCI.isBeforeLegalizeOps();
17319 SDLoc Loc(N);
17320
17321 if (!isOperationLegal(ISD::FMA, VT))
17322 return SDValue();
17323
17324 // Allowing the transformation to FNMSUB may change the sign of zeroes when ab-c=0
17325 // since (fnmsub a b c)=-0 while c-ab=+0.
17326 if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
17327 return SDValue();
17328
17329 // (fma (fneg a) b c) => (fnmsub a b c)
17330 // (fnmsub (fneg a) b c) => (fma a b c)
17331 if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
17332 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
17333
17334 // (fma a (fneg b) c) => (fnmsub a b c)
17335 // (fnmsub a (fneg b) c) => (fma a b c)
17336 if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
17337 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
17338
17339 return SDValue();
17340}
17341
17342bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
17343 // Only duplicate to increase tail-calls for the 64-bit SysV ABIs.
17344 if (!Subtarget.is64BitELFABI())
17345 return false;
17346
17347 // If not a tail call then no need to proceed.
17348 if (!CI->isTailCall())
17349 return false;
17350
17351 // If sibling calls have been disabled and tail-calls aren't guaranteed
17352 // there is no reason to duplicate.
17353 auto &TM = getTargetMachine();
17354 if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
17355 return false;
17356
17357 // Can't tail call a function called indirectly, or if it has variadic args.
17358 const Function *Callee = CI->getCalledFunction();
17359 if (!Callee || Callee->isVarArg())
17360 return false;
17361
17362 // Make sure the callee and caller calling conventions are eligible for tco.
17363 const Function *Caller = CI->getParent()->getParent();
17364 if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
17365 CI->getCallingConv()))
17366 return false;
17367
17368 // If the function is local, then we have a good chance at tail-calling it.
17369 return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee);
17370}
17371
17372bool PPCTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
17373 if (!Subtarget.hasVSX())
17374 return false;
17375 if (Subtarget.hasP9Vector() && VT == MVT::f128)
17376 return true;
17377 return VT == MVT::f32 || VT == MVT::f64 ||
17378 VT == MVT::v4f32 || VT == MVT::v2f64;
17379}
17380
17381bool PPCTargetLowering::
17382isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
17383 const Value *Mask = AndI.getOperand(1);
17384 // If the mask is suitable for andi. or andis. we should sink the and.
17385 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
17386 // Can't handle constants wider than 64-bits.
17387 if (CI->getBitWidth() > 64)
17388 return false;
17389 int64_t ConstVal = CI->getZExtValue();
17390 return isUInt<16>(ConstVal) ||
17391 (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
17392 }
17393
17394 // For non-constant masks, we can always use the record-form and.
17395 return true;
17396}
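// For illustration (hypothetical masks): 0xFFFF fits andi. and 0xFFFF0000 fits
// andis., so both return true; 0x1FFFF would need two instructions and returns
// false, while any non-constant mask always returns true.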
17397
17398// Transform (abs (sub (zext a), (zext b))) to (vabsd a b 0)
17399// Transform (abs (sub (zext a), (zext_invec b))) to (vabsd a b 0)
17400// Transform (abs (sub (zext_invec a), (zext_invec b))) to (vabsd a b 0)
17401// Transform (abs (sub (zext_invec a), (zext b))) to (vabsd a b 0)
17402 // Transform (abs (sub a, b)) to (vabsd a b 1) if a & b are of type v4i32
17403SDValue PPCTargetLowering::combineABS(SDNode *N, DAGCombinerInfo &DCI) const {
17404 assert((N->getOpcode() == ISD::ABS) && "Need ABS node here");
17405 assert(Subtarget.hasP9Altivec() &&
17406 "Only combine this when P9 altivec supported!");
17407 EVT VT = N->getValueType(0);
17408 if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
17409 return SDValue();
17410
17411 SelectionDAG &DAG = DCI.DAG;
17412 SDLoc dl(N);
17413 if (N->getOperand(0).getOpcode() == ISD::SUB) {
17414 // Even for signed integers, VABSD is usable here because the zero-extended
17415 // inputs are known to be non-negative when interpreted as signed values.
17416 unsigned SubOpcd0 = N->getOperand(0)->getOperand(0).getOpcode();
17417 unsigned SubOpcd1 = N->getOperand(0)->getOperand(1).getOpcode();
17418 if ((SubOpcd0 == ISD::ZERO_EXTEND ||
17419 SubOpcd0 == ISD::ZERO_EXTEND_VECTOR_INREG) &&
17420 (SubOpcd1 == ISD::ZERO_EXTEND ||
17421 SubOpcd1 == ISD::ZERO_EXTEND_VECTOR_INREG)) {
17422 return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
17423 N->getOperand(0)->getOperand(0),
17424 N->getOperand(0)->getOperand(1),
17425 DAG.getTargetConstant(0, dl, MVT::i32));
17426 }
17427
17428 // For type v4i32, it can be optimized with xvnegsp + vabsduw
17429 if (N->getOperand(0).getValueType() == MVT::v4i32 &&
17430 N->getOperand(0).hasOneUse()) {
17431 return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
17432 N->getOperand(0)->getOperand(0),
17433 N->getOperand(0)->getOperand(1),
17434 DAG.getTargetConstant(1, dl, MVT::i32));
17435 }
17436 }
17437
17438 return SDValue();
17439}
17440
17441 // For types v4i32/v8i16/v16i8, transform
17442// from (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) to (vabsd a, b)
17443// from (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) to (vabsd a, b)
17444// from (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) to (vabsd a, b)
17445// from (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) to (vabsd a, b)
17446SDValue PPCTargetLowering::combineVSelect(SDNode *N,
17447 DAGCombinerInfo &DCI) const {
17448 assert((N->getOpcode() == ISD::VSELECT) && "Need VSELECT node here");
17449 assert(Subtarget.hasP9Altivec() &&
17450 "Only combine this when P9 altivec supported!");
17451
17452 SelectionDAG &DAG = DCI.DAG;
17453 SDLoc dl(N);
17454 SDValue Cond = N->getOperand(0);
17455 SDValue TrueOpnd = N->getOperand(1);
17456 SDValue FalseOpnd = N->getOperand(2);
17457 EVT VT = N->getOperand(1).getValueType();
17458
17459 if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB ||
17460 FalseOpnd.getOpcode() != ISD::SUB)
17461 return SDValue();
17462
17463 // ABSD only available for type v4i32/v8i16/v16i8
17464 if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
17465 return SDValue();
17466
17467 // Require a single use on at least one operand so the combine saves a dependent computation.
17468 if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse()))
17469 return SDValue();
17470
17471 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
17472
17473 // Can only handle unsigned comparison here
17474 switch (CC) {
17475 default:
17476 return SDValue();
17477 case ISD::SETUGT:
17478 case ISD::SETUGE:
17479 break;
17480 case ISD::SETULT:
17481 case ISD::SETULE:
17482 std::swap(TrueOpnd, FalseOpnd);
17483 break;
17484 }
17485
17486 SDValue CmpOpnd1 = Cond.getOperand(0);
17487 SDValue CmpOpnd2 = Cond.getOperand(1);
17488
17489 // SETCC CmpOpnd1 CmpOpnd2 cond
17490 // TrueOpnd = CmpOpnd1 - CmpOpnd2
17491 // FalseOpnd = CmpOpnd2 - CmpOpnd1
17492 if (TrueOpnd.getOperand(0) == CmpOpnd1 &&
17493 TrueOpnd.getOperand(1) == CmpOpnd2 &&
17494 FalseOpnd.getOperand(0) == CmpOpnd2 &&
17495 FalseOpnd.getOperand(1) == CmpOpnd1) {
17496 return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(1).getValueType(),
17497 CmpOpnd1, CmpOpnd2,
17498 DAG.getTargetConstant(0, dl, MVT::i32));
17499 }
17500
17501 return SDValue();
17502}
17503
17504 /// getAddrModeForFlags - Based on the set of address flags, select the
17505 /// optimal instruction format to match by.
17506PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
17507 // This is not a node we should be handling here.
17508 if (Flags == PPC::MOF_None)
17509 return PPC::AM_None;
17510 // Unaligned D-Forms are tried first, followed by the aligned D-Forms.
17511 for (auto FlagSet : AddrModesMap.at(PPC::AM_DForm))
17512 if ((Flags & FlagSet) == FlagSet)
17513 return PPC::AM_DForm;
17514 for (auto FlagSet : AddrModesMap.at(PPC::AM_DSForm))
17515 if ((Flags & FlagSet) == FlagSet)
17516 return PPC::AM_DSForm;
17517 for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm))
17518 if ((Flags & FlagSet) == FlagSet)
17519 return PPC::AM_DQForm;
17520 for (auto FlagSet : AddrModesMap.at(PPC::AM_PrefixDForm))
17521 if ((Flags & FlagSet) == FlagSet)
17522 return PPC::AM_PrefixDForm;
17523 // If no other forms are selected, return an X-Form as it is the most
17524 // general addressing mode.
17525 return PPC::AM_XForm;
17526}
17527
17528/// Set alignment flags based on whether or not the Frame Index is aligned.
17529/// Utilized when computing flags for address computation when selecting
17530/// load and store instructions.
17531static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet,
17532 SelectionDAG &DAG) {
17533 bool IsAdd = ((N.getOpcode() == ISD::ADD) || (N.getOpcode() == ISD::OR));
17534 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(IsAdd ? N.getOperand(0) : N);
17535 if (!FI)
17536 return;
17537 const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
17538 unsigned FrameIndexAlign = MFI.getObjectAlign(FI->getIndex()).value();
17539 // If this is (add $FI, $S16Imm), the alignment flags are already set
17540 // based on the immediate. We just need to clear the alignment flags
17541 // if the FI alignment is weaker.
17542 if ((FrameIndexAlign % 4) != 0)
17543 FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
17544 if ((FrameIndexAlign % 16) != 0)
17545 FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
17546 // If the address is a plain FrameIndex, set alignment flags based on
17547 // FI alignment.
17548 if (!IsAdd) {
17549 if ((FrameIndexAlign % 4) == 0)
17550 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
17551 if ((FrameIndexAlign % 16) == 0)
17552 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
17553 }
17554}
17555
17556/// Given a node, compute flags that are used for address computation when
17557/// selecting load and store instructions. The flags computed are stored in
17558/// FlagSet. This function takes into account whether the node is a constant,
17559 /// an ADD, or an OR, and computes the address flags accordingly.
17560static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
17561 SelectionDAG &DAG) {
17562 // Set the alignment flags for the node depending on if the node is
17563 // 4-byte or 16-byte aligned.
17564 auto SetAlignFlagsForImm = [&](uint64_t Imm) {
17565 if ((Imm & 0x3) == 0)
17566 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
17567 if ((Imm & 0xf) == 0)
17568 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
17569 };
17570
17571 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
17572 // All 32-bit constants can be computed as LIS + Disp.
17573 const APInt &ConstImm = CN->getAPIntValue();
17574 if (ConstImm.isSignedIntN(32)) { // Flag to handle 32-bit constants.
17575 FlagSet |= PPC::MOF_AddrIsSImm32;
17576 SetAlignFlagsForImm(ConstImm.getZExtValue());
17577 setAlignFlagsForFI(N, FlagSet, DAG);
17578 }
17579 if (ConstImm.isSignedIntN(34)) // Flag to handle 34-bit constants.
17580 FlagSet |= PPC::MOF_RPlusSImm34;
17581 else // Let constant materialization handle large constants.
17582 FlagSet |= PPC::MOF_NotAddNorCst;
17583 } else if (N.getOpcode() == ISD::ADD || provablyDisjointOr(DAG, N)) {
17584 // This address can be represented as an addition of:
17585 // - Register + Imm16 (possibly a multiple of 4/16)
17586 // - Register + Imm34
17587 // - Register + PPCISD::Lo
17588 // - Register + Register
17589 // In any case, we won't have to match this as Base + Zero.
17590 SDValue RHS = N.getOperand(1);
17591 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(RHS)) {
17592 const APInt &ConstImm = CN->getAPIntValue();
17593 if (ConstImm.isSignedIntN(16)) {
17594 FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
17595 SetAlignFlagsForImm(ConstImm.getZExtValue());
17596 setAlignFlagsForFI(N, FlagSet, DAG);
17597 }
17598 if (ConstImm.isSignedIntN(34))
17599 FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
17600 else
17601 FlagSet |= PPC::MOF_RPlusR; // Register.
17602 } else if (RHS.getOpcode() == PPCISD::Lo &&
17603 !cast<ConstantSDNode>(RHS.getOperand(1))->getZExtValue())
17604 FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo.
17605 else
17606 FlagSet |= PPC::MOF_RPlusR;
17607 } else { // The address computation is not a constant or an addition.
17608 setAlignFlagsForFI(N, FlagSet, DAG);
17609 FlagSet |= PPC::MOF_NotAddNorCst;
17610 }
17611}
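// For illustration (hypothetical address): for (add %reg, 16), the constant is
// a signed 16-bit (and 34-bit) immediate that is 4- and 16-byte aligned, so
// this sets MOF_RPlusSImm16, MOF_RPlusSImm16Mult4, MOF_RPlusSImm16Mult16 and
// MOF_RPlusSImm34 (setAlignFlagsForFI may later clear the alignment bits).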
17612
17613static bool isPCRelNode(SDValue N) {
17614 return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR ||
17615 isValidPCRelNode<ConstantPoolSDNode>(N) ||
17616 isValidPCRelNode<GlobalAddressSDNode>(N) ||
17617 isValidPCRelNode<JumpTableSDNode>(N) ||
17618 isValidPCRelNode<BlockAddressSDNode>(N));
17619}
17620
17621 /// computeMOFlags - Given a node N and its Parent (a MemSDNode), compute
17622/// the address flags of the load/store instruction that is to be matched.
17623unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
17624 SelectionDAG &DAG) const {
17625 unsigned FlagSet = PPC::MOF_None;
17626
17627 // Compute subtarget flags.
17628 if (!Subtarget.hasP9Vector())
17629 FlagSet |= PPC::MOF_SubtargetBeforeP9;
17630 else {
17631 FlagSet |= PPC::MOF_SubtargetP9;
17632 if (Subtarget.hasPrefixInstrs())
17633 FlagSet |= PPC::MOF_SubtargetP10;
17634 }
17635 if (Subtarget.hasSPE())
17636 FlagSet |= PPC::MOF_SubtargetSPE;
17637
17638 // Check if we have a PCRel node and return early.
17639 if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
17640 return FlagSet;
17641
17642 // If the node is one of the paired load/store intrinsics, compute flags for
17643 // address computation and return early.
17644 unsigned ParentOp = Parent->getOpcode();
17645 if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
17646 (ParentOp == ISD::INTRINSIC_VOID))) {
17647 unsigned ID = cast<ConstantSDNode>(Parent->getOperand(1))->getZExtValue();
17648 if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) {
17649 SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
17650 ? Parent->getOperand(2)
17651 : Parent->getOperand(3);
17652 computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG);
17653 FlagSet |= PPC::MOF_Vector;
17654 return FlagSet;
17655 }
17656 }
17657
17658 // Mark this as something we don't want to handle here if it is an atomic
17659 // or pre-increment instruction.
17660 if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
17661 if (LSB->isIndexed())
17662 return PPC::MOF_None;
17663
17664 // Compute in-memory type flags. This is based on if there are scalars,
17665 // floats or vectors.
17666 const MemSDNode *MN = dyn_cast<MemSDNode>(Parent);
17667 assert(MN && "Parent should be a MemSDNode!");
17668 EVT MemVT = MN->getMemoryVT();
17669 unsigned Size = MemVT.getSizeInBits();
17670 if (MemVT.isScalarInteger()) {
17671 assert(Size <= 128 &&
17672 "Not expecting scalar integers larger than 16 bytes!");
17673 if (Size < 32)
17674 FlagSet |= PPC::MOF_SubWordInt;
17675 else if (Size == 32)
17676 FlagSet |= PPC::MOF_WordInt;
17677 else
17678 FlagSet |= PPC::MOF_DoubleWordInt;
17679 } else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
17680 if (Size == 128)
17681 FlagSet |= PPC::MOF_Vector;
17682 else if (Size == 256) {
17683 assert(Subtarget.pairedVectorMemops() &&
17684 "256-bit vectors are only available when paired vector memops is "
17685 "enabled!");
17686 FlagSet |= PPC::MOF_Vector;
17687 } else
17688 llvm_unreachable("Not expecting illegal vectors!");
17689 } else { // Floating point type: can be scalar, f128 or vector types.
17690 if (Size == 32 || Size == 64)
17691 FlagSet |= PPC::MOF_ScalarFloat;
17692 else if (MemVT == MVT::f128 || MemVT.isVector())
17693 FlagSet |= PPC::MOF_Vector;
17694 else
17695 llvm_unreachable("Not expecting illegal scalar floats!");
17696 }
17697
17698 // Compute flags for address computation.
17699 computeFlagsForAddressComputation(N, FlagSet, DAG);
17700
17701 // Compute type extension flags.
17702 if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Parent)) {
17703 switch (LN->getExtensionType()) {
17704 case ISD::SEXTLOAD:
17705 FlagSet |= PPC::MOF_SExt;
17706 break;
17707 case ISD::EXTLOAD:
17708 case ISD::ZEXTLOAD:
17709 FlagSet |= PPC::MOF_ZExt;
17710 break;
17711 case ISD::NON_EXTLOAD:
17712 FlagSet |= PPC::MOF_NoExt;
17713 break;
17714 }
17715 } else
17716 FlagSet |= PPC::MOF_NoExt;
17717
17718 // For integers, no extension is the same as zero extension.
17719 // We set the extension mode to zero extension so we don't have
17720 // to add separate entries in AddrModesMap for loads and stores.
17721 if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
17722 FlagSet |= PPC::MOF_ZExt;
17723 FlagSet &= ~PPC::MOF_NoExt;
17724 }
17725
17726 // If we don't have prefixed instructions, 34-bit constants should be
17727 // treated as PPC::MOF_NotAddNorCst so they can match D-Forms.
17728 bool IsNonP1034BitConst =
17729 ((PPC::MOF_RPlusSImm34 | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubtargetP10) &
17730 FlagSet) == PPC::MOF_RPlusSImm34;
17731 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
17732 IsNonP1034BitConst)
17733 FlagSet |= PPC::MOF_NotAddNorCst;
17734
17735 return FlagSet;
17736}
17737
17738/// SelectForceXFormMode - Given the specified address, force it to be
17739/// represented as an indexed [r+r] operation (an XForm instruction).
17740PPC::AddrMode PPCTargetLowering::SelectForceXFormMode(SDValue N, SDValue &Disp,
17741 SDValue &Base,
17742 SelectionDAG &DAG) const {
17743
17744 PPC::AddrMode Mode = PPC::AM_XForm;
17745 int16_t ForceXFormImm = 0;
17746 if (provablyDisjointOr(DAG, N) &&
17747 !isIntS16Immediate(N.getOperand(1), ForceXFormImm)) {
17748 Disp = N.getOperand(0);
17749 Base = N.getOperand(1);
17750 return Mode;
17751 }
17752
17753 // If the address is the result of an add, we will utilize the fact that the
17754 // address calculation includes an implicit add. However, we can reduce
17755 // register pressure if we do not materialize a constant just for use as the
17756 // index register. We only get rid of the add if it is not an add of a
17757 // value and a 16-bit signed constant and both have a single use.
17758 if (N.getOpcode() == ISD::ADD &&
17759 (!isIntS16Immediate(N.getOperand(1), ForceXFormImm) ||
17760 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
17761 Disp = N.getOperand(0);
17762 Base = N.getOperand(1);
17763 return Mode;
17764 }
17765
17766 // Otherwise, use R0 as the base register.
17767 Disp = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
17768 N.getValueType());
17769 Base = N;
17770
17771 return Mode;
17772}
17773
17774bool PPCTargetLowering::splitValueIntoRegisterParts(
17775 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
17776 unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const {
17777 EVT ValVT = Val.getValueType();
17778 // If we are splitting a scalar integer into f64 parts (i.e. so they
17779 // can be placed into VFRC registers), we need to zero extend and
17780 // bitcast the values. This will ensure the value is placed into a
17781 // VSR using direct moves or stack operations as needed.
17782 if (PartVT == MVT::f64 &&
17783 (ValVT == MVT::i32 || ValVT == MVT::i16 || ValVT == MVT::i8)) {
17784 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
17785 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Val);
17786 Parts[0] = Val;
17787 return true;
17788 }
17789 return false;
17790}
17791
17792SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
17793 SelectionDAG &DAG) const {
17794 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17795 TargetLowering::CallLoweringInfo CLI(DAG);
17796 EVT RetVT = Op.getValueType();
17797 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
17798 SDValue Callee =
17799 DAG.getExternalSymbol(LibCallName, TLI.getPointerTy(DAG.getDataLayout()));
17800 bool SignExtend = TLI.shouldSignExtendTypeInLibCall(RetVT, false);
17801 TargetLowering::ArgListTy Args;
17802 TargetLowering::ArgListEntry Entry;
17803 for (const SDValue &N : Op->op_values()) {
17804 EVT ArgVT = N.getValueType();
17805 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
17806 Entry.Node = N;
17807 Entry.Ty = ArgTy;
17808 Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgVT, SignExtend);
17809 Entry.IsZExt = !Entry.IsSExt;
17810 Args.push_back(Entry);
17811 }
17812
17813 SDValue InChain = DAG.getEntryNode();
17814 SDValue TCChain = InChain;
17815 const Function &F = DAG.getMachineFunction().getFunction();
17816 bool isTailCall =
17817 TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
17818 (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
17819 if (isTailCall)
17820 InChain = TCChain;
17821 CLI.setDebugLoc(SDLoc(Op))
17822 .setChain(InChain)
17823 .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
17824 .setTailCall(isTailCall)
17825 .setSExtResult(SignExtend)
17826 .setZExtResult(!SignExtend)
17827 .setIsPostTypeLegalization(true);
17828 return TLI.LowerCallTo(CLI).first;
17829}
17830
17831SDValue PPCTargetLowering::lowerLibCallBasedOnType(
17832 const char *LibCallFloatName, const char *LibCallDoubleName, SDValue Op,
17833 SelectionDAG &DAG) const {
17834 if (Op.getValueType() == MVT::f32)
17835 return lowerToLibCall(LibCallFloatName, Op, DAG);
17836
17837 if (Op.getValueType() == MVT::f64)
17838 return lowerToLibCall(LibCallDoubleName, Op, DAG);
17839
17840 return SDValue();
17841}
17842
17843bool PPCTargetLowering::isLowringToMASSFiniteSafe(SDValue Op) const {
17844 SDNodeFlags Flags = Op.getNode()->getFlags();
17845 return isLowringToMASSSafe(Op) && Flags.hasNoSignedZeros() &&
17846 Flags.hasNoNaNs() && Flags.hasNoInfs();
17847}
17848
17849bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const {
17850 return Op.getNode()->getFlags().hasApproximateFuncs();
17851}
17852
17853SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName,
17854 const char *LibCallFloatName,
17855 const char *LibCallDoubleNameFinite,
17856 const char *LibCallFloatNameFinite,
17857 SDValue Op,
17858 SelectionDAG &DAG) const {
17859 if (!isLowringToMASSSafe(Op))
17860 return SDValue();
17861
17862 if (!isLowringToMASSFiniteSafe(Op))
17863 return lowerLibCallBasedOnType(LibCallFloatName, LibCallDoubleName, Op,
17864 DAG);
17865
17866 return lowerLibCallBasedOnType(LibCallFloatNameFinite,
17867 LibCallDoubleNameFinite, Op, DAG);
17868}
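// For illustration: an f32 pow node carrying the approximate-funcs flag plus
// nnan/ninf/nsz is lowered via lowerPow below to a call to __xl_powf_finite;
// with only approximate-funcs set it uses __xl_powf, and without that flag no
// MASS lowering is performed at all.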
17869
17870SDValue PPCTargetLowering::lowerPow(SDValue Op, SelectionDAG &DAG) const {
17871 return lowerLibCallBase("__xl_pow", "__xl_powf", "__xl_pow_finite",
17872 "__xl_powf_finite", Op, DAG);
17873}
17874
17875SDValue PPCTargetLowering::lowerSin(SDValue Op, SelectionDAG &DAG) const {
17876 return lowerLibCallBase("__xl_sin", "__xl_sinf", "__xl_sin_finite",
17877 "__xl_sinf_finite", Op, DAG);
17878}
17879
17880SDValue PPCTargetLowering::lowerCos(SDValue Op, SelectionDAG &DAG) const {
17881 return lowerLibCallBase("__xl_cos", "__xl_cosf", "__xl_cos_finite",
17882 "__xl_cosf_finite", Op, DAG);
17883}
17884
17885SDValue PPCTargetLowering::lowerLog(SDValue Op, SelectionDAG &DAG) const {
17886 return lowerLibCallBase("__xl_log", "__xl_logf", "__xl_log_finite",
17887 "__xl_logf_finite", Op, DAG);
17888}
17889
17890SDValue PPCTargetLowering::lowerLog10(SDValue Op, SelectionDAG &DAG) const {
17891 return lowerLibCallBase("__xl_log10", "__xl_log10f", "__xl_log10_finite",
17892 "__xl_log10f_finite", Op, DAG);
17893}
17894
17895SDValue PPCTargetLowering::lowerExp(SDValue Op, SelectionDAG &DAG) const {
17896 return lowerLibCallBase("__xl_exp", "__xl_expf", "__xl_exp_finite",
17897 "__xl_expf_finite", Op, DAG);
17898}
17899
17900// If we happen to match to an aligned D-Form, check if the Frame Index is
17901// adequately aligned. If it is not, reset the mode to match to X-Form.
17902static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
17903 PPC::AddrMode &Mode) {
17904 if (!isa<FrameIndexSDNode>(N))
17905 return;
17906 if ((Mode == PPC::AM_DSForm && !(Flags & PPC::MOF_RPlusSImm16Mult4)) ||
17907 (Mode == PPC::AM_DQForm && !(Flags & PPC::MOF_RPlusSImm16Mult16)))
17908 Mode = PPC::AM_XForm;
17909}
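// For illustration: DS-Form displacements must be a multiple of 4 and DQ-Form
// displacements a multiple of 16, so a frame-index object known only to be
// 2-byte aligned cannot keep a DS-Form match and is demoted to X-Form here.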
17910
17911 /// SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode),
17912/// compute the address flags of the node, get the optimal address mode based
17913/// on the flags, and set the Base and Disp based on the address mode.
17914PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent,
17915 SDValue N, SDValue &Disp,
17916 SDValue &Base,
17917 SelectionDAG &DAG,
17918 MaybeAlign Align) const {
17919 SDLoc DL(Parent);
17920
17921 // Compute the address flags.
17922 unsigned Flags = computeMOFlags(Parent, N, DAG);
17923
17924 // Get the optimal address mode based on the Flags.
17925 PPC::AddrMode Mode = getAddrModeForFlags(Flags);
17926
17927 // If the address mode is DS-Form or DQ-Form, check if the FI is aligned.
17928 // Select an X-Form load if it is not.
17929 setXFormForUnalignedFI(N, Flags, Mode);
17930
17931 // Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
17932 if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
17933 assert(Subtarget.isUsingPCRelativeCalls() &&
17934 "Must be using PC-Relative calls when a valid PC-Relative node is "
17935 "present!");
17936 Mode = PPC::AM_PCRel;
17937 }
17938
17939 // Set Base and Disp accordingly depending on the address mode.
17940 switch (Mode) {
17941 case PPC::AM_DForm:
17942 case PPC::AM_DSForm:
17943 case PPC::AM_DQForm: {
17944 // This is a register plus a 16-bit immediate. The base will be the
17945 // register and the displacement will be the immediate unless it
17946 // isn't sufficiently aligned.
17947 if (Flags & PPC::MOF_RPlusSImm16) {
17948 SDValue Op0 = N.getOperand(0);
17949 SDValue Op1 = N.getOperand(1);
17950 int16_t Imm = cast<ConstantSDNode>(Op1)->getAPIntValue().getZExtValue();
17951 if (!Align || isAligned(*Align, Imm)) {
17952 Disp = DAG.getTargetConstant(Imm, DL, N.getValueType());
17953 Base = Op0;
17954 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op0)) {
17955 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
17956 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
17957 }
17958 break;
17959 }
17960 }
17961 // This is a register plus the @lo relocation. The base is the register
17962 // and the displacement is the global address.
17963 else if (Flags & PPC::MOF_RPlusLo) {
17964 Disp = N.getOperand(1).getOperand(0); // The global address.
17965 assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
17966 Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
17967 Disp.getOpcode() == ISD::TargetConstantPool ||
17968 Disp.getOpcode() == ISD::TargetJumpTable);
17969 Base = N.getOperand(0);
17970 break;
17971 }
17972 // This is a constant address at most 32 bits. The base will be
17973 // zero or load-immediate-shifted and the displacement will be
17974 // the low 16 bits of the address.
17975 else if (Flags & PPC::MOF_AddrIsSImm32) {
17976 auto *CN = cast<ConstantSDNode>(N);
17977 EVT CNType = CN->getValueType(0);
17978 uint64_t CNImm = CN->getZExtValue();
17979 // If this address fits entirely in a 16-bit sext immediate field, codegen
17980 // this as "d, 0".
17981 int16_t Imm;
17982 if (isIntS16Immediate(CN, Imm) && (!Align || isAligned(*Align, Imm))) {
17983 Disp = DAG.getTargetConstant(Imm, DL, CNType);
17984 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
17985 CNType);
17986 break;
17987 }
17988 // Handle 32-bit sext immediate with LIS + Addr mode.
17989 if ((CNType == MVT::i32 || isInt<32>(CNImm)) &&
17990 (!Align || isAligned(*Align, CNImm))) {
17991 int32_t Addr = (int32_t)CNImm;
17992 // Otherwise, break this down into LIS + Disp.
17993 Disp = DAG.getTargetConstant((int16_t)Addr, DL, MVT::i32);
17994 Base =
17995 DAG.getTargetConstant((Addr - (int16_t)Addr) >> 16, DL, MVT::i32);
17996 uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8;
17997 Base = SDValue(DAG.getMachineNode(LIS, DL, CNType, Base), 0);
17998 break;
17999 }
18000 }
18001 // Otherwise, the PPC::MOF_NotAddNorCst flag is set. The load/store is non-foldable.
18002 Disp = DAG.getTargetConstant(0, DL, getPointerTy(DAG.getDataLayout()));
18003 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
18004 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18005 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
18006 } else
18007 Base = N;
18008 break;
18009 }
18010 case PPC::AM_PrefixDForm: {
18011 int64_t Imm34 = 0;
18012 unsigned Opcode = N.getOpcode();
18013 if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) &&
18014 (isIntS34Immediate(N.getOperand(1), Imm34))) {
18015 // N is an ADD/OR node, and its operand is a 34-bit signed immediate.
18016 Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType());
18017 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
18018 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18019 else
18020 Base = N.getOperand(0);
18021 } else if (isIntS34Immediate(N, Imm34)) {
18022 // The address is a 34-bit signed immediate.
18023 Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType());
18024 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
18025 }
18026 break;
18027 }
18028 case PPC::AM_PCRel: {
18029 // When selecting PC-Relative instructions, "Base" is not utilized as
18030 // we select the address as [PC+imm].
18031 Disp = N;
18032 break;
18033 }
18034 case PPC::AM_None:
18035 break;
18036 default: { // By default, X-Form is always available to be selected.
18037 // When a frame index is not aligned, we also match by XForm.
18038 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N);
18039 Base = FI ? N : N.getOperand(1);
18040 Disp = FI ? DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18041 N.getValueType())
18042 : N.getOperand(0);
18043 break;
18044 }
18045 }
18046 return Mode;
18047}
18048
18049CCAssignFn *PPCTargetLowering::ccAssignFnForCall(CallingConv::ID CC,
18050 bool Return,
18051 bool IsVarArg) const {
18052 switch (CC) {
18053 case CallingConv::Cold:
18054 return (Return ? RetCC_PPC_Cold : CC_PPC64_ELF_FIS);
18055 default:
18056 return CC_PPC64_ELF_FIS;
18057 }
18058}
18059
18060bool PPCTargetLowering::shouldInlineQuadwordAtomics() const {
18061 // TODO: 16-byte atomic type support for AIX is in progress; we should be able
18062 // to inline 16-byte atomic ops on AIX too in the future.
18063 return Subtarget.isPPC64() &&
18064 (EnableQuadwordAtomics || !Subtarget.getTargetTriple().isOSAIX()) &&
18065 Subtarget.hasQuadwordAtomics();
18066}
18067
18068TargetLowering::AtomicExpansionKind
18069PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
18070 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
18071 if (shouldInlineQuadwordAtomics() && Size == 128)
18072 return AtomicExpansionKind::MaskedIntrinsic;
18073 return TargetLowering::shouldExpandAtomicRMWInIR(AI);
18074}
18075
18076TargetLowering::AtomicExpansionKind
18077PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
18078 unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
18079 if (shouldInlineQuadwordAtomics() && Size == 128)
18080 return AtomicExpansionKind::MaskedIntrinsic;
18081 return TargetLowering::shouldExpandAtomicCmpXchgInIR(AI);
18082}
18083
18084static Intrinsic::ID
18085getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp) {
18086 switch (BinOp) {
18087 default:
18088 llvm_unreachable("Unexpected AtomicRMW BinOp");
18089 case AtomicRMWInst::Xchg:
18090 return Intrinsic::ppc_atomicrmw_xchg_i128;
18091 case AtomicRMWInst::Add:
18092 return Intrinsic::ppc_atomicrmw_add_i128;
18093 case AtomicRMWInst::Sub:
18094 return Intrinsic::ppc_atomicrmw_sub_i128;
18095 case AtomicRMWInst::And:
18096 return Intrinsic::ppc_atomicrmw_and_i128;
18097 case AtomicRMWInst::Or:
18098 return Intrinsic::ppc_atomicrmw_or_i128;
18099 case AtomicRMWInst::Xor:
18100 return Intrinsic::ppc_atomicrmw_xor_i128;
18101 case AtomicRMWInst::Nand:
18102 return Intrinsic::ppc_atomicrmw_nand_i128;
18103 }
18104}
18105
18106Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
18107 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
18108 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
18109 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
18110 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
18111 Type *ValTy = Incr->getType();
18112 assert(ValTy->getPrimitiveSizeInBits() == 128);
18113 Function *RMW = Intrinsic::getDeclaration(
18114 M, getIntrinsicForAtomicRMWBinOp128(AI->getOperation()));
18115 Type *Int64Ty = Type::getInt64Ty(M->getContext());
18116 Value *IncrLo = Builder.CreateTrunc(Incr, Int64Ty, "incr_lo");
18117 Value *IncrHi =
18118 Builder.CreateTrunc(Builder.CreateLShr(Incr, 64), Int64Ty, "incr_hi");
18119 Value *Addr =
18120 Builder.CreateBitCast(AlignedAddr, Type::getInt8PtrTy(M->getContext()));
18121 Value *LoHi = Builder.CreateCall(RMW, {Addr, IncrLo, IncrHi});
18122 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
18123 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
18124 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
18125 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
18126 return Builder.CreateOr(
18127 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
18128}
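// For illustration: an i128 atomicrmw add therefore becomes a call to the
// ppc_atomicrmw_add_i128 intrinsic taking the address and the low/high i64
// halves of the increment, and the returned halves are reassembled as
// (zext lo) | ((zext hi) << 64).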
18129
18130Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
18131 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
18132 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
18133 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
18134 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
18135 Type *ValTy = CmpVal->getType();
18136 assert(ValTy->getPrimitiveSizeInBits() == 128);
18137 Function *IntCmpXchg =
18138 Intrinsic::getDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
18139 Type *Int64Ty = Type::getInt64Ty(M->getContext());
18140 Value *CmpLo = Builder.CreateTrunc(CmpVal, Int64Ty, "cmp_lo");
18141 Value *CmpHi =
18142 Builder.CreateTrunc(Builder.CreateLShr(CmpVal, 64), Int64Ty, "cmp_hi");
18143 Value *NewLo = Builder.CreateTrunc(NewVal, Int64Ty, "new_lo");
18144 Value *NewHi =
18145 Builder.CreateTrunc(Builder.CreateLShr(NewVal, 64), Int64Ty, "new_hi");
18146 Value *Addr =
18147 Builder.CreateBitCast(AlignedAddr, Type::getInt8PtrTy(M->getContext()));
18148 emitLeadingFence(Builder, CI, Ord);
18149 Value *LoHi =
18150 Builder.CreateCall(IntCmpXchg, {Addr, CmpLo, CmpHi, NewLo, NewHi});
18151 emitTrailingFence(Builder, CI, Ord);
18152 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
18153 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
18154 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
18155 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
18156 return Builder.CreateOr(
18157 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
18158}
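Both quadword hooks above follow the same shape: split the 128-bit operand into 64-bit halves, call the PPC i128 intrinsic, and stitch the {lo, hi} result back together. The following is a rough scalar model of that split/recombine step, added purely as an editorial illustration (the real code builds IR; unsigned __int128 is a GCC/Clang extension used here only for brevity):

#include <cstdint>

// Model of CreateTrunc(V, i64) and CreateTrunc(CreateLShr(V, 64), i64).
static void splitLoHi(unsigned __int128 V, uint64_t &Lo, uint64_t &Hi) {
  Lo = static_cast<uint64_t>(V);
  Hi = static_cast<uint64_t>(V >> 64);
}

// Model of CreateOr(Lo, CreateShl(Hi, 64)) used to rebuild the i128 result.
static unsigned __int128 combineLoHi(uint64_t Lo, uint64_t Hi) {
  return (static_cast<unsigned __int128>(Hi) << 64) | Lo;
}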

/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/include/llvm/Support/MathExtras.h

1//===-- llvm/Support/MathExtras.h - Useful math functions -------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains some functions that are useful for math stuff.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_SUPPORT_MATHEXTRAS_H
14#define LLVM_SUPPORT_MATHEXTRAS_H
15
16#include "llvm/Support/Compiler.h"
17#include <cassert>
18#include <climits>
19#include <cmath>
20#include <cstdint>
21#include <cstring>
22#include <limits>
23#include <type_traits>
24
25#ifdef __ANDROID_NDK__
26#include <android/api-level.h>
27#endif
28
29#ifdef _MSC_VER
30// Declare these intrinsics manually rather than including intrin.h. It's very
31// expensive, and MathExtras.h is popular.
32// #include <intrin.h>
33extern "C" {
34unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask);
35unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask);
36unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask);
37unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask);
38}
39#endif
40
41namespace llvm {
42
43/// The behavior an operation has on an input of 0.
44enum ZeroBehavior {
45 /// The returned value is undefined.
46 ZB_Undefined,
47 /// The returned value is numeric_limits<T>::max()
48 ZB_Max,
49 /// The returned value is numeric_limits<T>::digits
50 ZB_Width
51};
52
53/// Mathematical constants.
54namespace numbers {
55// TODO: Track C++20 std::numbers.
56// TODO: Favor using the hexadecimal FP constants (requires C++17).
57constexpr double e = 2.7182818284590452354, // (0x1.5bf0a8b145749P+1) https://oeis.org/A001113
58 egamma = .57721566490153286061, // (0x1.2788cfc6fb619P-1) https://oeis.org/A001620
59 ln2 = .69314718055994530942, // (0x1.62e42fefa39efP-1) https://oeis.org/A002162
60 ln10 = 2.3025850929940456840, // (0x1.26bb1bbb55516P+1) https://oeis.org/A002392
61 log2e = 1.4426950408889634074, // (0x1.71547652b82feP+0)
62 log10e = .43429448190325182765, // (0x1.bcb7b1526e50eP-2)
63 pi = 3.1415926535897932385, // (0x1.921fb54442d18P+1) https://oeis.org/A000796
64 inv_pi = .31830988618379067154, // (0x1.45f306bc9c883P-2) https://oeis.org/A049541
65 sqrtpi = 1.7724538509055160273, // (0x1.c5bf891b4ef6bP+0) https://oeis.org/A002161
66 inv_sqrtpi = .56418958354775628695, // (0x1.20dd750429b6dP-1) https://oeis.org/A087197
67 sqrt2 = 1.4142135623730950488, // (0x1.6a09e667f3bcdP+0) https://oeis.org/A002193
68 inv_sqrt2 = .70710678118654752440, // (0x1.6a09e667f3bcdP-1)
69 sqrt3 = 1.7320508075688772935, // (0x1.bb67ae8584caaP+0) https://oeis.org/A002194
70 inv_sqrt3 = .57735026918962576451, // (0x1.279a74590331cP-1)
71 phi = 1.6180339887498948482; // (0x1.9e3779b97f4a8P+0) https://oeis.org/A001622
72constexpr float ef = 2.71828183F, // (0x1.5bf0a8P+1) https://oeis.org/A001113
73 egammaf = .577215665F, // (0x1.2788d0P-1) https://oeis.org/A001620
74 ln2f = .693147181F, // (0x1.62e430P-1) https://oeis.org/A002162
75 ln10f = 2.30258509F, // (0x1.26bb1cP+1) https://oeis.org/A002392
76 log2ef = 1.44269504F, // (0x1.715476P+0)
77 log10ef = .434294482F, // (0x1.bcb7b2P-2)
78 pif = 3.14159265F, // (0x1.921fb6P+1) https://oeis.org/A000796
79 inv_pif = .318309886F, // (0x1.45f306P-2) https://oeis.org/A049541
80 sqrtpif = 1.77245385F, // (0x1.c5bf8aP+0) https://oeis.org/A002161
81 inv_sqrtpif = .564189584F, // (0x1.20dd76P-1) https://oeis.org/A087197
82 sqrt2f = 1.41421356F, // (0x1.6a09e6P+0) https://oeis.org/A002193
83 inv_sqrt2f = .707106781F, // (0x1.6a09e6P-1)
84 sqrt3f = 1.73205081F, // (0x1.bb67aeP+0) https://oeis.org/A002194
85 inv_sqrt3f = .577350269F, // (0x1.279a74P-1)
86 phif = 1.61803399F; // (0x1.9e377aP+0) https://oeis.org/A001622
87} // namespace numbers
88
89namespace detail {
90template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter {
91 static unsigned count(T Val, ZeroBehavior) {
92 if (!Val)
93 return std::numeric_limits<T>::digits;
94 if (Val & 0x1)
95 return 0;
96
97 // Bisection method.
98 unsigned ZeroBits = 0;
99 T Shift = std::numeric_limits<T>::digits >> 1;
100 T Mask = std::numeric_limits<T>::max() >> Shift;
101 while (Shift) {
102 if ((Val & Mask) == 0) {
103 Val >>= Shift;
104 ZeroBits |= Shift;
105 }
106 Shift >>= 1;
107 Mask >>= Shift;
108 }
109 return ZeroBits;
110 }
111};
112
113#if defined(__GNUC__) || defined(_MSC_VER)
114template <typename T> struct TrailingZerosCounter<T, 4> {
115 static unsigned count(T Val, ZeroBehavior ZB) {
116 if (ZB != ZB_Undefined && Val == 0)
117 return 32;
118
119#if __has_builtin(__builtin_ctz) || defined(__GNUC__)
120 return __builtin_ctz(Val);
121#elif defined(_MSC_VER)
122 unsigned long Index;
123 _BitScanForward(&Index, Val);
124 return Index;
125#endif
126 }
127};
128
129#if !defined(_MSC_VER) || defined(_M_X64)
130template <typename T> struct TrailingZerosCounter<T, 8> {
131 static unsigned count(T Val, ZeroBehavior ZB) {
132    if (ZB != ZB_Undefined && Val == 0)
         7.1: 'ZB' is not equal to ZB_Undefined
         8: Assuming 'Val' is equal to 0
         9: Taking true branch
133      return 64;
         10: Returning the value 64
134
135#if __has_builtin(__builtin_ctzll) || defined(__GNUC__)
136 return __builtin_ctzll(Val);
137#elif defined(_MSC_VER)
138 unsigned long Index;
139 _BitScanForward64(&Index, Val);
140 return Index;
141#endif
142 }
143};
144#endif
145#endif
146} // namespace detail
147
148/// Count number of 0's from the least significant bit to the most
149/// stopping at the first 1.
150///
151/// Only unsigned integral types are allowed.
152///
153/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
154/// valid arguments.
155template <typename T>
156unsigned countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
157 static_assert(std::numeric_limits<T>::is_integer &&
158 !std::numeric_limits<T>::is_signed,
159 "Only unsigned integral types are allowed.");
160 return llvm::detail::TrailingZerosCounter<T, sizeof(T)>::count(Val, ZB);
         7: Calling 'TrailingZerosCounter::count'
         11: Returning from 'TrailingZerosCounter::count'
         12: Returning the value 64
161}
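An editorial usage sketch for countTrailingZeros (not part of the analyzed header), assuming it is available as "llvm/Support/MathExtras.h"; the zero case below is exactly the ZB_Width path the analyzer walks above:

#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>

void countTrailingZerosExamples() {
  assert(llvm::countTrailingZeros(0x28u) == 3);         // 0b101000: lowest set bit is bit 3
  assert(llvm::countTrailingZeros(uint64_t(0)) == 64);  // ZB_Width default: returns the type width
}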
162
163namespace detail {
164template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter {
165 static unsigned count(T Val, ZeroBehavior) {
166 if (!Val)
167 return std::numeric_limits<T>::digits;
168
169 // Bisection method.
170 unsigned ZeroBits = 0;
171 for (T Shift = std::numeric_limits<T>::digits >> 1; Shift; Shift >>= 1) {
172 T Tmp = Val >> Shift;
173 if (Tmp)
174 Val = Tmp;
175 else
176 ZeroBits |= Shift;
177 }
178 return ZeroBits;
179 }
180};
181
182#if defined(__GNUC__) || defined(_MSC_VER)
183template <typename T> struct LeadingZerosCounter<T, 4> {
184 static unsigned count(T Val, ZeroBehavior ZB) {
185 if (ZB != ZB_Undefined && Val == 0)
186 return 32;
187
188#if __has_builtin(__builtin_clz) || defined(__GNUC__)
189 return __builtin_clz(Val);
190#elif defined(_MSC_VER)
191 unsigned long Index;
192 _BitScanReverse(&Index, Val);
193 return Index ^ 31;
194#endif
195 }
196};
197
198#if !defined(_MSC_VER) || defined(_M_X64)
199template <typename T> struct LeadingZerosCounter<T, 8> {
200 static unsigned count(T Val, ZeroBehavior ZB) {
201 if (ZB != ZB_Undefined && Val == 0)
202 return 64;
203
204#if __has_builtin(__builtin_clzll) || defined(__GNUC__)
205 return __builtin_clzll(Val);
206#elif defined(_MSC_VER)
207 unsigned long Index;
208 _BitScanReverse64(&Index, Val);
209 return Index ^ 63;
210#endif
211 }
212};
213#endif
214#endif
215} // namespace detail
216
217/// Count number of 0's from the most significant bit to the least
218/// stopping at the first 1.
219///
220/// Only unsigned integral types are allowed.
221///
222/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
223/// valid arguments.
224template <typename T>
225unsigned countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
226 static_assert(std::numeric_limits<T>::is_integer &&
227 !std::numeric_limits<T>::is_signed,
228 "Only unsigned integral types are allowed.");
229 return llvm::detail::LeadingZerosCounter<T, sizeof(T)>::count(Val, ZB);
230}
231
232/// Get the index of the first set bit starting from the least
233/// significant bit.
234///
235/// Only unsigned integral types are allowed.
236///
237/// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are
238/// valid arguments.
239template <typename T> T findFirstSet(T Val, ZeroBehavior ZB = ZB_Max) {
240 if (ZB == ZB_Max && Val == 0)
241 return std::numeric_limits<T>::max();
242
243 return countTrailingZeros(Val, ZB_Undefined);
244}
245
246/// Create a bitmask with the N right-most bits set to 1, and all other
247/// bits set to 0. Only unsigned types are allowed.
248template <typename T> T maskTrailingOnes(unsigned N) {
249 static_assert(std::is_unsigned<T>::value, "Invalid type!");
250 const unsigned Bits = CHAR_BIT * sizeof(T);
251 assert(N <= Bits && "Invalid bit index");
252 return N == 0 ? 0 : (T(-1) >> (Bits - N));
253}
254
255/// Create a bitmask with the N left-most bits set to 1, and all other
256/// bits set to 0. Only unsigned types are allowed.
257template <typename T> T maskLeadingOnes(unsigned N) {
258 return ~maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N);
259}
260
261/// Create a bitmask with the N right-most bits set to 0, and all other
262/// bits set to 1. Only unsigned types are allowed.
263template <typename T> T maskTrailingZeros(unsigned N) {
264 return maskLeadingOnes<T>(CHAR_BIT * sizeof(T) - N);
265}
266
267/// Create a bitmask with the N left-most bits set to 0, and all other
268/// bits set to 1. Only unsigned types are allowed.
269template <typename T> T maskLeadingZeros(unsigned N) {
270 return maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N);
271}
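A few illustrative values for the four mask helpers above (editorial addition, not from the analyzed source):

#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>

void maskExamples() {
  assert(llvm::maskTrailingOnes<uint32_t>(8)  == 0x000000FFu);
  assert(llvm::maskLeadingOnes<uint32_t>(4)   == 0xF0000000u);
  assert(llvm::maskTrailingZeros<uint32_t>(8) == 0xFFFFFF00u);
  assert(llvm::maskLeadingZeros<uint32_t>(4)  == 0x0FFFFFFFu);
}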
272
273/// Get the index of the last set bit starting from the least
274/// significant bit.
275///
276/// Only unsigned integral types are allowed.
277///
278/// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are
279/// valid arguments.
280template <typename T> T findLastSet(T Val, ZeroBehavior ZB = ZB_Max) {
281 if (ZB == ZB_Max && Val == 0)
282 return std::numeric_limits<T>::max();
283
284 // Use ^ instead of - because both gcc and llvm can remove the associated ^
285 // in the __builtin_clz intrinsic on x86.
286 return countLeadingZeros(Val, ZB_Undefined) ^
287 (std::numeric_limits<T>::digits - 1);
288}
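An editorial sketch pairing findFirstSet and findLastSet on the same value (not part of the analyzed header):

#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <limits>

void findSetBitExamples() {
  assert(llvm::findFirstSet(0x18u) == 3u);  // 0b11000: index of the lowest set bit
  assert(llvm::findLastSet(0x18u) == 4u);   // 0b11000: index of the highest set bit
  assert(llvm::findFirstSet(0u) == std::numeric_limits<unsigned>::max());  // ZB_Max default
}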
289
290/// Macro compressed bit reversal table for 256 bits.
291///
292/// http://graphics.stanford.edu/~seander/bithacks.html#BitReverseTable
293static const unsigned char BitReverseTable256[256] = {
294#define R2(n) n, n + 2 * 64, n + 1 * 64, n + 3 * 64
295#define R4(n) R2(n), R2(n + 2 * 16), R2(n + 1 * 16), R2(n + 3 * 16)
296#define R6(n) R4(n), R4(n + 2 * 4), R4(n + 1 * 4), R4(n + 3 * 4)
297 R6(0), R6(2), R6(1), R6(3)
298#undef R2
299#undef R4
300#undef R6
301};
302
303/// Reverse the bits in \p Val.
304template <typename T>
305T reverseBits(T Val) {
306 unsigned char in[sizeof(Val)];
307 unsigned char out[sizeof(Val)];
308 std::memcpy(in, &Val, sizeof(Val));
309 for (unsigned i = 0; i < sizeof(Val); ++i)
310 out[(sizeof(Val) - i) - 1] = BitReverseTable256[in[i]];
311 std::memcpy(&Val, out, sizeof(Val));
312 return Val;
313}
314
315#if __has_builtin(__builtin_bitreverse8)
316template<>
317inline uint8_t reverseBits<uint8_t>(uint8_t Val) {
318 return __builtin_bitreverse8(Val);
319}
320#endif
321
322#if __has_builtin(__builtin_bitreverse16)
323template<>
324inline uint16_t reverseBits<uint16_t>(uint16_t Val) {
325 return __builtin_bitreverse16(Val);
326}
327#endif
328
329#if __has_builtin(__builtin_bitreverse32)
330template<>
331inline uint32_t reverseBits<uint32_t>(uint32_t Val) {
332 return __builtin_bitreverse32(Val);
333}
334#endif
335
336#if __has_builtin(__builtin_bitreverse64)
337template<>
338inline uint64_t reverseBits<uint64_t>(uint64_t Val) {
339 return __builtin_bitreverse64(Val);
340}
341#endif
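A short editorial check of reverseBits, which dispatches to the __builtin_bitreverse* specializations above when they are available:

#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>

void reverseBitsExamples() {
  assert(llvm::reverseBits<uint8_t>(0x01) == 0x80);       // bit 0 -> bit 7
  assert(llvm::reverseBits<uint16_t>(0x00F0) == 0x0F00);  // bits 4-7 -> bits 8-11
}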
342
343// NOTE: The following support functions use the _32/_64 extensions instead of
344// type overloading so that signed and unsigned integers can be used without
345// ambiguity.
346
347/// Return the high 32 bits of a 64 bit value.
348constexpr inline uint32_t Hi_32(uint64_t Value) {
349 return static_cast<uint32_t>(Value >> 32);
350}
351
352/// Return the low 32 bits of a 64 bit value.
353constexpr inline uint32_t Lo_32(uint64_t Value) {
354 return static_cast<uint32_t>(Value);
355}
356
357/// Make a 64-bit integer from a high / low pair of 32-bit integers.
358constexpr inline uint64_t Make_64(uint32_t High, uint32_t Low) {
359 return ((uint64_t)High << 32) | (uint64_t)Low;
360}
361
362/// Checks if an integer fits into the given bit width.
363template <unsigned N> constexpr inline bool isInt(int64_t x) {
364 return N >= 64 || (-(INT64_C(1)<<(N-1)) <= x && x < (INT64_C(1)<<(N-1)));
365}
366// Template specializations to get better code for common cases.
367template <> constexpr inline bool isInt<8>(int64_t x) {
368 return static_cast<int8_t>(x) == x;
369}
370template <> constexpr inline bool isInt<16>(int64_t x) {
371 return static_cast<int16_t>(x) == x;
372}
373template <> constexpr inline bool isInt<32>(int64_t x) {
374 return static_cast<int32_t>(x) == x;
375}
376
377/// Checks if a signed integer is an N bit number shifted left by S.
378template <unsigned N, unsigned S>
379constexpr inline bool isShiftedInt(int64_t x) {
380 static_assert(
381 N > 0, "isShiftedInt<0> doesn't make sense (refers to a 0-bit number).");
382 static_assert(N + S <= 64, "isShiftedInt<N, S> with N + S > 64 is too wide.");
383 return isInt<N + S>(x) && (x % (UINT64_C(1) << S) == 0);
384}
385
386/// Checks if an unsigned integer fits into the given bit width.
387///
388/// This is written as two functions rather than as simply
389///
390/// return N >= 64 || X < (UINT64_C(1) << N);
391///
392/// to keep MSVC from (incorrectly) warning on isUInt<64> that we're shifting
393/// left too many places.
394template <unsigned N>
395constexpr inline std::enable_if_t<(N < 64), bool> isUInt(uint64_t X) {
396 static_assert(N > 0, "isUInt<0> doesn't make sense");
397 return X < (UINT64_C(1) << (N));
398}
399template <unsigned N>
400constexpr inline std::enable_if_t<N >= 64, bool> isUInt(uint64_t) {
401 return true;
402}
403
404// Template specializations to get better code for common cases.
405template <> constexpr inline bool isUInt<8>(uint64_t x) {
406 return static_cast<uint8_t>(x) == x;
407}
408template <> constexpr inline bool isUInt<16>(uint64_t x) {
409 return static_cast<uint16_t>(x) == x;
410}
411template <> constexpr inline bool isUInt<32>(uint64_t x) {
412 return static_cast<uint32_t>(x) == x;
413}
414
415/// Checks if an unsigned integer is an N bit number shifted left by S.
416template <unsigned N, unsigned S>
417constexpr inline bool isShiftedUInt(uint64_t x) {
418 static_assert(
419 N > 0, "isShiftedUInt<0> doesn't make sense (refers to a 0-bit number)");
420 static_assert(N + S <= 64,
421 "isShiftedUInt<N, S> with N + S > 64 is too wide.");
422 // Per the two static_asserts above, S must be strictly less than 64. So
423 // 1 << S is not undefined behavior.
424 return isUInt<N + S>(x) && (x % (UINT64_C(1) << S) == 0);
425}
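Because the width checks above are constexpr, they can be exercised at compile time; an editorial sketch:

#include "llvm/Support/MathExtras.h"

static_assert(llvm::isInt<8>(127) && !llvm::isInt<8>(128), "isInt<8> covers [-128, 127]");
static_assert(llvm::isUInt<8>(255) && !llvm::isUInt<8>(256), "isUInt<8> covers [0, 255]");
static_assert(llvm::isShiftedInt<8, 4>(0x7F0), "2032 == 127 << 4");
static_assert(llvm::isShiftedUInt<8, 8>(0xFF00), "65280 == 255 << 8");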
426
427/// Gets the maximum value for a N-bit unsigned integer.
428inline uint64_t maxUIntN(uint64_t N) {
429 assert(N > 0 && N <= 64 && "integer width out of range");
430
431 // uint64_t(1) << 64 is undefined behavior, so we can't do
432 // (uint64_t(1) << N) - 1
433 // without checking first that N != 64. But this works and doesn't have a
434 // branch.
435 return UINT64_MAX >> (64 - N);
436}
437
438/// Gets the minimum value for a N-bit signed integer.
439inline int64_t minIntN(int64_t N) {
440 assert(N > 0 && N <= 64 && "integer width out of range");
441
442 return UINT64_C(1) + ~(UINT64_C(1) << (N - 1));
443}
444
445/// Gets the maximum value for a N-bit signed integer.
446inline int64_t maxIntN(int64_t N) {
447 assert(N > 0 && N <= 64 && "integer width out of range");
448
449 // This relies on two's complement wraparound when N == 64, so we convert to
450 // int64_t only at the very end to avoid UB.
451 return (UINT64_C(1) << (N - 1)) - 1;
452}
453
454/// Checks if an unsigned integer fits into the given (dynamic) bit width.
455inline bool isUIntN(unsigned N, uint64_t x) {
456 return N >= 64 || x <= maxUIntN(N);
457}
458
459/// Checks if a signed integer fits into the given (dynamic) bit width.
460inline bool isIntN(unsigned N, int64_t x) {
461 return N >= 64 || (minIntN(N) <= x && x <= maxIntN(N));
462}
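Illustrative values for the dynamic-width helpers above (editorial addition):

#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>

void dynamicWidthExamples() {
  assert(llvm::maxUIntN(8) == 255u && llvm::maxUIntN(64) == UINT64_MAX);
  assert(llvm::minIntN(8) == -128 && llvm::maxIntN(8) == 127);
  assert(llvm::isUIntN(8, 255) && !llvm::isUIntN(8, 256));
  assert(llvm::isIntN(8, -128) && !llvm::isIntN(8, 128));
}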
463
464/// Return true if the argument is a non-empty sequence of ones starting at the
465/// least significant bit with the remainder zero (32 bit version).
466/// Ex. isMask_32(0x0000FFFFU) == true.
467constexpr inline bool isMask_32(uint32_t Value) {
468 return Value && ((Value + 1) & Value) == 0;
469}
470
471/// Return true if the argument is a non-empty sequence of ones starting at the
472/// least significant bit with the remainder zero (64 bit version).
473constexpr inline bool isMask_64(uint64_t Value) {
474 return Value && ((Value + 1) & Value) == 0;
475}
476
477/// Return true if the argument contains a non-empty sequence of ones with the
478/// remainder zero (32 bit version.) Ex. isShiftedMask_32(0x0000FF00U) == true.
479constexpr inline bool isShiftedMask_32(uint32_t Value) {
480 return Value && isMask_32((Value - 1) | Value);
481}
482
483/// Return true if the argument contains a non-empty sequence of ones with the
484/// remainder zero (64 bit version.)
485constexpr inline bool isShiftedMask_64(uint64_t Value) {
486 return Value && isMask_64((Value - 1) | Value);
487}
488
489/// Return true if the argument is a power of two > 0.
490/// Ex. isPowerOf2_32(0x00100000U) == true (32 bit edition.)
491constexpr inline bool isPowerOf2_32(uint32_t Value) {
492 return Value && !(Value & (Value - 1));
493}
494
495/// Return true if the argument is a power of two > 0 (64 bit edition.)
496constexpr inline bool isPowerOf2_64(uint64_t Value) {
497 return Value && !(Value & (Value - 1));
498}
499
500/// Count the number of ones from the most significant bit to the first
501/// zero bit.
502///
503/// Ex. countLeadingOnes(0xFF0FFF00) == 8.
504/// Only unsigned integral types are allowed.
505///
506/// \param ZB the behavior on an input of all ones. Only ZB_Width and
507/// ZB_Undefined are valid arguments.
508template <typename T>
509unsigned countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
510 static_assert(std::numeric_limits<T>::is_integer &&
511 !std::numeric_limits<T>::is_signed,
512 "Only unsigned integral types are allowed.");
513 return countLeadingZeros<T>(~Value, ZB);
514}
515
516/// Count the number of ones from the least significant bit to the first
517/// zero bit.
518///
519/// Ex. countTrailingOnes(0x00FF00FF) == 8.
520/// Only unsigned integral types are allowed.
521///
522/// \param ZB the behavior on an input of all ones. Only ZB_Width and
523/// ZB_Undefined are valid arguments.
524template <typename T>
525unsigned countTrailingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
526 static_assert(std::numeric_limits<T>::is_integer &&
527 !std::numeric_limits<T>::is_signed,
528 "Only unsigned integral types are allowed.");
529 return countTrailingZeros<T>(~Value, ZB);
530}
531
532namespace detail {
533template <typename T, std::size_t SizeOfT> struct PopulationCounter {
534 static unsigned count(T Value) {
535 // Generic version, forward to 32 bits.
536 static_assert(SizeOfT <= 4, "Not implemented!");
537#if defined(__GNUC__)
538 return __builtin_popcount(Value);
539#else
540 uint32_t v = Value;
541 v = v - ((v >> 1) & 0x55555555);
542 v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
543 return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24;
544#endif
545 }
546};
547
548template <typename T> struct PopulationCounter<T, 8> {
549 static unsigned count(T Value) {
550#if defined(__GNUC__)
551 return __builtin_popcountll(Value);
552#else
553 uint64_t v = Value;
554 v = v - ((v >> 1) & 0x5555555555555555ULL);
555 v = (v & 0x3333333333333333ULL) + ((v >> 2) & 0x3333333333333333ULL);
556 v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0FULL;
557 return unsigned((uint64_t)(v * 0x0101010101010101ULL) >> 56);
558#endif
559 }
560};
561} // namespace detail
562
563/// Count the number of set bits in a value.
564/// Ex. countPopulation(0xF000F000) = 8
565/// Returns 0 if the word is zero.
566template <typename T>
567inline unsigned countPopulation(T Value) {
568 static_assert(std::numeric_limits<T>::is_integer &&
569 !std::numeric_limits<T>::is_signed,
570 "Only unsigned integral types are allowed.");
571 return detail::PopulationCounter<T, sizeof(T)>::count(Value);
572}
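A brief editorial check of countPopulation, matching the example in its comment:

#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>

void popCountExamples() {
  assert(llvm::countPopulation(0xF000F000u) == 8u);
  assert(llvm::countPopulation(uint64_t(0)) == 0u);
}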
573
574/// Return true if the argument contains a non-empty sequence of ones with the
575/// remainder zero (32 bit version.) Ex. isShiftedMask_32(0x0000FF00U) == true.
576/// If true, \p MaskIdx will specify the index of the lowest set bit and \p
577/// MaskLen is updated to specify the length of the mask, else neither are
578/// updated.
579inline bool isShiftedMask_32(uint32_t Value, unsigned &MaskIdx,
580 unsigned &MaskLen) {
581 if (!isShiftedMask_32(Value))
582 return false;
583 MaskIdx = countTrailingZeros(Value);
584 MaskLen = countPopulation(Value);
585 return true;
586}
587
588/// Return true if the argument contains a non-empty sequence of ones with the
589/// remainder zero (64 bit version.) If true, \p MaskIdx will specify the index
590/// of the lowest set bit and \p MaskLen is updated to specify the length of the
591/// mask, else neither are updated.
592inline bool isShiftedMask_64(uint64_t Value, unsigned &MaskIdx,
593 unsigned &MaskLen) {
594 if (!isShiftedMask_64(Value))
595 return false;
596 MaskIdx = countTrailingZeros(Value);
597 MaskLen = countPopulation(Value);
598 return true;
599}
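An editorial sketch of the out-parameter overloads above, which report where the run of ones starts and how long it is:

#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>

void shiftedMaskExamples() {
  unsigned MaskIdx = 0, MaskLen = 0;
  assert(llvm::isShiftedMask_32(0x0000FF00u, MaskIdx, MaskLen));
  assert(MaskIdx == 8 && MaskLen == 8);                            // ones occupy bits [8, 15]
  assert(!llvm::isShiftedMask_32(0x0000FF0Fu, MaskIdx, MaskLen));  // two separate runs of ones
}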
600
601/// Compile time Log2.
602/// Valid only for positive powers of two.
603template <size_t kValue> constexpr inline size_t CTLog2() {
604 static_assert(kValue > 0 && llvm::isPowerOf2_64(kValue),
605 "Value is not a valid power of 2");
606 return 1 + CTLog2<kValue / 2>();
607}
608
609template <> constexpr inline size_t CTLog2<1>() { return 0; }
610
611/// Return the log base 2 of the specified value.
612inline double Log2(double Value) {
613#if defined(__ANDROID_API__) && __ANDROID_API__ < 18
614 return __builtin_log(Value) / __builtin_log(2.0);
615#else
616 return log2(Value);
617#endif
618}
619
620/// Return the floor log base 2 of the specified value, -1 if the value is zero.
621/// (32 bit edition.)
622/// Ex. Log2_32(32) == 5, Log2_32(1) == 0, Log2_32(0) == -1, Log2_32(6) == 2
623inline unsigned Log2_32(uint32_t Value) {
624 return 31 - countLeadingZeros(Value);
625}
626
627/// Return the floor log base 2 of the specified value, -1 if the value is zero.
628/// (64 bit edition.)
629inline unsigned Log2_64(uint64_t Value) {
630 return 63 - countLeadingZeros(Value);
631}
632
633/// Return the ceil log base 2 of the specified value, 32 if the value is zero.
634/// (32 bit edition).
635/// Ex. Log2_32_Ceil(32) == 5, Log2_32_Ceil(1) == 0, Log2_32_Ceil(6) == 3
636inline unsigned Log2_32_Ceil(uint32_t Value) {
637 return 32 - countLeadingZeros(Value - 1);
638}
639
640/// Return the ceil log base 2 of the specified value, 64 if the value is zero.
641/// (64 bit edition.)
642inline unsigned Log2_64_Ceil(uint64_t Value) {
643 return 64 - countLeadingZeros(Value - 1);
644}
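Illustrative values for the floor/ceil log2 helpers above (editorial addition):

#include "llvm/Support/MathExtras.h"
#include <cassert>

void log2Examples() {
  assert(llvm::Log2_32(32) == 5u && llvm::Log2_32(6) == 2u);            // floor
  assert(llvm::Log2_32_Ceil(32) == 5u && llvm::Log2_32_Ceil(6) == 3u);  // ceil
  assert(llvm::Log2_64(1) == 0u);
}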
645
646/// Return the greatest common divisor of the values using Euclid's algorithm.
647template <typename T>
648inline T greatestCommonDivisor(T A, T B) {
649 while (B) {
650 T Tmp = B;
651 B = A % B;
652 A = Tmp;
653 }
654 return A;
655}
656
657inline uint64_t GreatestCommonDivisor64(uint64_t A, uint64_t B) {
658 return greatestCommonDivisor<uint64_t>(A, B);
659}
660
661/// This function takes a 64-bit integer and returns the bit equivalent double.
662inline double BitsToDouble(uint64_t Bits) {
663 double D;
664 static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
665 memcpy(&D, &Bits, sizeof(Bits));
666 return D;
667}
668
669/// This function takes a 32-bit integer and returns the bit equivalent float.
670inline float BitsToFloat(uint32_t Bits) {
671 float F;
672 static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
673 memcpy(&F, &Bits, sizeof(Bits));
674 return F;
675}
676
677/// This function takes a double and returns the bit equivalent 64-bit integer.
678/// Note that copying doubles around changes the bits of NaNs on some hosts,
679/// notably x86, so this routine cannot be used if these bits are needed.
680inline uint64_t DoubleToBits(double Double) {
681 uint64_t Bits;
682 static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
683 memcpy(&Bits, &Double, sizeof(Double));
684 return Bits;
685}
686
687/// This function takes a float and returns the bit equivalent 32-bit integer.
688/// Note that copying floats around changes the bits of NaNs on some hosts,
689/// notably x86, so this routine cannot be used if these bits are needed.
690inline uint32_t FloatToBits(float Float) {
691 uint32_t Bits;
692 static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
693 memcpy(&Bits, &Float, sizeof(Float));
694 return Bits;
695}
696
697/// A and B are either alignments or offsets. Return the minimum alignment that
698/// may be assumed after adding the two together.
699constexpr inline uint64_t MinAlign(uint64_t A, uint64_t B) {
700 // The largest power of 2 that divides both A and B.
701 //
702 // Replace "-Value" by "1+~Value" in the following commented code to avoid
703 // MSVC warning C4146
704 // return (A | B) & -(A | B);
705 return (A | B) & (1 + ~(A | B));
706}
707
708/// Returns the next power of two (in 64-bits) that is strictly greater than A.
709/// Returns zero on overflow.
710constexpr inline uint64_t NextPowerOf2(uint64_t A) {
711 A |= (A >> 1);
712 A |= (A >> 2);
713 A |= (A >> 4);
714 A |= (A >> 8);
715 A |= (A >> 16);
716 A |= (A >> 32);
717 return A + 1;
718}
719
720/// Returns the power of two which is less than or equal to the given value.
721/// Essentially, it is a floor operation across the domain of powers of two.
722inline uint64_t PowerOf2Floor(uint64_t A) {
723 if (!A) return 0;
724 return 1ull << (63 - countLeadingZeros(A, ZB_Undefined));
725}
726
727/// Returns the power of two which is greater than or equal to the given value.
728/// Essentially, it is a ceil operation across the domain of powers of two.
729inline uint64_t PowerOf2Ceil(uint64_t A) {
730 if (!A)
731 return 0;
732 return NextPowerOf2(A - 1);
733}
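A few editorial examples for the power-of-two helpers above:

#include "llvm/Support/MathExtras.h"
#include <cassert>

void powerOfTwoExamples() {
  assert(llvm::MinAlign(8, 12) == 4u);   // largest power of two dividing both
  assert(llvm::NextPowerOf2(8) == 16u);  // strictly greater than the input
  assert(llvm::PowerOf2Floor(10) == 8u);
  assert(llvm::PowerOf2Ceil(10) == 16u && llvm::PowerOf2Ceil(8) == 8u);
}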
734
735/// Returns the next integer (mod 2**64) that is greater than or equal to
736/// \p Value and is a multiple of \p Align. \p Align must be non-zero.
737///
738/// If non-zero \p Skew is specified, the return value will be a minimal
739/// integer that is greater than or equal to \p Value and equal to
740/// \p Align * N + \p Skew for some integer N. If \p Skew is larger than
741/// \p Align, its value is adjusted to '\p Skew mod \p Align'.
742///
743/// Examples:
744/// \code
745/// alignTo(5, 8) = 8
746/// alignTo(17, 8) = 24
747/// alignTo(~0LL, 8) = 0
748/// alignTo(321, 255) = 510
749///
750/// alignTo(5, 8, 7) = 7
751/// alignTo(17, 8, 1) = 17
752/// alignTo(~0LL, 8, 3) = 3
753/// alignTo(321, 255, 42) = 552
754/// \endcode
755inline uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
756 assert(Align != 0u && "Align can't be 0.");
757 Skew %= Align;
758 return (Value + Align - 1 - Skew) / Align * Align + Skew;
759}
760
761/// Returns the next integer (mod 2**64) that is greater than or equal to
762/// \p Value and is a multiple of \c Align. \c Align must be non-zero.
763template <uint64_t Align> constexpr inline uint64_t alignTo(uint64_t Value) {
764 static_assert(Align != 0u, "Align must be non-zero");
765 return (Value + Align - 1) / Align * Align;
766}
767
768/// Returns the integer ceil(Numerator / Denominator).
769inline uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator) {
770 return alignTo(Numerator, Denominator) / Denominator;
771}
772
773/// Returns the integer nearest(Numerator / Denominator).
774inline uint64_t divideNearest(uint64_t Numerator, uint64_t Denominator) {
775 return (Numerator + (Denominator / 2)) / Denominator;
776}
777
778/// Returns the largest uint64_t less than or equal to \p Value and is
779/// \p Skew mod \p Align. \p Align must be non-zero
780inline uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
781 assert(Align != 0u && "Align can't be 0.");
782 Skew %= Align;
783 return (Value - Skew) / Align * Align + Skew;
784}
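Editorial examples for the rounding-division and alignDown helpers above:

#include "llvm/Support/MathExtras.h"
#include <cassert>

void divideAndAlignDownExamples() {
  assert(llvm::divideCeil(10, 3) == 4u);
  assert(llvm::divideNearest(10, 4) == 3u);  // 2.5 rounds up
  assert(llvm::alignDown(17, 8) == 16u);
  assert(llvm::alignDown(17, 8, 3) == 11u);  // largest value <= 17 that is 3 (mod 8)
}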
785
786/// Sign-extend the number in the bottom B bits of X to a 32-bit integer.
787/// Requires 0 < B <= 32.
788template <unsigned B> constexpr inline int32_t SignExtend32(uint32_t X) {
789 static_assert(B > 0, "Bit width can't be 0.");
790 static_assert(B <= 32, "Bit width out of range.");
791 return int32_t(X << (32 - B)) >> (32 - B);
792}
793
794/// Sign-extend the number in the bottom B bits of X to a 32-bit integer.
795/// Requires 0 < B <= 32.
796inline int32_t SignExtend32(uint32_t X, unsigned B) {
797 assert(B > 0 && "Bit width can't be 0.");
798 assert(B <= 32 && "Bit width out of range.");
799 return int32_t(X << (32 - B)) >> (32 - B);
800}
801
802/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
803/// Requires 0 < B <= 64.
804template <unsigned B> constexpr inline int64_t SignExtend64(uint64_t x) {
805 static_assert(B > 0, "Bit width can't be 0.");
806 static_assert(B <= 64, "Bit width out of range.");
807 return int64_t(x << (64 - B)) >> (64 - B);
808}
809
810/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
811/// Requires 0 < B <= 64.
812inline int64_t SignExtend64(uint64_t X, unsigned B) {
813 assert(B > 0 && "Bit width can't be 0.");
814 assert(B <= 64 && "Bit width out of range.");
815 return int64_t(X << (64 - B)) >> (64 - B);
816}
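An editorial check of the sign-extension helpers above:

#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>

void signExtendExamples() {
  assert(llvm::SignExtend32<8>(0xFFu) == -1);
  assert(llvm::SignExtend32(0x80u, 8) == -128);
  assert(llvm::SignExtend64<16>(0x8000u) == -32768);
}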
817
818/// Subtract two unsigned integers, X and Y, of type T and return the absolute
819/// value of the result.
820template <typename T>
821std::enable_if_t<std::is_unsigned<T>::value, T> AbsoluteDifference(T X, T Y) {
822 return X > Y ? (X - Y) : (Y - X);
823}
824
825/// Add two unsigned integers, X and Y, of type T. Clamp the result to the
826/// maximum representable value of T on overflow. ResultOverflowed indicates if
827/// the result is larger than the maximum representable value of type T.
828template <typename T>
829std::enable_if_t<std::is_unsigned<T>::value, T>
830SaturatingAdd(T X, T Y, bool *ResultOverflowed = nullptr) {
831 bool Dummy;
832 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
833 // Hacker's Delight, p. 29
834 T Z = X + Y;
835 Overflowed = (Z < X || Z < Y);
836 if (Overflowed)
837 return std::numeric_limits<T>::max();
838 else
839 return Z;
840}
841
842/// Multiply two unsigned integers, X and Y, of type T. Clamp the result to the
843/// maximum representable value of T on overflow. ResultOverflowed indicates if
844/// the result is larger than the maximum representable value of type T.
845template <typename T>
846std::enable_if_t<std::is_unsigned<T>::value, T>
847SaturatingMultiply(T X, T Y, bool *ResultOverflowed = nullptr) {
848 bool Dummy;
849 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
850
851 // Hacker's Delight, p. 30 has a different algorithm, but we don't use that
852 // because it fails for uint16_t (where multiplication can have undefined
853 // behavior due to promotion to int), and requires a division in addition
854 // to the multiplication.
855
856 Overflowed = false;
857
858 // Log2(Z) would be either Log2Z or Log2Z + 1.
859 // Special case: if X or Y is 0, Log2_64 gives -1, and Log2Z
860 // will necessarily be less than Log2Max as desired.
861 int Log2Z = Log2_64(X) + Log2_64(Y);
862 const T Max = std::numeric_limits<T>::max();
863 int Log2Max = Log2_64(Max);
864 if (Log2Z < Log2Max) {
865 return X * Y;
866 }
867 if (Log2Z > Log2Max) {
868 Overflowed = true;
869 return Max;
870 }
871
872 // We're going to use the top bit, and maybe overflow one
873 // bit past it. Multiply all but the bottom bit then add
874 // that on at the end.
875 T Z = (X >> 1) * Y;
876 if (Z & ~(Max >> 1)) {
877 Overflowed = true;
878 return Max;
879 }
880 Z <<= 1;
881 if (X & 1)
882 return SaturatingAdd(Z, Y, ResultOverflowed);
883
884 return Z;
885}
886
887/// Multiply two unsigned integers, X and Y, and add the unsigned integer, A to
888/// the product. Clamp the result to the maximum representable value of T on
889/// overflow. ResultOverflowed indicates if the result is larger than the
890/// maximum representable value of type T.
891template <typename T>
892std::enable_if_t<std::is_unsigned<T>::value, T>
893SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed = nullptr) {
894 bool Dummy;
895 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
896
897 T Product = SaturatingMultiply(X, Y, &Overflowed);
898 if (Overflowed)
899 return Product;
900
901 return SaturatingAdd(A, Product, &Overflowed);
902}
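Editorial examples of saturation behaviour for the three helpers above; note that the template parameter is deduced from the arguments, so both operands must have the same unsigned type:

#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>

void saturatingExamples() {
  bool Overflowed = false;
  assert(llvm::SaturatingAdd(uint8_t(200), uint8_t(100), &Overflowed) == 255 && Overflowed);
  assert(llvm::SaturatingMultiply(uint16_t(300), uint16_t(300), &Overflowed) == 65535 && Overflowed);
  assert(llvm::SaturatingMultiplyAdd(uint32_t(10), uint32_t(10), uint32_t(5)) == 105u);
}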
903
904/// Use this rather than HUGE_VALF; the latter causes warnings on MSVC.
905extern const float huge_valf;
906
907
908/// Add two signed integers, computing the two's complement truncated result,
909/// returning true if overflow occurred.
910template <typename T>
911std::enable_if_t<std::is_signed<T>::value, T> AddOverflow(T X, T Y, T &Result) {
912#if __has_builtin(__builtin_add_overflow)
913 return __builtin_add_overflow(X, Y, &Result);
914#else
915 // Perform the unsigned addition.
916 using U = std::make_unsigned_t<T>;
917 const U UX = static_cast<U>(X);
918 const U UY = static_cast<U>(Y);
919 const U UResult = UX + UY;
920
921 // Convert to signed.
922 Result = static_cast<T>(UResult);
923
924 // Adding two positive numbers should result in a positive number.
925 if (X > 0 && Y > 0)
926 return Result <= 0;
927 // Adding two negatives should result in a negative number.
928 if (X < 0 && Y < 0)
929 return Result >= 0;
930 return false;
931#endif
932}
933
934/// Subtract two signed integers, computing the two's complement truncated
935/// result, returning true if an overflow occurred.
936template <typename T>
937std::enable_if_t<std::is_signed<T>::value, T> SubOverflow(T X, T Y, T &Result) {
938#if __has_builtin(__builtin_sub_overflow)
939 return __builtin_sub_overflow(X, Y, &Result);
940#else
941 // Perform the unsigned addition.
942 using U = std::make_unsigned_t<T>;
943 const U UX = static_cast<U>(X);
944 const U UY = static_cast<U>(Y);
945 const U UResult = UX - UY;
946
947 // Convert to signed.
948 Result = static_cast<T>(UResult);
949
950 // Subtracting a positive number from a negative results in a negative number.
951 if (X <= 0 && Y > 0)
952 return Result >= 0;
953 // Subtracting a negative number from a positive results in a positive number.
954 if (X >= 0 && Y < 0)
955 return Result <= 0;
956 return false;
957#endif
958}
959
960/// Multiply two signed integers, computing the two's complement truncated
961/// result, returning true if an overflow occurred.
962template <typename T>
963std::enable_if_t<std::is_signed<T>::value, T> MulOverflow(T X, T Y, T &Result) {
964 // Perform the unsigned multiplication on absolute values.
965 using U = std::make_unsigned_t<T>;
966 const U UX = X < 0 ? (0 - static_cast<U>(X)) : static_cast<U>(X);
967 const U UY = Y < 0 ? (0 - static_cast<U>(Y)) : static_cast<U>(Y);
968 const U UResult = UX * UY;
969
970 // Convert to signed.
971 const bool IsNegative = (X < 0) ^ (Y < 0);
972 Result = IsNegative ? (0 - UResult) : UResult;
973
974 // If any of the args was 0, result is 0 and no overflow occurs.
975 if (UX == 0 || UY == 0)
976 return false;
977
978 // UX and UY are in [1, 2^n], where n is the number of digits.
979 // Check how the max allowed absolute value (2^n for negative, 2^(n-1) for
980 // positive) divided by an argument compares to the other.
981 if (IsNegative)
982 return UX > (static_cast<U>(std::numeric_limits<T>::max()) + U(1)) / UY;
983 else
984 return UX > (static_cast<U>(std::numeric_limits<T>::max())) / UY;
985}
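An editorial sketch of the overflow-checking helpers above; a non-zero return signals that the truncated result wrapped:

#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>

void checkedArithmeticExamples() {
  int32_t R = 0;
  assert(llvm::AddOverflow(int32_t(INT32_MAX), int32_t(1), R) != 0);     // wraps around
  assert(llvm::SubOverflow(int32_t(5), int32_t(7), R) == 0 && R == -2);  // no overflow
  assert(llvm::MulOverflow(int32_t(100000), int32_t(100000), R) != 0);   // 1e10 > INT32_MAX
}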
986
987} // End llvm namespace
988
989#endif