Bug Summary

File: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Warning: line 9265, column 36
Although the value stored to 'SplatBits' is used in the enclosing expression, the value is never actually read from 'SplatBits'
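
The diagnostic describes a dead store: the assigned value participates in the enclosing expression, but the variable itself is never read again afterwards. A minimal sketch of the pattern (hypothetical code for illustration only, not the actual statement at line 9265; the helper name is invented):

    uint64_t SplatBits;
    // The stored value is "used" because it feeds the enclosing comparison...
    if ((SplatBits = computeSplatValue()) != 0)
      return true;
    // ...but 'SplatBits' is never read after the assignment, so the store is
    // dead; the condition could compare computeSplatValue() directly.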

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name PPCISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -fno-split-dwarf-inlining -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-12/lib/clang/12.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/build-llvm/lib/Target/PowerPC -I /build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC -I /build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/build-llvm/include -I /build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-12/lib/clang/12.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/build-llvm/lib/Target/PowerPC -fdebug-prefix-map=/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2020-10-27-053609-25509-1 -x c++ /build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
1//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the PPCISelLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCISelLowering.h"
14#include "MCTargetDesc/PPCPredicates.h"
15#include "PPC.h"
16#include "PPCCCState.h"
17#include "PPCCallingConv.h"
18#include "PPCFrameLowering.h"
19#include "PPCInstrInfo.h"
20#include "PPCMachineFunctionInfo.h"
21#include "PPCPerfectShuffle.h"
22#include "PPCRegisterInfo.h"
23#include "PPCSubtarget.h"
24#include "PPCTargetMachine.h"
25#include "llvm/ADT/APFloat.h"
26#include "llvm/ADT/APInt.h"
27#include "llvm/ADT/ArrayRef.h"
28#include "llvm/ADT/DenseMap.h"
29#include "llvm/ADT/None.h"
30#include "llvm/ADT/STLExtras.h"
31#include "llvm/ADT/SmallPtrSet.h"
32#include "llvm/ADT/SmallSet.h"
33#include "llvm/ADT/SmallVector.h"
34#include "llvm/ADT/Statistic.h"
35#include "llvm/ADT/StringRef.h"
36#include "llvm/ADT/StringSwitch.h"
37#include "llvm/CodeGen/CallingConvLower.h"
38#include "llvm/CodeGen/ISDOpcodes.h"
39#include "llvm/CodeGen/MachineBasicBlock.h"
40#include "llvm/CodeGen/MachineFrameInfo.h"
41#include "llvm/CodeGen/MachineFunction.h"
42#include "llvm/CodeGen/MachineInstr.h"
43#include "llvm/CodeGen/MachineInstrBuilder.h"
44#include "llvm/CodeGen/MachineJumpTableInfo.h"
45#include "llvm/CodeGen/MachineLoopInfo.h"
46#include "llvm/CodeGen/MachineMemOperand.h"
47#include "llvm/CodeGen/MachineModuleInfo.h"
48#include "llvm/CodeGen/MachineOperand.h"
49#include "llvm/CodeGen/MachineRegisterInfo.h"
50#include "llvm/CodeGen/RuntimeLibcalls.h"
51#include "llvm/CodeGen/SelectionDAG.h"
52#include "llvm/CodeGen/SelectionDAGNodes.h"
53#include "llvm/CodeGen/TargetInstrInfo.h"
54#include "llvm/CodeGen/TargetLowering.h"
55#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
56#include "llvm/CodeGen/TargetRegisterInfo.h"
57#include "llvm/CodeGen/ValueTypes.h"
58#include "llvm/IR/CallingConv.h"
59#include "llvm/IR/Constant.h"
60#include "llvm/IR/Constants.h"
61#include "llvm/IR/DataLayout.h"
62#include "llvm/IR/DebugLoc.h"
63#include "llvm/IR/DerivedTypes.h"
64#include "llvm/IR/Function.h"
65#include "llvm/IR/GlobalValue.h"
66#include "llvm/IR/IRBuilder.h"
67#include "llvm/IR/Instructions.h"
68#include "llvm/IR/Intrinsics.h"
69#include "llvm/IR/IntrinsicsPowerPC.h"
70#include "llvm/IR/Module.h"
71#include "llvm/IR/Type.h"
72#include "llvm/IR/Use.h"
73#include "llvm/IR/Value.h"
74#include "llvm/MC/MCContext.h"
75#include "llvm/MC/MCExpr.h"
76#include "llvm/MC/MCRegisterInfo.h"
77#include "llvm/MC/MCSectionXCOFF.h"
78#include "llvm/MC/MCSymbolXCOFF.h"
79#include "llvm/Support/AtomicOrdering.h"
80#include "llvm/Support/BranchProbability.h"
81#include "llvm/Support/Casting.h"
82#include "llvm/Support/CodeGen.h"
83#include "llvm/Support/CommandLine.h"
84#include "llvm/Support/Compiler.h"
85#include "llvm/Support/Debug.h"
86#include "llvm/Support/ErrorHandling.h"
87#include "llvm/Support/Format.h"
88#include "llvm/Support/KnownBits.h"
89#include "llvm/Support/MachineValueType.h"
90#include "llvm/Support/MathExtras.h"
91#include "llvm/Support/raw_ostream.h"
92#include "llvm/Target/TargetMachine.h"
93#include "llvm/Target/TargetOptions.h"
94#include <algorithm>
95#include <cassert>
96#include <cstdint>
97#include <iterator>
98#include <list>
99#include <utility>
100#include <vector>
101
102using namespace llvm;
103
104#define DEBUG_TYPE "ppc-lowering"
105
106static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
107cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
108
109static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
110cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
111
112static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
113cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
114
115static cl::opt<bool> DisableSCO("disable-ppc-sco",
116cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
117
118static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
119cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
120
121static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
122cl::desc("use absolute jump tables on ppc"), cl::Hidden);
123
124static cl::opt<bool> EnablePPCPCRelTLS(
125 "enable-ppc-pcrel-tls",
126 cl::desc("enable the use of PC relative memops in TLS instructions on PPC"),
127 cl::Hidden);
128
129STATISTIC(NumTailCalls, "Number of tail calls");
130STATISTIC(NumSiblingCalls, "Number of sibling calls");
131STATISTIC(ShufflesHandledWithVPERM, "Number of shuffles lowered to a VPERM");
132STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
133
134static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
135
136static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
137
138// FIXME: Remove this once the bug has been fixed!
139extern cl::opt<bool> ANDIGlueBug;
140
141PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
142 const PPCSubtarget &STI)
143 : TargetLowering(TM), Subtarget(STI) {
144 // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
145 // arguments are at least 4/8 bytes aligned.
146 bool isPPC64 = Subtarget.isPPC64();
147 setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
148
149 // Set up the register classes.
150 addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
151 if (!useSoftFloat()) {
152 if (hasSPE()) {
153 addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
154 addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
155 } else {
156 addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
157 addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
158 }
159 }
160
161 // Match BITREVERSE to customized fast code sequence in the td file.
162 setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
163 setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
164
165 // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
166 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
167
168 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
169 for (MVT VT : MVT::integer_valuetypes()) {
170 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
171 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
172 }
173
174 if (Subtarget.isISA3_0()) {
175 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
176 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
177 setTruncStoreAction(MVT::f64, MVT::f16, Legal);
178 setTruncStoreAction(MVT::f32, MVT::f16, Legal);
179 } else {
180 // No extending loads from f16 or HW conversions back and forth.
181 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
182 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
183 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
184 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
185 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
186 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
187 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
188 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
189 }
190
191 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
192
193 // PowerPC has pre-inc loads and stores.
194 setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
195 setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
196 setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
197 setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
198 setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
199 setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
200 setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
201 setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
202 setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
203 setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
204 if (!Subtarget.hasSPE()) {
205 setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
206 setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
207 setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
208 setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
209 }
210
211 // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
212 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
213 for (MVT VT : ScalarIntVTs) {
214 setOperationAction(ISD::ADDC, VT, Legal);
215 setOperationAction(ISD::ADDE, VT, Legal);
216 setOperationAction(ISD::SUBC, VT, Legal);
217 setOperationAction(ISD::SUBE, VT, Legal);
218 }
219
220 if (Subtarget.useCRBits()) {
221 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
222
223 if (isPPC64 || Subtarget.hasFPCVT()) {
224 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Promote);
225 AddPromotedToType(ISD::STRICT_SINT_TO_FP, MVT::i1,
226 isPPC64 ? MVT::i64 : MVT::i32);
227 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Promote);
228 AddPromotedToType(ISD::STRICT_UINT_TO_FP, MVT::i1,
229 isPPC64 ? MVT::i64 : MVT::i32);
230
231 setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
232 AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
233 isPPC64 ? MVT::i64 : MVT::i32);
234 setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
235 AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
236 isPPC64 ? MVT::i64 : MVT::i32);
237 } else {
238 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Custom);
239 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Custom);
240 setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
241 setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
242 }
243
244 // PowerPC does not support direct load/store of condition registers.
245 setOperationAction(ISD::LOAD, MVT::i1, Custom);
246 setOperationAction(ISD::STORE, MVT::i1, Custom);
247
248 // FIXME: Remove this once the ANDI glue bug is fixed:
249 if (ANDIGlueBug)
250 setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
251
252 for (MVT VT : MVT::integer_valuetypes()) {
253 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
254 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
255 setTruncStoreAction(VT, MVT::i1, Expand);
256 }
257
258 addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
259 }
260
261 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
262 // PPC (the libcall is not available).
263 setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom);
264 setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom);
265 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::ppcf128, Custom);
266 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::ppcf128, Custom);
267
268 // We do not currently implement these libm ops for PowerPC.
269 setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
270 setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
271 setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
272 setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
273 setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
274 setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
275
276 // PowerPC has no SREM/UREM instructions unless we are on P9
277 // On P9 we may use a hardware instruction to compute the remainder.
278 // When the result of both the remainder and the division is required it is
279 // more efficient to compute the remainder from the result of the division
280 // rather than use the remainder instruction. The instructions are legalized
281 // directly because the DivRemPairsPass performs the transformation at the IR
282 // level.
283 if (Subtarget.isISA3_0()) {
284 setOperationAction(ISD::SREM, MVT::i32, Legal);
285 setOperationAction(ISD::UREM, MVT::i32, Legal);
286 setOperationAction(ISD::SREM, MVT::i64, Legal);
287 setOperationAction(ISD::UREM, MVT::i64, Legal);
288 } else {
289 setOperationAction(ISD::SREM, MVT::i32, Expand);
290 setOperationAction(ISD::UREM, MVT::i32, Expand);
291 setOperationAction(ISD::SREM, MVT::i64, Expand);
292 setOperationAction(ISD::UREM, MVT::i64, Expand);
293 }
294
295 // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
296 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
297 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
298 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
299 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
300 setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
301 setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
302 setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
303 setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
304
305 // Handle constrained floating-point operations on scalars.
306 // TODO: Handle SPE-specific operations.
307 setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
308 setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
309 setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
310 setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
311 setOperationAction(ISD::STRICT_FMA, MVT::f32, Legal);
312 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
313
314 setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
315 setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
316 setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
317 setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
318 setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal);
319 if (Subtarget.hasVSX()) {
320 setOperationAction(ISD::STRICT_FRINT, MVT::f32, Legal);
321 setOperationAction(ISD::STRICT_FRINT, MVT::f64, Legal);
322 }
323
324 if (Subtarget.hasFSQRT()) {
325 setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
326 setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
327 }
328
329 if (Subtarget.hasFPRND()) {
330 setOperationAction(ISD::STRICT_FFLOOR, MVT::f32, Legal);
331 setOperationAction(ISD::STRICT_FCEIL, MVT::f32, Legal);
332 setOperationAction(ISD::STRICT_FTRUNC, MVT::f32, Legal);
333 setOperationAction(ISD::STRICT_FROUND, MVT::f32, Legal);
334
335 setOperationAction(ISD::STRICT_FFLOOR, MVT::f64, Legal);
336 setOperationAction(ISD::STRICT_FCEIL, MVT::f64, Legal);
337 setOperationAction(ISD::STRICT_FTRUNC, MVT::f64, Legal);
338 setOperationAction(ISD::STRICT_FROUND, MVT::f64, Legal);
339 }
340
341 // We don't support sin/cos/sqrt/fmod/pow
342 setOperationAction(ISD::FSIN , MVT::f64, Expand);
343 setOperationAction(ISD::FCOS , MVT::f64, Expand);
344 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
345 setOperationAction(ISD::FREM , MVT::f64, Expand);
346 setOperationAction(ISD::FPOW , MVT::f64, Expand);
347 setOperationAction(ISD::FSIN , MVT::f32, Expand);
348 setOperationAction(ISD::FCOS , MVT::f32, Expand);
349 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
350 setOperationAction(ISD::FREM , MVT::f32, Expand);
351 setOperationAction(ISD::FPOW , MVT::f32, Expand);
352 if (Subtarget.hasSPE()) {
353 setOperationAction(ISD::FMA , MVT::f64, Expand);
354 setOperationAction(ISD::FMA , MVT::f32, Expand);
355 } else {
356 setOperationAction(ISD::FMA , MVT::f64, Legal);
357 setOperationAction(ISD::FMA , MVT::f32, Legal);
358 }
359
360 if (Subtarget.hasSPE())
361 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
362
363 setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
364
365 // If we're enabling GP optimizations, use hardware square root
366 if (!Subtarget.hasFSQRT() &&
367 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
368 Subtarget.hasFRE()))
369 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
370
371 if (!Subtarget.hasFSQRT() &&
372 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
373 Subtarget.hasFRES()))
374 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
375
376 if (Subtarget.hasFCPSGN()) {
377 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
378 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
379 } else {
380 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
381 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
382 }
383
384 if (Subtarget.hasFPRND()) {
385 setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
386 setOperationAction(ISD::FCEIL, MVT::f64, Legal);
387 setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
388 setOperationAction(ISD::FROUND, MVT::f64, Legal);
389
390 setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
391 setOperationAction(ISD::FCEIL, MVT::f32, Legal);
392 setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
393 setOperationAction(ISD::FROUND, MVT::f32, Legal);
394 }
395
396 // PowerPC does not have BSWAP, but we can use vector BSWAP instruction xxbrd
397 // to speed up scalar BSWAP64.
398 // CTPOP or CTTZ were introduced in P8/P9 respectively
399 setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
400 if (Subtarget.hasP9Vector())
401 setOperationAction(ISD::BSWAP, MVT::i64 , Custom);
402 else
403 setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
404 if (Subtarget.isISA3_0()) {
405 setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
406 setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
407 } else {
408 setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
409 setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
410 }
411
412 if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
413 setOperationAction(ISD::CTPOP, MVT::i32 , Legal);
414 setOperationAction(ISD::CTPOP, MVT::i64 , Legal);
415 } else {
416 setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
417 setOperationAction(ISD::CTPOP, MVT::i64 , Expand);
418 }
419
420 // PowerPC does not have ROTR
421 setOperationAction(ISD::ROTR, MVT::i32 , Expand);
422 setOperationAction(ISD::ROTR, MVT::i64 , Expand);
423
424 if (!Subtarget.useCRBits()) {
425 // PowerPC does not have Select
426 setOperationAction(ISD::SELECT, MVT::i32, Expand);
427 setOperationAction(ISD::SELECT, MVT::i64, Expand);
428 setOperationAction(ISD::SELECT, MVT::f32, Expand);
429 setOperationAction(ISD::SELECT, MVT::f64, Expand);
430 }
431
432 // PowerPC wants to turn select_cc of FP into fsel when possible.
433 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
434 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
435
436 // PowerPC wants to optimize integer setcc a bit
437 if (!Subtarget.useCRBits())
438 setOperationAction(ISD::SETCC, MVT::i32, Custom);
439
440 if (Subtarget.hasFPU()) {
441 setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
442 setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
443 setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Legal);
444
445 setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
446 setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
447 setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Legal);
448 }
449
450 // PowerPC does not have BRCOND which requires SetCC
451 if (!Subtarget.useCRBits())
452 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
453
454 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
455
456 if (Subtarget.hasSPE()) {
457 // SPE has built-in conversions
458 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Legal);
459 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Legal);
460 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Legal);
461 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
462 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
463 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
464 } else {
465 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
466 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
467 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
468
469 // PowerPC does not have [U|S]INT_TO_FP
470 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Expand);
471 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Expand);
472 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
473 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
474 }
475
476 if (Subtarget.hasDirectMove() && isPPC64) {
477 setOperationAction(ISD::BITCAST, MVT::f32, Legal);
478 setOperationAction(ISD::BITCAST, MVT::i32, Legal);
479 setOperationAction(ISD::BITCAST, MVT::i64, Legal);
480 setOperationAction(ISD::BITCAST, MVT::f64, Legal);
481 if (TM.Options.UnsafeFPMath) {
482 setOperationAction(ISD::LRINT, MVT::f64, Legal);
483 setOperationAction(ISD::LRINT, MVT::f32, Legal);
484 setOperationAction(ISD::LLRINT, MVT::f64, Legal);
485 setOperationAction(ISD::LLRINT, MVT::f32, Legal);
486 setOperationAction(ISD::LROUND, MVT::f64, Legal);
487 setOperationAction(ISD::LROUND, MVT::f32, Legal);
488 setOperationAction(ISD::LLROUND, MVT::f64, Legal);
489 setOperationAction(ISD::LLROUND, MVT::f32, Legal);
490 }
491 } else {
492 setOperationAction(ISD::BITCAST, MVT::f32, Expand);
493 setOperationAction(ISD::BITCAST, MVT::i32, Expand);
494 setOperationAction(ISD::BITCAST, MVT::i64, Expand);
495 setOperationAction(ISD::BITCAST, MVT::f64, Expand);
496 }
497
498 // We cannot sextinreg(i1). Expand to shifts.
499 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
500
501 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
502 // SjLj exception handling but a light-weight setjmp/longjmp replacement to
503 // support continuations, user-level threading, etc. As a result, no
504 // other SjLj exception interfaces are implemented; please don't build
505 // your own exception handling based on them.
506 // LLVM/Clang supports zero-cost DWARF exception handling.
507 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
508 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
509
510 // We want to legalize GlobalAddress and ConstantPool nodes into the
511 // appropriate instructions to materialize the address.
512 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
513 setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
514 setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
515 setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
516 setOperationAction(ISD::JumpTable, MVT::i32, Custom);
517 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
518 setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
519 setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
520 setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
521 setOperationAction(ISD::JumpTable, MVT::i64, Custom);
522
523 // TRAP is legal.
524 setOperationAction(ISD::TRAP, MVT::Other, Legal);
525
526 // TRAMPOLINE is custom lowered.
527 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
528 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
529
530 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
531 setOperationAction(ISD::VASTART , MVT::Other, Custom);
532
533 if (Subtarget.is64BitELFABI()) {
534 // VAARG always uses double-word chunks, so promote anything smaller.
535 setOperationAction(ISD::VAARG, MVT::i1, Promote);
536 AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
537 setOperationAction(ISD::VAARG, MVT::i8, Promote);
538 AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
539 setOperationAction(ISD::VAARG, MVT::i16, Promote);
540 AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
541 setOperationAction(ISD::VAARG, MVT::i32, Promote);
542 AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
543 setOperationAction(ISD::VAARG, MVT::Other, Expand);
544 } else if (Subtarget.is32BitELFABI()) {
545 // VAARG is custom lowered with the 32-bit SVR4 ABI.
546 setOperationAction(ISD::VAARG, MVT::Other, Custom);
547 setOperationAction(ISD::VAARG, MVT::i64, Custom);
548 } else
549 setOperationAction(ISD::VAARG, MVT::Other, Expand);
550
551 // VACOPY is custom lowered with the 32-bit SVR4 ABI.
552 if (Subtarget.is32BitELFABI())
553 setOperationAction(ISD::VACOPY , MVT::Other, Custom);
554 else
555 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
556
557 // Use the default implementation.
558 setOperationAction(ISD::VAEND , MVT::Other, Expand);
559 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
560 setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom);
561 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
562 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom);
563 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
564 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
565 setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
566 setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
567
568 // We want to custom lower some of our intrinsics.
569 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
570
571 // To handle counter-based loop conditions.
572 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
573
574 setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
575 setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
576 setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
577 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
578
579 // Comparisons that require checking two conditions.
580 if (Subtarget.hasSPE()) {
581 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
582 setCondCodeAction(ISD::SETO, MVT::f64, Expand);
583 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
584 setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
585 }
586 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
587 setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
588 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
589 setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
590 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
591 setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
592 setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
593 setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
594 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
595 setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
596 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
597 setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
598
599 if (Subtarget.has64BitSupport()) {
600 // They also have instructions for converting between i64 and fp.
601 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
602 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Expand);
603 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
604 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand);
605 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
606 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
607 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
608 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
609 // This is just the low 32 bits of a (signed) fp->i64 conversion.
610 // We cannot do this with Promote because i64 is not a legal type.
611 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
612 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
613
614 if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) {
615 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
616 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
617 }
618 } else {
619 // PowerPC does not have FP_TO_UINT on 32-bit implementations.
620 if (Subtarget.hasSPE()) {
621 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Legal);
622 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
623 } else {
624 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Expand);
625 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
626 }
627 }
628
629 // With the instructions enabled under FPCVT, we can do everything.
630 if (Subtarget.hasFPCVT()) {
631 if (Subtarget.has64BitSupport()) {
632 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
633 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
634 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
635 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
636 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
637 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
638 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
639 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
640 }
641
642 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
643 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
644 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
645 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
646 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
647 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
648 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
649 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
650 }
651
652 if (Subtarget.use64BitRegs()) {
653 // 64-bit PowerPC implementations can support i64 types directly
654 addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
655 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
656 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
657 // 64-bit PowerPC wants to expand i128 shifts itself.
658 setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
659 setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
660 setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
661 } else {
662 // 32-bit PowerPC wants to expand i64 shifts itself.
663 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
664 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
665 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
666 }
667
668 // PowerPC has better expansions for funnel shifts than the generic
669 // TargetLowering::expandFunnelShift.
670 if (Subtarget.has64BitSupport()) {
671 setOperationAction(ISD::FSHL, MVT::i64, Custom);
672 setOperationAction(ISD::FSHR, MVT::i64, Custom);
673 }
674 setOperationAction(ISD::FSHL, MVT::i32, Custom);
675 setOperationAction(ISD::FSHR, MVT::i32, Custom);
676
677 if (Subtarget.hasVSX()) {
678 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
679 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
680 setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
681 setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
682 }
683
684 if (Subtarget.hasAltivec()) {
685 for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
686 setOperationAction(ISD::SADDSAT, VT, Legal);
687 setOperationAction(ISD::SSUBSAT, VT, Legal);
688 setOperationAction(ISD::UADDSAT, VT, Legal);
689 setOperationAction(ISD::USUBSAT, VT, Legal);
690 }
691 // First set operation action for all vector types to expand. Then we
692 // will selectively turn on ones that can be effectively codegen'd.
693 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
694 // add/sub are legal for all supported vector VT's.
695 setOperationAction(ISD::ADD, VT, Legal);
696 setOperationAction(ISD::SUB, VT, Legal);
697
698 // For v2i64, these are only valid with P8Vector. This is corrected after
699 // the loop.
700 if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
701 setOperationAction(ISD::SMAX, VT, Legal);
702 setOperationAction(ISD::SMIN, VT, Legal);
703 setOperationAction(ISD::UMAX, VT, Legal);
704 setOperationAction(ISD::UMIN, VT, Legal);
705 }
706 else {
707 setOperationAction(ISD::SMAX, VT, Expand);
708 setOperationAction(ISD::SMIN, VT, Expand);
709 setOperationAction(ISD::UMAX, VT, Expand);
710 setOperationAction(ISD::UMIN, VT, Expand);
711 }
712
713 if (Subtarget.hasVSX()) {
714 setOperationAction(ISD::FMAXNUM, VT, Legal);
715 setOperationAction(ISD::FMINNUM, VT, Legal);
716 }
717
718 // Vector instructions introduced in P8
719 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
720 setOperationAction(ISD::CTPOP, VT, Legal);
721 setOperationAction(ISD::CTLZ, VT, Legal);
722 }
723 else {
724 setOperationAction(ISD::CTPOP, VT, Expand);
725 setOperationAction(ISD::CTLZ, VT, Expand);
726 }
727
728 // Vector instructions introduced in P9
729 if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
730 setOperationAction(ISD::CTTZ, VT, Legal);
731 else
732 setOperationAction(ISD::CTTZ, VT, Expand);
733
734 // We promote all shuffles to v16i8.
735 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
736 AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
737
738 // We promote all non-typed operations to v4i32.
739 setOperationAction(ISD::AND , VT, Promote);
740 AddPromotedToType (ISD::AND , VT, MVT::v4i32);
741 setOperationAction(ISD::OR , VT, Promote);
742 AddPromotedToType (ISD::OR , VT, MVT::v4i32);
743 setOperationAction(ISD::XOR , VT, Promote);
744 AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
745 setOperationAction(ISD::LOAD , VT, Promote);
746 AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
747 setOperationAction(ISD::SELECT, VT, Promote);
748 AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
749 setOperationAction(ISD::VSELECT, VT, Legal);
750 setOperationAction(ISD::SELECT_CC, VT, Promote);
751 AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
752 setOperationAction(ISD::STORE, VT, Promote);
753 AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
754
755 // No other operations are legal.
756 setOperationAction(ISD::MUL , VT, Expand);
757 setOperationAction(ISD::SDIV, VT, Expand);
758 setOperationAction(ISD::SREM, VT, Expand);
759 setOperationAction(ISD::UDIV, VT, Expand);
760 setOperationAction(ISD::UREM, VT, Expand);
761 setOperationAction(ISD::FDIV, VT, Expand);
762 setOperationAction(ISD::FREM, VT, Expand);
763 setOperationAction(ISD::FNEG, VT, Expand);
764 setOperationAction(ISD::FSQRT, VT, Expand);
765 setOperationAction(ISD::FLOG, VT, Expand);
766 setOperationAction(ISD::FLOG10, VT, Expand);
767 setOperationAction(ISD::FLOG2, VT, Expand);
768 setOperationAction(ISD::FEXP, VT, Expand);
769 setOperationAction(ISD::FEXP2, VT, Expand);
770 setOperationAction(ISD::FSIN, VT, Expand);
771 setOperationAction(ISD::FCOS, VT, Expand);
772 setOperationAction(ISD::FABS, VT, Expand);
773 setOperationAction(ISD::FFLOOR, VT, Expand);
774 setOperationAction(ISD::FCEIL, VT, Expand);
775 setOperationAction(ISD::FTRUNC, VT, Expand);
776 setOperationAction(ISD::FRINT, VT, Expand);
777 setOperationAction(ISD::FNEARBYINT, VT, Expand);
778 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
779 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
780 setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
781 setOperationAction(ISD::MULHU, VT, Expand);
782 setOperationAction(ISD::MULHS, VT, Expand);
783 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
784 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
785 setOperationAction(ISD::UDIVREM, VT, Expand);
786 setOperationAction(ISD::SDIVREM, VT, Expand);
787 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
788 setOperationAction(ISD::FPOW, VT, Expand);
789 setOperationAction(ISD::BSWAP, VT, Expand);
790 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
791 setOperationAction(ISD::ROTL, VT, Expand);
792 setOperationAction(ISD::ROTR, VT, Expand);
793
794 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
795 setTruncStoreAction(VT, InnerVT, Expand);
796 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
797 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
798 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
799 }
800 }
801 setOperationAction(ISD::SELECT_CC, MVT::v4i32, Expand);
802 if (!Subtarget.hasP8Vector()) {
803 setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
804 setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
805 setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
806 setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
807 }
808
809 for (auto VT : {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8})
810 setOperationAction(ISD::ABS, VT, Custom);
811
812 // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
813 // with merges, splats, etc.
814 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
815
816 // Vector truncates to sub-word integers that fit in an Altivec/VSX register
817 // are cheap, so handle them before they get expanded to scalar.
818 setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
819 setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
820 setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
821 setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
822 setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
823
824 setOperationAction(ISD::AND , MVT::v4i32, Legal);
825 setOperationAction(ISD::OR , MVT::v4i32, Legal);
826 setOperationAction(ISD::XOR , MVT::v4i32, Legal);
827 setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
828 setOperationAction(ISD::SELECT, MVT::v4i32,
829 Subtarget.useCRBits() ? Legal : Expand);
830 setOperationAction(ISD::STORE , MVT::v4i32, Legal);
831 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
832 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);
833 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
834 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal);
835 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
836 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
837 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
838 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
839 setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
840 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
841 setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
842 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
843
844 // Without hasP8Altivec set, v2i64 SMAX isn't available.
845 // But ABS custom lowering requires SMAX support.
846 if (!Subtarget.hasP8Altivec())
847 setOperationAction(ISD::ABS, MVT::v2i64, Expand);
848
849 // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
850 setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
851 // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
852 if (Subtarget.hasAltivec())
853 for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
854 setOperationAction(ISD::ROTL, VT, Legal);
855 // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
856 if (Subtarget.hasP8Altivec())
857 setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
858
859 addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
860 addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
861 addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
862 addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
863
864 setOperationAction(ISD::MUL, MVT::v4f32, Legal);
865 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
866
867 if (Subtarget.hasVSX()) {
868 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
869 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
870 }
871
872 if (Subtarget.hasP8Altivec())
873 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
874 else
875 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
876
877 if (Subtarget.isISA3_1()) {
878 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
879 setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
880 setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
881 setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
882 setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
883 setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
884 setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
885 setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
886 setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
887 setOperationAction(ISD::UREM, MVT::v2i64, Legal);
888 setOperationAction(ISD::SREM, MVT::v2i64, Legal);
889 setOperationAction(ISD::UREM, MVT::v4i32, Legal);
890 setOperationAction(ISD::SREM, MVT::v4i32, Legal);
891 setOperationAction(ISD::UREM, MVT::v1i128, Legal);
892 setOperationAction(ISD::SREM, MVT::v1i128, Legal);
893 setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
894 setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
895 setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
896 }
897
898 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
899 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
900
901 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
902 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
903
904 setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
905 setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
906 setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
907 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
908
909 // Altivec does not contain unordered floating-point compare instructions
910 setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
911 setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
912 setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
913 setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
914
915 if (Subtarget.hasVSX()) {
916 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
917 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
918 if (Subtarget.hasP8Vector()) {
919 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
920 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
921 }
922 if (Subtarget.hasDirectMove() && isPPC64) {
923 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
924 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
925 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
926 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
927 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
928 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
929 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
930 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
931 }
932 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
933
934 // The nearbyint variants are not allowed to raise the inexact exception
935 // so we can only code-gen them with unsafe math.
936 if (TM.Options.UnsafeFPMath) {
937 setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
938 setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
939 }
940
941 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
942 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
943 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
944 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
945 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
946 setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
947 setOperationAction(ISD::FROUND, MVT::f64, Legal);
948 setOperationAction(ISD::FRINT, MVT::f64, Legal);
949
950 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
951 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
952 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
953 setOperationAction(ISD::FROUND, MVT::f32, Legal);
954 setOperationAction(ISD::FRINT, MVT::f32, Legal);
955
956 setOperationAction(ISD::MUL, MVT::v2f64, Legal);
957 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
958
959 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
960 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
961
962 // Share the Altivec comparison restrictions.
963 setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
964 setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
965 setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
966 setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
967
968 setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
969 setOperationAction(ISD::STORE, MVT::v2f64, Legal);
970
971 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);
972
973 if (Subtarget.hasP8Vector())
974 addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
975
976 addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
977
978 addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
979 addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
980 addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
981
982 if (Subtarget.hasP8Altivec()) {
983 setOperationAction(ISD::SHL, MVT::v2i64, Legal);
984 setOperationAction(ISD::SRA, MVT::v2i64, Legal);
985 setOperationAction(ISD::SRL, MVT::v2i64, Legal);
986
987 // 128 bit shifts can be accomplished via 3 instructions for SHL and
988 // SRL, but not for SRA because of the instructions available:
989 // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth
990 // doing
991 setOperationAction(ISD::SHL, MVT::v1i128, Expand);
992 setOperationAction(ISD::SRL, MVT::v1i128, Expand);
993 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
994
995 setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
996 }
997 else {
998 setOperationAction(ISD::SHL, MVT::v2i64, Expand);
999 setOperationAction(ISD::SRA, MVT::v2i64, Expand);
1000 setOperationAction(ISD::SRL, MVT::v2i64, Expand);
1001
1002 setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
1003
1004 // VSX v2i64 only supports non-arithmetic operations.
1005 setOperationAction(ISD::ADD, MVT::v2i64, Expand);
1006 setOperationAction(ISD::SUB, MVT::v2i64, Expand);
1007 }
1008
1009 if (Subtarget.isISA3_1())
1010 setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
1011 else
1012 setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
1013
1014 setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
1015 AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
1016 setOperationAction(ISD::STORE, MVT::v2i64, Promote);
1017 AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
1018
1019 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);
1020
1021 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal);
1022 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal);
1023 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal);
1024 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal);
1025 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
1026 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
1027 setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
1028 setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
1029
1030 // Custom handling for partial vectors of integers converted to
1031 // floating point. We already have optimal handling for v2i32 through
1032 // the DAG combine, so those aren't necessary.
1033 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i8, Custom);
1034 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i8, Custom);
1035 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i16, Custom);
1036 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i16, Custom);
1037 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i8, Custom);
1038 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i8, Custom);
1039 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i16, Custom);
1040 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i16, Custom);
1041 setOperationAction(ISD::UINT_TO_FP, MVT::v2i8, Custom);
1042 setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
1043 setOperationAction(ISD::UINT_TO_FP, MVT::v2i16, Custom);
1044 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
1045 setOperationAction(ISD::SINT_TO_FP, MVT::v2i8, Custom);
1046 setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Custom);
1047 setOperationAction(ISD::SINT_TO_FP, MVT::v2i16, Custom);
1048 setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
1049
1050 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
1051 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
1052 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
1053 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
1054 setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
1055 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Legal);
1056
1057 if (Subtarget.hasDirectMove())
1058 setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
1059 setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
1060
1061 // Handle constrained floating-point operations on vectors.
1062 // The predicate is `hasVSX` because Altivec instructions do not raise
1063 // exceptions but VSX vector instructions do.
1064 setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
1065 setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
1066 setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
1067 setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
1068 setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
1069 setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
1070 setOperationAction(ISD::STRICT_FMAXNUM, MVT::v4f32, Legal);
1071 setOperationAction(ISD::STRICT_FMINNUM, MVT::v4f32, Legal);
1072 setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
1073 setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
1074 setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal);
1075 setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
1076 setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);
1077
1078 setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
1079 setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
1080 setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
1081 setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
1082 setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);
1083 setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
1084 setOperationAction(ISD::STRICT_FMAXNUM, MVT::v2f64, Legal);
1085 setOperationAction(ISD::STRICT_FMINNUM, MVT::v2f64, Legal);
1086 setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal);
1087 setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
1088 setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal);
1089 setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
1090 setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal);
1091
1092 addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1093 }
1094
1095 if (Subtarget.hasP8Altivec()) {
1096 addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1097 addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1098 }
1099
1100 if (Subtarget.hasP9Vector()) {
1101 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
1102 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
1103
1104 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1105 // SRL, but not for SRA because of the instructions available:
1106 // VS{RL} and VS{RL}O.
1107 setOperationAction(ISD::SHL, MVT::v1i128, Legal);
1108 setOperationAction(ISD::SRL, MVT::v1i128, Legal);
1109 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1110
1111 addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1112 setOperationAction(ISD::FADD, MVT::f128, Legal);
1113 setOperationAction(ISD::FSUB, MVT::f128, Legal);
1114 setOperationAction(ISD::FDIV, MVT::f128, Legal);
1115 setOperationAction(ISD::FMUL, MVT::f128, Legal);
1116 setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
1117 // No extending loads to f128 on PPC.
1118 for (MVT FPT : MVT::fp_valuetypes())
1119 setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
1120 setOperationAction(ISD::FMA, MVT::f128, Legal);
1121 setCondCodeAction(ISD::SETULT, MVT::f128, Expand);
1122 setCondCodeAction(ISD::SETUGT, MVT::f128, Expand);
1123 setCondCodeAction(ISD::SETUEQ, MVT::f128, Expand);
1124 setCondCodeAction(ISD::SETOGE, MVT::f128, Expand);
1125 setCondCodeAction(ISD::SETOLE, MVT::f128, Expand);
1126 setCondCodeAction(ISD::SETONE, MVT::f128, Expand);
1127
1128 setOperationAction(ISD::FTRUNC, MVT::f128, Legal);
1129 setOperationAction(ISD::FRINT, MVT::f128, Legal);
1130 setOperationAction(ISD::FFLOOR, MVT::f128, Legal);
1131 setOperationAction(ISD::FCEIL, MVT::f128, Legal);
1132 setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal);
1133 setOperationAction(ISD::FROUND, MVT::f128, Legal);
1134
1135 setOperationAction(ISD::SELECT, MVT::f128, Expand);
1136 setOperationAction(ISD::FP_ROUND, MVT::f64, Legal);
1137 setOperationAction(ISD::FP_ROUND, MVT::f32, Legal);
1138 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
1139 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
1140 setOperationAction(ISD::BITCAST, MVT::i128, Custom);
1141 // No implementation for these ops for PowerPC.
1142 setOperationAction(ISD::FSIN, MVT::f128, Expand);
1143 setOperationAction(ISD::FCOS, MVT::f128, Expand);
1144 setOperationAction(ISD::FPOW, MVT::f128, Expand);
1145 setOperationAction(ISD::FPOWI, MVT::f128, Expand);
1146 setOperationAction(ISD::FREM, MVT::f128, Expand);
1147
1148 // Handle constrained floating-point operations of fp128
1149 setOperationAction(ISD::STRICT_FADD, MVT::f128, Legal);
1150 setOperationAction(ISD::STRICT_FSUB, MVT::f128, Legal);
1151 setOperationAction(ISD::STRICT_FMUL, MVT::f128, Legal);
1152 setOperationAction(ISD::STRICT_FDIV, MVT::f128, Legal);
1153 setOperationAction(ISD::STRICT_FMA, MVT::f128, Legal);
1154 setOperationAction(ISD::STRICT_FSQRT, MVT::f128, Legal);
1155 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Legal);
1156 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);
1157 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
1158 setOperationAction(ISD::STRICT_FRINT, MVT::f128, Legal);
1159 setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f128, Legal);
1160 setOperationAction(ISD::STRICT_FFLOOR, MVT::f128, Legal);
1161 setOperationAction(ISD::STRICT_FCEIL, MVT::f128, Legal);
1162 setOperationAction(ISD::STRICT_FTRUNC, MVT::f128, Legal);
1163 setOperationAction(ISD::STRICT_FROUND, MVT::f128, Legal);
1164 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
1165 setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
1166 setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
1167 setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
1168 setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
1169 }
1170
1171 if (Subtarget.hasP9Altivec()) {
1172 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
1173 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
1174
1175 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Legal);
1176 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
1177 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
1178 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Legal);
1179 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Legal);
1180 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
1181 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
1182 }
1183 }
1184
1185 if (Subtarget.pairedVectorMemops()) {
1186 addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
1187 setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
1188 setOperationAction(ISD::STORE, MVT::v256i1, Custom);
1189 }
1190 if (Subtarget.hasMMA()) {
1191 addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1192 setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
1193 setOperationAction(ISD::STORE, MVT::v512i1, Custom);
1194 setOperationAction(ISD::BUILD_VECTOR, MVT::v512i1, Custom);
1195 }
1196
1197 if (Subtarget.has64BitSupport())
1198 setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
1199
1200 if (Subtarget.isISA3_1())
1201 setOperationAction(ISD::SRA, MVT::v1i128, Legal);
1202
1203 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1204
1205 if (!isPPC64) {
1206 setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
1207 setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
1208 }
1209
1210 setBooleanContents(ZeroOrOneBooleanContent);
1211
1212 if (Subtarget.hasAltivec()) {
1213 // Altivec instructions set fields to all zeros or all ones.
1214 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
1215 }
1216
1217 if (!isPPC64) {
1218 // These libcalls are not available in 32-bit.
1219 setLibcallName(RTLIB::SHL_I128, nullptr);
1220 setLibcallName(RTLIB::SRL_I128, nullptr);
1221 setLibcallName(RTLIB::SRA_I128, nullptr);
1222 }
1223
1224 if (!isPPC64)
1225 setMaxAtomicSizeInBitsSupported(32);
1226
1227 setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1228
1229 // We have target-specific dag combine patterns for the following nodes:
1230 setTargetDAGCombine(ISD::ADD);
1231 setTargetDAGCombine(ISD::SHL);
1232 setTargetDAGCombine(ISD::SRA);
1233 setTargetDAGCombine(ISD::SRL);
1234 setTargetDAGCombine(ISD::MUL);
1235 setTargetDAGCombine(ISD::FMA);
1236 setTargetDAGCombine(ISD::SINT_TO_FP);
1237 setTargetDAGCombine(ISD::BUILD_VECTOR);
1238 if (Subtarget.hasFPCVT())
1239 setTargetDAGCombine(ISD::UINT_TO_FP);
1240 setTargetDAGCombine(ISD::LOAD);
1241 setTargetDAGCombine(ISD::STORE);
1242 setTargetDAGCombine(ISD::BR_CC);
1243 if (Subtarget.useCRBits())
1244 setTargetDAGCombine(ISD::BRCOND);
1245 setTargetDAGCombine(ISD::BSWAP);
1246 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
1247 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
1248 setTargetDAGCombine(ISD::INTRINSIC_VOID);
1249
1250 setTargetDAGCombine(ISD::SIGN_EXTEND);
1251 setTargetDAGCombine(ISD::ZERO_EXTEND);
1252 setTargetDAGCombine(ISD::ANY_EXTEND);
1253
1254 setTargetDAGCombine(ISD::TRUNCATE);
1255 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
1256
1257
1258 if (Subtarget.useCRBits()) {
1259 setTargetDAGCombine(ISD::TRUNCATE);
1260 setTargetDAGCombine(ISD::SETCC);
1261 setTargetDAGCombine(ISD::SELECT_CC);
1262 }
1263
1264 if (Subtarget.hasP9Altivec()) {
1265 setTargetDAGCombine(ISD::ABS);
1266 setTargetDAGCombine(ISD::VSELECT);
1267 }
1268
1269 setLibcallName(RTLIB::LOG_F128, "logf128");
1270 setLibcallName(RTLIB::LOG2_F128, "log2f128");
1271 setLibcallName(RTLIB::LOG10_F128, "log10f128");
1272 setLibcallName(RTLIB::EXP_F128, "expf128");
1273 setLibcallName(RTLIB::EXP2_F128, "exp2f128");
1274 setLibcallName(RTLIB::SIN_F128, "sinf128");
1275 setLibcallName(RTLIB::COS_F128, "cosf128");
1276 setLibcallName(RTLIB::POW_F128, "powf128");
1277 setLibcallName(RTLIB::FMIN_F128, "fminf128");
1278 setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
1279 setLibcallName(RTLIB::POWI_F128, "__powikf2");
1280 setLibcallName(RTLIB::REM_F128, "fmodf128");
1281
1282 // With 32 condition bits, we don't need to sink (and duplicate) compares
1283 // aggressively in CodeGenPrep.
1284 if (Subtarget.useCRBits()) {
1285 setHasMultipleConditionRegisters();
1286 setJumpIsExpensive();
1287 }
1288
1289 setMinFunctionAlignment(Align(4));
1290
1291 switch (Subtarget.getCPUDirective()) {
1292 default: break;
1293 case PPC::DIR_970:
1294 case PPC::DIR_A2:
1295 case PPC::DIR_E500:
1296 case PPC::DIR_E500mc:
1297 case PPC::DIR_E5500:
1298 case PPC::DIR_PWR4:
1299 case PPC::DIR_PWR5:
1300 case PPC::DIR_PWR5X:
1301 case PPC::DIR_PWR6:
1302 case PPC::DIR_PWR6X:
1303 case PPC::DIR_PWR7:
1304 case PPC::DIR_PWR8:
1305 case PPC::DIR_PWR9:
1306 case PPC::DIR_PWR10:
1307 case PPC::DIR_PWR_FUTURE:
1308 setPrefLoopAlignment(Align(16));
1309 setPrefFunctionAlignment(Align(16));
1310 break;
1311 }
1312
1313 if (Subtarget.enableMachineScheduler())
1314 setSchedulingPreference(Sched::Source);
1315 else
1316 setSchedulingPreference(Sched::Hybrid);
1317
1318 computeRegisterProperties(STI.getRegisterInfo());
1319
1320 // The Freescale cores do better with aggressive inlining of memcpy and
1321 // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
1322 if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
1323 Subtarget.getCPUDirective() == PPC::DIR_E5500) {
1324 MaxStoresPerMemset = 32;
1325 MaxStoresPerMemsetOptSize = 16;
1326 MaxStoresPerMemcpy = 32;
1327 MaxStoresPerMemcpyOptSize = 8;
1328 MaxStoresPerMemmove = 32;
1329 MaxStoresPerMemmoveOptSize = 8;
1330 } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
1331 // The A2 also benefits from (very) aggressive inlining of memcpy and
1332 // friends. The overhead of the function call, even when warm, can be
1333 // over one hundred cycles.
1334 MaxStoresPerMemset = 128;
1335 MaxStoresPerMemcpy = 128;
1336 MaxStoresPerMemmove = 128;
1337 MaxLoadsPerMemcmp = 128;
1338 } else {
1339 MaxLoadsPerMemcmp = 8;
1340 MaxLoadsPerMemcmpOptSize = 4;
1341 }
1342
1343 IsStrictFPEnabled = true;
1344
1345 // Let the subtarget (CPU) decide if a predictable select is more expensive
1346 // than the corresponding branch. This information is used in CGP to decide
1347 // when to convert selects into branches.
1348 PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();
1349}
1350
1351/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1352/// the desired ByVal argument alignment.
1353static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1354 if (MaxAlign == MaxMaxAlign)
1355 return;
1356 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1357 if (MaxMaxAlign >= 32 &&
1358 VTy->getPrimitiveSizeInBits().getFixedSize() >= 256)
1359 MaxAlign = Align(32);
1360 else if (VTy->getPrimitiveSizeInBits().getFixedSize() >= 128 &&
1361 MaxAlign < 16)
1362 MaxAlign = Align(16);
1363 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1364 Align EltAlign;
1365 getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1366 if (EltAlign > MaxAlign)
1367 MaxAlign = EltAlign;
1368 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1369 for (auto *EltTy : STy->elements()) {
1370 Align EltAlign;
1371 getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1372 if (EltAlign > MaxAlign)
1373 MaxAlign = EltAlign;
1374 if (MaxAlign == MaxMaxAlign)
1375 break;
1376 }
1377 }
1378}
1379
1380/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1381/// function arguments in the caller parameter area.
1382unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
1383 const DataLayout &DL) const {
1384 // 16-byte and wider vectors are passed on a 16-byte boundary.
1385 // The rest are passed on an 8-byte boundary on PPC64 and a 4-byte boundary on PPC32.
1386 Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1387 if (Subtarget.hasAltivec())
1388 getMaxByValAlign(Ty, Alignment, Align(16));
1389 return Alignment.value();
1390}
1391
1392bool PPCTargetLowering::useSoftFloat() const {
1393 return Subtarget.useSoftFloat();
1394}
1395
1396bool PPCTargetLowering::hasSPE() const {
1397 return Subtarget.hasSPE();
1398}
1399
1400bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
1401 return VT.isScalarInteger();
1402}
1403
1404const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1405 switch ((PPCISD::NodeType)Opcode) {
1406 case PPCISD::FIRST_NUMBER: break;
1407 case PPCISD::FSEL: return "PPCISD::FSEL";
1408 case PPCISD::XSMAXCDP: return "PPCISD::XSMAXCDP";
1409 case PPCISD::XSMINCDP: return "PPCISD::XSMINCDP";
1410 case PPCISD::FCFID: return "PPCISD::FCFID";
1411 case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
1412 case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
1413 case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
1414 case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
1415 case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
1416 case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
1417 case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
1418 case PPCISD::FP_TO_UINT_IN_VSR:
1419 return "PPCISD::FP_TO_UINT_IN_VSR";
1420 case PPCISD::FP_TO_SINT_IN_VSR:
1421 return "PPCISD::FP_TO_SINT_IN_VSR";
1422 case PPCISD::FRE: return "PPCISD::FRE";
1423 case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
1424 case PPCISD::STFIWX: return "PPCISD::STFIWX";
1425 case PPCISD::VPERM: return "PPCISD::VPERM";
1426 case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
1427 case PPCISD::XXSPLTI_SP_TO_DP:
1428 return "PPCISD::XXSPLTI_SP_TO_DP";
1429 case PPCISD::XXSPLTI32DX:
1430 return "PPCISD::XXSPLTI32DX";
1431 case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
1432 case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
1433 case PPCISD::VECSHL: return "PPCISD::VECSHL";
1434 case PPCISD::CMPB: return "PPCISD::CMPB";
1435 case PPCISD::Hi: return "PPCISD::Hi";
1436 case PPCISD::Lo: return "PPCISD::Lo";
1437 case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
1438 case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1439 case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
1440 case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
1441 case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
1442 case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA";
1443 case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
1444 case PPCISD::SRL: return "PPCISD::SRL";
1445 case PPCISD::SRA: return "PPCISD::SRA";
1446 case PPCISD::SHL: return "PPCISD::SHL";
1447 case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
1448 case PPCISD::CALL: return "PPCISD::CALL";
1449 case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
1450 case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC";
1451 case PPCISD::MTCTR: return "PPCISD::MTCTR";
1452 case PPCISD::BCTRL: return "PPCISD::BCTRL";
1453 case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
1454 case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
1455 case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
1456 case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
1457 case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1458 case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
1459 case PPCISD::MFVSR: return "PPCISD::MFVSR";
1460 case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
1461 case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
1462 case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
1463 case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
1464 case PPCISD::SCALAR_TO_VECTOR_PERMUTED:
1465 return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
1466 case PPCISD::ANDI_rec_1_EQ_BIT:
1467 return "PPCISD::ANDI_rec_1_EQ_BIT";
1468 case PPCISD::ANDI_rec_1_GT_BIT:
1469 return "PPCISD::ANDI_rec_1_GT_BIT";
1470 case PPCISD::VCMP: return "PPCISD::VCMP";
1471 case PPCISD::VCMPo: return "PPCISD::VCMPo";
1472 case PPCISD::LBRX: return "PPCISD::LBRX";
1473 case PPCISD::STBRX: return "PPCISD::STBRX";
1474 case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
1475 case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
1476 case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
1477 case PPCISD::STXSIX: return "PPCISD::STXSIX";
1478 case PPCISD::VEXTS: return "PPCISD::VEXTS";
1479 case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
1480 case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
1481 case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE";
1482 case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE";
1483 case PPCISD::ST_VSR_SCAL_INT:
1484 return "PPCISD::ST_VSR_SCAL_INT";
1485 case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
1486 case PPCISD::BDNZ: return "PPCISD::BDNZ";
1487 case PPCISD::BDZ: return "PPCISD::BDZ";
1488 case PPCISD::MFFS: return "PPCISD::MFFS";
1489 case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
1490 case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
1491 case PPCISD::CR6SET: return "PPCISD::CR6SET";
1492 case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
1493 case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
1494 case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
1495 case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1496 case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
1497 case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
1498 case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
1499 case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
1500 case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
1501 case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1502 case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
1503 case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
1504 case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
1505 case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1506 case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1507 case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
1508 case PPCISD::PADDI_DTPREL:
1509 return "PPCISD::PADDI_DTPREL";
1510 case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
1511 case PPCISD::SC: return "PPCISD::SC";
1512 case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";
1513 case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";
1514 case PPCISD::RFEBB: return "PPCISD::RFEBB";
1515 case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
1516 case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
1517 case PPCISD::VABSD: return "PPCISD::VABSD";
1518 case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
1519 case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
1520 case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
1521 case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
1522 case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
1523 case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
1524 case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR";
1525 case PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR:
1526 return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
1527 case PPCISD::TLS_LOCAL_EXEC_MAT_ADDR:
1528 return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
1529 case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD";
1530 case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD";
1531 case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
1532 case PPCISD::XXMFACC: return "PPCISD::XXMFACC";
1533 case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
1534 case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
1535 case PPCISD::STRICT_FADDRTZ:
1536 return "PPCISD::STRICT_FADDRTZ";
1537 case PPCISD::STRICT_FCTIDZ:
1538 return "PPCISD::STRICT_FCTIDZ";
1539 case PPCISD::STRICT_FCTIWZ:
1540 return "PPCISD::STRICT_FCTIWZ";
1541 case PPCISD::STRICT_FCTIDUZ:
1542 return "PPCISD::STRICT_FCTIDUZ";
1543 case PPCISD::STRICT_FCTIWUZ:
1544 return "PPCISD::STRICT_FCTIWUZ";
1545 case PPCISD::STRICT_FCFID:
1546 return "PPCISD::STRICT_FCFID";
1547 case PPCISD::STRICT_FCFIDU:
1548 return "PPCISD::STRICT_FCFIDU";
1549 case PPCISD::STRICT_FCFIDS:
1550 return "PPCISD::STRICT_FCFIDS";
1551 case PPCISD::STRICT_FCFIDUS:
1552 return "PPCISD::STRICT_FCFIDUS";
1553 case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
1554 }
1555 return nullptr;
1556}
1557
1558EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
1559 EVT VT) const {
1560 if (!VT.isVector())
1561 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1562
1563 return VT.changeVectorElementTypeToInteger();
1564}
1565
1566bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
1567 assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1568 return true;
1569}
1570
1571//===----------------------------------------------------------------------===//
1572// Node matching predicates, for use by the tblgen matching code.
1573//===----------------------------------------------------------------------===//
1574
1575/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1576static bool isFloatingPointZero(SDValue Op) {
1577 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1578 return CFP->getValueAPF().isZero();
1579 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1580 // Maybe this has already been legalized into the constant pool?
1581 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1582 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1583 return CFP->getValueAPF().isZero();
1584 }
1585 return false;
1586}
1587
1588/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
1589/// true if Op is undef or if it matches the specified value.
1590static bool isConstantOrUndef(int Op, int Val) {
1591 return Op < 0 || Op == Val;
1592}
1593
1594/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1595/// VPKUHUM instruction.
1596/// The ShuffleKind distinguishes between big-endian operations with
1597/// two different inputs (0), either-endian operations with two identical
1598/// inputs (1), and little-endian operations with two different inputs (2).
1599/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1600bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1601 SelectionDAG &DAG) {
1602 bool IsLE = DAG.getDataLayout().isLittleEndian();
1603 if (ShuffleKind == 0) {
1604 if (IsLE)
1605 return false;
1606 for (unsigned i = 0; i != 16; ++i)
1607 if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1608 return false;
1609 } else if (ShuffleKind == 2) {
1610 if (!IsLE)
1611 return false;
1612 for (unsigned i = 0; i != 16; ++i)
1613 if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1614 return false;
1615 } else if (ShuffleKind == 1) {
1616 unsigned j = IsLE ? 0 : 1;
1617 for (unsigned i = 0; i != 8; ++i)
1618 if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
1619 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
1620 return false;
1621 }
1622 return true;
1623}
1624
1625/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1626/// VPKUWUM instruction.
1627/// The ShuffleKind distinguishes between big-endian operations with
1628/// two different inputs (0), either-endian operations with two identical
1629/// inputs (1), and little-endian operations with two different inputs (2).
1630/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1631bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1632 SelectionDAG &DAG) {
1633 bool IsLE = DAG.getDataLayout().isLittleEndian();
1634 if (ShuffleKind == 0) {
1635 if (IsLE)
1636 return false;
1637 for (unsigned i = 0; i != 16; i += 2)
1638 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
1639 !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
1640 return false;
1641 } else if (ShuffleKind == 2) {
1642 if (!IsLE)
1643 return false;
1644 for (unsigned i = 0; i != 16; i += 2)
1645 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1646 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
1647 return false;
1648 } else if (ShuffleKind == 1) {
1649 unsigned j = IsLE ? 0 : 2;
1650 for (unsigned i = 0; i != 8; i += 2)
1651 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1652 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1653 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1654 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
1655 return false;
1656 }
1657 return true;
1658}
1659
1660/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1661/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1662/// current subtarget.
1663///
1664/// The ShuffleKind distinguishes between big-endian operations with
1665/// two different inputs (0), either-endian operations with two identical
1666/// inputs (1), and little-endian operations with two different inputs (2).
1667/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1668bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1669 SelectionDAG &DAG) {
1670 const PPCSubtarget& Subtarget =
1671 static_cast<const PPCSubtarget&>(DAG.getSubtarget());
1672 if (!Subtarget.hasP8Vector())
1673 return false;
1674
1675 bool IsLE = DAG.getDataLayout().isLittleEndian();
1676 if (ShuffleKind == 0) {
1677 if (IsLE)
1678 return false;
1679 for (unsigned i = 0; i != 16; i += 4)
1680 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||
1681 !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
1682 !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
1683 !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
1684 return false;
1685 } else if (ShuffleKind == 2) {
1686 if (!IsLE)
1687 return false;
1688 for (unsigned i = 0; i != 16; i += 4)
1689 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1690 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
1691 !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
1692 !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
1693 return false;
1694 } else if (ShuffleKind == 1) {
1695 unsigned j = IsLE ? 0 : 4;
1696 for (unsigned i = 0; i != 8; i += 4)
1697 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1698 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1699 !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
1700 !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
1701 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1702 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
1703 !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1704 !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1705 return false;
1706 }
1707 return true;
1708}
1709
1710/// isVMerge - Common function, used to match vmrg* shuffles.
1711///
1712static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
1713 unsigned LHSStart, unsigned RHSStart) {
1714 if (N->getValueType(0) != MVT::v16i8)
1715 return false;
1716 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
1717        "Unsupported merge size!");
1718
1719 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
1720 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
1721 if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
1722 LHSStart+j+i*UnitSize) ||
1723 !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
1724 RHSStart+j+i*UnitSize))
1725 return false;
1726 }
1727 return true;
1728}
1729
1730/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
1731/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
1732/// The ShuffleKind distinguishes between big-endian merges with two
1733/// different inputs (0), either-endian merges with two identical inputs (1),
1734/// and little-endian merges with two different inputs (2). For the latter,
1735/// the input operands are swapped (see PPCInstrAltivec.td).
1736bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1737 unsigned ShuffleKind, SelectionDAG &DAG) {
1738 if (DAG.getDataLayout().isLittleEndian()) {
1739 if (ShuffleKind == 1) // unary
1740 return isVMerge(N, UnitSize, 0, 0);
1741 else if (ShuffleKind == 2) // swapped
1742 return isVMerge(N, UnitSize, 0, 16);
1743 else
1744 return false;
1745 } else {
1746 if (ShuffleKind == 1) // unary
1747 return isVMerge(N, UnitSize, 8, 8);
1748 else if (ShuffleKind == 0) // normal
1749 return isVMerge(N, UnitSize, 8, 24);
1750 else
1751 return false;
1752 }
1753}
1754
1755/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
1756/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
1757/// The ShuffleKind distinguishes between big-endian merges with two
1758/// different inputs (0), either-endian merges with two identical inputs (1),
1759/// and little-endian merges with two different inputs (2). For the latter,
1760/// the input operands are swapped (see PPCInstrAltivec.td).
1761bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1762 unsigned ShuffleKind, SelectionDAG &DAG) {
1763 if (DAG.getDataLayout().isLittleEndian()) {
1764 if (ShuffleKind == 1) // unary
1765 return isVMerge(N, UnitSize, 8, 8);
1766 else if (ShuffleKind == 2) // swapped
1767 return isVMerge(N, UnitSize, 8, 24);
1768 else
1769 return false;
1770 } else {
1771 if (ShuffleKind == 1) // unary
1772 return isVMerge(N, UnitSize, 0, 0);
1773 else if (ShuffleKind == 0) // normal
1774 return isVMerge(N, UnitSize, 0, 16);
1775 else
1776 return false;
1777 }
1778}
1779
1780/**
1781 * Common function used to match vmrgew and vmrgow shuffles
1782 *
1783 * The indexOffset determines whether to look for even or odd words in
1784 * the shuffle mask. This is based on the endianness of the target
1785 * machine.
1786 * - Little Endian:
1787 * - Use offset of 0 to check for odd elements
1788 * - Use offset of 4 to check for even elements
1789 * - Big Endian:
1790 * - Use offset of 0 to check for even elements
1791 * - Use offset of 4 to check for odd elements
1792 * A detailed description of the vector element ordering for little endian and
1793 * big endian can be found at
1794 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
1795 * Targeting your applications - what little endian and big endian IBM XL C/C++
1796 * compiler differences mean to you
1797 *
1798 * The mask to the shuffle vector instruction specifies the indices of the
1799 * elements from the two input vectors to place in the result. The elements are
1800 * numbered in array-access order, starting with the first vector. These vectors
1801 * are always of type v16i8, so each vector contains 16 byte-sized elements.
1802 * More info on the shuffle vector instruction can be found in the
1803 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
1804 * Language Reference.
1805 *
1806 * The RHSStartValue indicates whether the same input vectors are used (unary)
1807 * or two different input vectors are used, based on the following:
1808 * - If the instruction uses the same vector for both inputs, the range of the
1809 * indices will be 0 to 15. In this case, the RHSStart value passed should
1810 * be 0.
1811 * - If the instruction has two different vectors then the range of the
1812 * indices will be 0 to 31. In this case, the RHSStart value passed should
1813 * be 16 (indices 0-15 specify elements in the first vector while indices 16
1814 * to 31 specify elements in the second vector).
1815 *
1816 * \param[in] N The shuffle vector SD Node to analyze
1817 * \param[in] IndexOffset Specifies whether to look for even or odd elements
1818 * \param[in] RHSStartValue Specifies the starting index for the righthand input
1819 * vector to the shuffle_vector instruction
1820 * \return true iff this shuffle vector represents an even or odd word merge
1821 */
1822static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
1823 unsigned RHSStartValue) {
1824 if (N->getValueType(0) != MVT::v16i8)
1825 return false;
1826
1827 for (unsigned i = 0; i < 2; ++i)
1828 for (unsigned j = 0; j < 4; ++j)
1829 if (!isConstantOrUndef(N->getMaskElt(i*4+j),
1830 i*RHSStartValue+j+IndexOffset) ||
1831 !isConstantOrUndef(N->getMaskElt(i*4+j+8),
1832 i*RHSStartValue+j+IndexOffset+8))
1833 return false;
1834 return true;
1835}
1836
1837/**
1838 * Determine if the specified shuffle mask is suitable for the vmrgew or
1839 * vmrgow instructions.
1840 *
1841 * \param[in] N The shuffle vector SD Node to analyze
1842 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
1843 * \param[in] ShuffleKind Identify the type of merge:
1844 * - 0 = big-endian merge with two different inputs;
1845 * - 1 = either-endian merge with two identical inputs;
1846 * - 2 = little-endian merge with two different inputs (inputs are swapped for
1847 * little-endian merges).
1848 * \param[in] DAG The current SelectionDAG
1850 * \return true iff this shuffle mask is suitable for vmrgew or vmrgow
1850 */
1851bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
1852 unsigned ShuffleKind, SelectionDAG &DAG) {
1853 if (DAG.getDataLayout().isLittleEndian()) {
1854 unsigned indexOffset = CheckEven ? 4 : 0;
1855 if (ShuffleKind == 1) // Unary
1856 return isVMerge(N, indexOffset, 0);
1857 else if (ShuffleKind == 2) // swapped
1858 return isVMerge(N, indexOffset, 16);
1859 else
1860 return false;
1861 }
1862 else {
1863 unsigned indexOffset = CheckEven ? 0 : 4;
1864 if (ShuffleKind == 1) // Unary
1865 return isVMerge(N, indexOffset, 0);
1866 else if (ShuffleKind == 0) // Normal
1867 return isVMerge(N, indexOffset, 16);
1868 else
1869 return false;
1870 }
1871 return false;
1872}
1873
1874/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
1875/// amount, otherwise return -1.
1876/// The ShuffleKind distinguishes between big-endian operations with two
1877/// different inputs (0), either-endian operations with two identical inputs
1878/// (1), and little-endian operations with two different inputs (2). For the
1879/// latter, the input operands are swapped (see PPCInstrAltivec.td).
1880int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
1881 SelectionDAG &DAG) {
1882 if (N->getValueType(0) != MVT::v16i8)
1883 return -1;
1884
1885 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
1886
1887 // Find the first non-undef value in the shuffle mask.
1888 unsigned i;
1889 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
1890 /*search*/;
1891
1892 if (i == 16) return -1; // all undef.
1893
1894 // Otherwise, check to see if the rest of the elements are consecutively
1895 // numbered from this value.
1896 unsigned ShiftAmt = SVOp->getMaskElt(i);
1897 if (ShiftAmt < i) return -1;
1898
1899 ShiftAmt -= i;
1900 bool isLE = DAG.getDataLayout().isLittleEndian();
1901
1902 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
1903 // Check the rest of the elements to see if they are consecutive.
1904 for (++i; i != 16; ++i)
1905 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
1906 return -1;
1907 } else if (ShuffleKind == 1) {
1908 // Check the rest of the elements to see if they are consecutive.
1909 for (++i; i != 16; ++i)
1910 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
1911 return -1;
1912 } else
1913 return -1;
1914
1915 if (isLE)
1916 ShiftAmt = 16 - ShiftAmt;
1917
1918 return ShiftAmt;
1919}
1920
1921/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
1922/// specifies a splat of a single element that is suitable for input to
1923/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
1924bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
1925 assert(N->getValueType(0) == MVT::v16i8 && isPowerOf2_32(EltSize) &&
1926        EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
1927
1928 // The consecutive indices need to specify an element, not part of two
1929 // different elements. So abandon ship early if this isn't the case.
1930 if (N->getMaskElt(0) % EltSize != 0)
1931 return false;
1932
1933 // This is a splat operation if each element of the permute is the same, and
1934 // if the value doesn't reference the second vector.
1935 unsigned ElementBase = N->getMaskElt(0);
1936
1937 // FIXME: Handle UNDEF elements too!
1938 if (ElementBase >= 16)
1939 return false;
1940
1941 // Check that the indices are consecutive, in the case of a multi-byte element
1942 // splatted with a v16i8 mask.
1943 for (unsigned i = 1; i != EltSize; ++i)
1944 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
1945 return false;
1946
1947 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
1948 if (N->getMaskElt(i) < 0) continue;
1949 for (unsigned j = 0; j != EltSize; ++j)
1950 if (N->getMaskElt(i+j) != N->getMaskElt(j))
1951 return false;
1952 }
1953 return true;
1954}
1955
1956/// Check that the mask is shuffling N byte elements. Within each N byte
1957/// element of the mask, the indices could be either in increasing or
1958/// decreasing order as long as they are consecutive.
1959/// \param[in] N the shuffle vector SD Node to analyze
1960/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
1961/// Word/DoubleWord/QuadWord).
1962 /// \param[in] StepLen the step between consecutive indices within each N byte
1963 /// element: 1 for increasing order, -1 for decreasing order.
1964/// \return true iff the mask is shuffling N byte elements.
1965static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
1966 int StepLen) {
1967 assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
1968        "Unexpected element width.");
1969 assert((StepLen == 1 || StepLen == -1) && "Unexpected element width.");
1970
1971 unsigned NumOfElem = 16 / Width;
1972 unsigned MaskVal[16]; // Width is never greater than 16
1973 for (unsigned i = 0; i < NumOfElem; ++i) {
1974 MaskVal[0] = N->getMaskElt(i * Width);
1975 if ((StepLen == 1) && (MaskVal[0] % Width)) {
1976 return false;
1977 } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
1978 return false;
1979 }
1980
1981 for (unsigned int j = 1; j < Width; ++j) {
1982 MaskVal[j] = N->getMaskElt(i * Width + j);
1983 if (MaskVal[j] != MaskVal[j-1] + StepLen) {
1984 return false;
1985 }
1986 }
1987 }
1988
1989 return true;
1990}
1991
1992bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
1993 unsigned &InsertAtByte, bool &Swap, bool IsLE) {
1994 if (!isNByteElemShuffleMask(N, 4, 1))
1995 return false;
1996
1997 // Now we look at mask elements 0,4,8,12
1998 unsigned M0 = N->getMaskElt(0) / 4;
1999 unsigned M1 = N->getMaskElt(4) / 4;
2000 unsigned M2 = N->getMaskElt(8) / 4;
2001 unsigned M3 = N->getMaskElt(12) / 4;
2002 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
2003 unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
2004
2005 // Below, let H and L be arbitrary elements of the shuffle mask
2006 // where H is in the range [4,7] and L is in the range [0,3].
2007 // H, 1, 2, 3 or L, 5, 6, 7
2008 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
2009 (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
2010 ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
2011 InsertAtByte = IsLE ? 12 : 0;
2012 Swap = M0 < 4;
2013 return true;
2014 }
2015 // 0, H, 2, 3 or 4, L, 6, 7
2016 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
2017 (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
2018 ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
2019 InsertAtByte = IsLE ? 8 : 4;
2020 Swap = M1 < 4;
2021 return true;
2022 }
2023 // 0, 1, H, 3 or 4, 5, L, 7
2024 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
2025 (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
2026 ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
2027 InsertAtByte = IsLE ? 4 : 8;
2028 Swap = M2 < 4;
2029 return true;
2030 }
2031 // 0, 1, 2, H or 4, 5, 6, L
2032 if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
2033 (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
2034 ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
2035 InsertAtByte = IsLE ? 0 : 12;
2036 Swap = M3 < 4;
2037 return true;
2038 }
2039
2040 // If both vector operands for the shuffle are the same vector, the mask will
2041 // contain only elements from the first one and the second one will be undef.
2042 if (N->getOperand(1).isUndef()) {
2043 ShiftElts = 0;
2044 Swap = true;
2045 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2046 if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
2047 InsertAtByte = IsLE ? 12 : 0;
2048 return true;
2049 }
2050 if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2051 InsertAtByte = IsLE ? 8 : 4;
2052 return true;
2053 }
2054 if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2055 InsertAtByte = IsLE ? 4 : 8;
2056 return true;
2057 }
2058 if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2059 InsertAtByte = IsLE ? 0 : 12;
2060 return true;
2061 }
2062 }
2063
2064 return false;
2065}
2066
2067bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2068 bool &Swap, bool IsLE) {
2069 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2070 // Ensure each byte index of the word is consecutive.
2071 if (!isNByteElemShuffleMask(N, 4, 1))
2072 return false;
2073
2074 // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2075 unsigned M0 = N->getMaskElt(0) / 4;
2076 unsigned M1 = N->getMaskElt(4) / 4;
2077 unsigned M2 = N->getMaskElt(8) / 4;
2078 unsigned M3 = N->getMaskElt(12) / 4;
2079
2080 // If both vector operands for the shuffle are the same vector, the mask will
2081 // contain only elements from the first one and the second one will be undef.
2082 if (N->getOperand(1).isUndef()) {
2083 assert(M0 < 4 && "Indexing into an undef vector?");
2084 if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2085 return false;
2086
2087 ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2088 Swap = false;
2089 return true;
2090 }
2091
2092 // Ensure each word index of the ShuffleVector Mask is consecutive.
2093 if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2094 return false;
2095
2096 if (IsLE) {
2097 if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2098 // Input vectors don't need to be swapped if the leading element
2099 // of the result is one of the 3 left elements of the second vector
2100 // (or if there is no shift to be done at all).
2101 Swap = false;
2102 ShiftElts = (8 - M0) % 8;
2103 } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2104 // Input vectors need to be swapped if the leading element
2105 // of the result is one of the 3 left elements of the first vector
2106 // (or if we're shifting by 4 - thereby simply swapping the vectors).
2107 Swap = true;
2108 ShiftElts = (4 - M0) % 4;
2109 }
2110
2111 return true;
2112 } else { // BE
2113 if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
2114 // Input vectors don't need to be swapped if the leading element
2115 // of the result is one of the 4 elements of the first vector.
2116 Swap = false;
2117 ShiftElts = M0;
2118 } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
2119 // Input vectors need to be swapped if the leading element
2120 // of the result is one of the 4 elements of the right vector.
2121 Swap = true;
2122 ShiftElts = M0 - 4;
2123 }
2124
2125 return true;
2126 }
2127}
2128
2129bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
2130 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2131
2132 if (!isNByteElemShuffleMask(N, Width, -1))
2133 return false;
2134
2135 for (int i = 0; i < 16; i += Width)
2136 if (N->getMaskElt(i) != i + Width - 1)
2137 return false;
2138
2139 return true;
2140}
2141
2142bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
2143 return isXXBRShuffleMaskHelper(N, 2);
2144}
2145
2146bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
2147 return isXXBRShuffleMaskHelper(N, 4);
2148}
2149
2150bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
2151 return isXXBRShuffleMaskHelper(N, 8);
2152}
2153
2154bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
2155 return isXXBRShuffleMaskHelper(N, 16);
2156}
2157
2158/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2159/// if the inputs to the instruction should be swapped and set \p DM to the
2160/// value for the immediate.
2161/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2162/// AND element 0 of the result comes from the first input (LE) or second input
2163/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2164/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2165/// mask.
2166bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
2167 bool &Swap, bool IsLE) {
2168 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2169
2170 // Ensure each byte index of the double word is consecutive.
2171 if (!isNByteElemShuffleMask(N, 8, 1))
2172 return false;
2173
2174 unsigned M0 = N->getMaskElt(0) / 8;
2175 unsigned M1 = N->getMaskElt(8) / 8;
2176 assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2177
2178 // If both vector operands for the shuffle are the same vector, the mask will
2179 // contain only elements from the first one and the second one will be undef.
2180 if (N->getOperand(1).isUndef()) {
2181 if ((M0 | M1) < 2) {
2182 DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2183 Swap = false;
2184 return true;
2185 } else
2186 return false;
2187 }
2188
2189 if (IsLE) {
2190 if (M0 > 1 && M1 < 2) {
2191 Swap = false;
2192 } else if (M0 < 2 && M1 > 1) {
2193 M0 = (M0 + 2) % 4;
2194 M1 = (M1 + 2) % 4;
2195 Swap = true;
2196 } else
2197 return false;
2198
2199 // Note: if control flow comes here that means Swap is already set above
2200 DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2201 return true;
2202 } else { // BE
2203 if (M0 < 2 && M1 > 1) {
2204 Swap = false;
2205 } else if (M0 > 1 && M1 < 2) {
2206 M0 = (M0 + 2) % 4;
2207 M1 = (M1 + 2) % 4;
2208 Swap = true;
2209 } else
2210 return false;
2211
2212 // Note: if control flow comes here that means Swap is already set above
2213 DM = (M0 << 1) + (M1 & 1);
2214 return true;
2215 }
2216}
2217
2218
2219/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2220/// appropriate for PPC mnemonics (which have a big endian bias - namely
2221/// elements are counted from the left of the vector register).
2222unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2223 SelectionDAG &DAG) {
2224 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2225 assert(isSplatShuffleMask(SVOp, EltSize));
2226 if (DAG.getDataLayout().isLittleEndian())
2227 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2228 else
2229 return SVOp->getMaskElt(0) / EltSize;
2230}
2231
2232/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2233/// by using a vspltis[bhw] instruction of the specified element size, return
2234/// the constant being splatted. The ByteSize field indicates the number of
2235/// bytes of each element [124] -> [bhw].
2236SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2237 SDValue OpVal(nullptr, 0);
2238
2239 // If ByteSize of the splat is bigger than the element size of the
2240 // build_vector, then we have a case where we are checking for a splat where
2241 // multiple elements of the buildvector are folded together into a single
2242 // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
2243 unsigned EltSize = 16/N->getNumOperands();
2244 if (EltSize < ByteSize) {
2245 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2246 SDValue UniquedVals[4];
2247 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2248
2249 // See if all of the elements in the buildvector agree across.
2250 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2251 if (N->getOperand(i).isUndef()) continue;
2252 // If the element isn't a constant, bail fully out.
2253 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2254
2255 if (!UniquedVals[i&(Multiple-1)].getNode())
2256 UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2257 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2258 return SDValue(); // no match.
2259 }
2260
2261 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2262 // either constant or undef values that are identical for each chunk. See
2263 // if these chunks can form into a larger vspltis*.
2264
2265 // Check to see if all of the leading entries are either 0 or -1. If
2266 // neither, then this won't fit into the immediate field.
2267 bool LeadingZero = true;
2268 bool LeadingOnes = true;
2269 for (unsigned i = 0; i != Multiple-1; ++i) {
2270 if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2271
2272 LeadingZero &= isNullConstant(UniquedVals[i]);
2273 LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2274 }
2275 // Finally, check the least significant entry.
2276 if (LeadingZero) {
2277 if (!UniquedVals[Multiple-1].getNode())
2278 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
2279 int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
2280 if (Val < 16) // 0,0,0,4 -> vspltisw(4)
2281 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2282 }
2283 if (LeadingOnes) {
2284 if (!UniquedVals[Multiple-1].getNode())
2285 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2286 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2287 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2288 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2289 }
2290
2291 return SDValue();
2292 }
2293
2294 // Check to see if this buildvec has a single non-undef value in its elements.
2295 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2296 if (N->getOperand(i).isUndef()) continue;
2297 if (!OpVal.getNode())
2298 OpVal = N->getOperand(i);
2299 else if (OpVal != N->getOperand(i))
2300 return SDValue();
2301 }
2302
2303 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
2304
2305 unsigned ValSizeInBytes = EltSize;
2306 uint64_t Value = 0;
2307 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2308 Value = CN->getZExtValue();
2309 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2310 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2311 Value = FloatToBits(CN->getValueAPF().convertToFloat());
2312 }
2313
2314 // If the splat value is larger than the element value, then we can never do
2315 // this splat. The only case that we could fit the replicated bits into our
2316 // immediate field for would be zero, and we prefer to use vxor for it.
2317 if (ValSizeInBytes < ByteSize) return SDValue();
2318
2319 // If the element value is larger than the splat value, check if it consists
2320 // of a repeated bit pattern of size ByteSize.
2321 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2322 return SDValue();
2323
2324 // Properly sign extend the value.
2325 int MaskVal = SignExtend32(Value, ByteSize * 8);
2326
2327 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2328 if (MaskVal == 0) return SDValue();
2329
2330 // Finally, if this value fits in a 5 bit sext field, return it
2331 if (SignExtend32<5>(MaskVal) == MaskVal)
2332 return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2333 return SDValue();
2334}
2335
2336/// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift
2337/// amount, otherwise return -1.
2338int PPC::isQVALIGNIShuffleMask(SDNode *N) {
2339 EVT VT = N->getValueType(0);
2340 if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1)
2341 return -1;
2342
2343 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2344
2345 // Find the first non-undef value in the shuffle mask.
2346 unsigned i;
2347 for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i)
2348 /*search*/;
2349
2350 if (i == 4) return -1; // all undef.
2351
2352 // Otherwise, check to see if the rest of the elements are consecutively
2353 // numbered from this value.
2354 unsigned ShiftAmt = SVOp->getMaskElt(i);
2355 if (ShiftAmt < i) return -1;
2356 ShiftAmt -= i;
2357
2358 // Check the rest of the elements to see if they are consecutive.
2359 for (++i; i != 4; ++i)
2360 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2361 return -1;
2362
2363 return ShiftAmt;
2364}
2365
2366//===----------------------------------------------------------------------===//
2367// Addressing Mode Selection
2368//===----------------------------------------------------------------------===//
2369
2370/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2371/// or 64-bit immediate, and if the value can be accurately represented as a
2372/// sign extension from a 16-bit value. If so, this returns true and the
2373/// immediate.
2374bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2375 if (!isa<ConstantSDNode>(N))
2376 return false;
2377
2378 Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue();
2379 if (N->getValueType(0) == MVT::i32)
2380 return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
2381 else
2382 return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2383}
2384bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2385 return isIntS16Immediate(Op.getNode(), Imm);
2386}
2387
2388
2389/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2390/// be represented as an indexed [r+r] operation.
2391bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2392 SDValue &Index,
2393 SelectionDAG &DAG) const {
2394 for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
2395 UI != E; ++UI) {
2396 if (MemSDNode *Memop = dyn_cast<MemSDNode>(*UI)) {
2397 if (Memop->getMemoryVT() == MVT::f64) {
2398 Base = N.getOperand(0);
2399 Index = N.getOperand(1);
2400 return true;
2401 }
2402 }
2403 }
2404 return false;
2405}
2406
2407 /// SelectAddressRegReg - Given the specified address, check to see if it
2408/// can be represented as an indexed [r+r] operation. Returns false if it
2409/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2410/// non-zero and N can be represented by a base register plus a signed 16-bit
2411/// displacement, make a more precise judgement by checking (displacement % \p
2412/// EncodingAlignment).
2413bool PPCTargetLowering::SelectAddressRegReg(
2414 SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
2415 MaybeAlign EncodingAlignment) const {
2416 // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2417 // a [pc+imm].
2418 if (SelectAddressPCRel(N, Base))
2419 return false;
2420
2421 int16_t Imm = 0;
2422 if (N.getOpcode() == ISD::ADD) {
2423 // Is there an SPE (f64) load/store user, which can't handle a 16-bit offset?
2424 // SPE load/store can only handle 8-bit offsets.
2425 if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2426 return true;
2427 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2428 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2429 return false; // r+i
2430 if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2431 return false; // r+i
2432
2433 Base = N.getOperand(0);
2434 Index = N.getOperand(1);
2435 return true;
2436 } else if (N.getOpcode() == ISD::OR) {
2437 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2438 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2439 return false; // r+i can fold it if we can.
2440
2441 // If this is an or of disjoint bitfields, we can codegen this as an add
2442 // (for better address arithmetic) if the LHS and RHS of the OR are provably
2443 // disjoint.
2444 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2445
2446 if (LHSKnown.Zero.getBoolValue()) {
2447 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2448 // If all of the bits are known zero on the LHS or RHS, the add won't
2449 // carry.
2450 if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2451 Base = N.getOperand(0);
2452 Index = N.getOperand(1);
2453 return true;
2454 }
2455 }
2456 }
2457
2458 return false;
2459}
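// Sketch of the selection above for a few hypothetical addresses (assuming no
// EncodingAlignment restriction):
//   (add %r, 16)   -> offset fits in signed 16 bits -> false, prefer [r+imm]
//   (add %r1, %r2) -> Base = %r1, Index = %r2       -> true,  select [r+r]
//   (or %r, 8)     -> treated like the add case when bit 3 of %r is known to
//                     be zero, since the or then cannot carry.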
2460
2461// If we happen to be doing an i64 load or store into a stack slot that has
2462// less than a 4-byte alignment, then the frame-index elimination may need to
2463// use an indexed load or store instruction (because the offset may not be a
2464// multiple of 4). The extra register needed to hold the offset comes from the
2465// register scavenger, and it is possible that the scavenger will need to use
2466// an emergency spill slot. As a result, we need to make sure that a spill slot
2467// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2468// stack slot.
2469static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2470 // FIXME: This does not handle the LWA case.
2471 if (VT != MVT::i64)
2472 return;
2473
2474 // NOTE: We'll exclude negative FIs here, which come from argument
2475 // lowering, because there are no known test cases triggering this problem
2476 // using packed structures (or similar). We can remove this exclusion if
2477 // we find such a test case. The reason why this is so test-case driven is
2478 // because this entire 'fixup' is only to prevent crashes (from the
2479 // register scavenger) on not-really-valid inputs. For example, if we have:
2480 // %a = alloca i1
2481 // %b = bitcast i1* %a to i64*
2482 // store i64* a, i64 b
2483 // then the store should really be marked as 'align 1', but is not. If it
2484 // were marked as 'align 1' then the indexed form would have been
2485 // instruction-selected initially, and the problem this 'fixup' is preventing
2486 // won't happen regardless.
2487 if (FrameIdx < 0)
2488 return;
2489
2490 MachineFunction &MF = DAG.getMachineFunction();
2491 MachineFrameInfo &MFI = MF.getFrameInfo();
2492
2493 if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2494 return;
2495
2496 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2497 FuncInfo->setHasNonRISpills();
2498}
2499
2500/// Returns true if the address N can be represented by a base register plus
2501/// a signed 16-bit displacement [r+imm], and if it is not better
2502/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2503/// displacements that are multiples of that value.
2504bool PPCTargetLowering::SelectAddressRegImm(
2505 SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2506 MaybeAlign EncodingAlignment) const {
2507 // FIXME dl should come from parent load or store, not from address
2508 SDLoc dl(N);
2509
2510 // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2511 // a [pc+imm].
2512 if (SelectAddressPCRel(N, Base))
2513 return false;
2514
2515 // If this can be more profitably realized as r+r, fail.
2516 if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2517 return false;
2518
2519 if (N.getOpcode() == ISD::ADD) {
2520 int16_t imm = 0;
2521 if (isIntS16Immediate(N.getOperand(1), imm) &&
2522 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2523 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2524 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2525 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2526 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2527 } else {
2528 Base = N.getOperand(0);
2529 }
2530 return true; // [r+i]
2531 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2532 // Match LOAD (ADD (X, Lo(G))).
2533 assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
2534 && "Cannot handle constant offsets yet!");
2535 Disp = N.getOperand(1).getOperand(0); // The global address.
2536 assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2537 Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2538 Disp.getOpcode() == ISD::TargetConstantPool ||
2539 Disp.getOpcode() == ISD::TargetJumpTable);
2540 Base = N.getOperand(0);
2541 return true; // [&g+r]
2542 }
2543 } else if (N.getOpcode() == ISD::OR) {
2544 int16_t imm = 0;
2545 if (isIntS16Immediate(N.getOperand(1), imm) &&
2546 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2547 // If this is an or of disjoint bitfields, we can codegen this as an add
2548 // (for better address arithmetic) if the LHS and RHS of the OR are
2549 // provably disjoint.
2550 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2551
2552 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2553 // If all of the bits are known zero on the LHS or RHS, the add won't
2554 // carry.
2555 if (FrameIndexSDNode *FI =
2556 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2557 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2558 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2559 } else {
2560 Base = N.getOperand(0);
2561 }
2562 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2563 return true;
2564 }
2565 }
2566 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2567 // Loading from a constant address.
2568
2569 // If this address fits entirely in a 16-bit sext immediate field, codegen
2570 // this as "d, 0"
2571 int16_t Imm;
2572 if (isIntS16Immediate(CN, Imm) &&
2573 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
2574 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2575 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2576 CN->getValueType(0));
2577 return true;
2578 }
2579
2580 // Handle 32-bit sext immediates with LIS + addr mode.
2581 if ((CN->getValueType(0) == MVT::i32 ||
2582 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2583 (!EncodingAlignment ||
2584 isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2585 int Addr = (int)CN->getZExtValue();
2586
2587 // Otherwise, break this down into an LIS + disp.
2588 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2589
2590 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2591 MVT::i32);
2592 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2593 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2594 return true;
2595 }
2596 }
2597
2598 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2599 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2600 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2601 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2602 } else
2603 Base = N;
2604 return true; // [r+0]
2605}
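// Worked example for the LIS + displacement path above, with a hypothetical
// constant address:
//   Addr = 0x12348000
//   Disp = (short)Addr                 = 0x8000 (-32768)
//   Base = (Addr - (short)Addr) >> 16  = 0x1235 (materialized via LIS)
// LIS 0x1235 produces 0x12350000; adding the -32768 displacement yields
// 0x12348000 again, so the pair encodes the original address exactly.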
2606
2607 /// SelectAddressRegRegOnly - Given the specified address, force it to be
2608/// represented as an indexed [r+r] operation.
2609bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2610 SDValue &Index,
2611 SelectionDAG &DAG) const {
2612 // Check to see if we can easily represent this as an [r+r] address. This
2613 // will fail if it thinks that the address is more profitably represented as
2614 // reg+imm, e.g. where imm = 0.
2615 if (SelectAddressRegReg(N, Base, Index, DAG))
2616 return true;
2617
2618 // If the address is the result of an add, we will utilize the fact that the
2619 // address calculation includes an implicit add. However, we can reduce
2620 // register pressure if we do not materialize a constant just for use as the
2621 // index register. We only get rid of the add if it is not an add of a
2622 // value and a 16-bit signed constant where both operands have a single use.
2623 int16_t imm = 0;
2624 if (N.getOpcode() == ISD::ADD &&
2625 (!isIntS16Immediate(N.getOperand(1), imm) ||
2626 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2627 Base = N.getOperand(0);
2628 Index = N.getOperand(1);
2629 return true;
2630 }
2631
2632 // Otherwise, do it the hard way, using R0 as the base register.
2633 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2634 N.getValueType());
2635 Index = N;
2636 return true;
2637}
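// For example, given (add %r, 4) where %r and the constant each have only one
// use, the code above falls through to the R0/ZERO form: Index becomes the add
// itself, so no extra register is spent materializing the constant 4.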
2638
2639template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2640 Ty *PCRelCand = dyn_cast<Ty>(N);
2641 return PCRelCand && (PCRelCand->getTargetFlags() & PPCII::MO_PCREL_FLAG);
2642}
2643
2644/// Returns true if this address is a PC Relative address.
2645/// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
2646/// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
2647bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
2648 // This is a materialize PC Relative node. Always select this as PC Relative.
2649 Base = N;
2650 if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
2651 return true;
2652 if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
2653 isValidPCRelNode<GlobalAddressSDNode>(N) ||
2654 isValidPCRelNode<JumpTableSDNode>(N) ||
2655 isValidPCRelNode<BlockAddressSDNode>(N))
2656 return true;
2657 return false;
2658}
2659
2660 /// Returns true if we should use a direct load into a vector register (such
2661 /// as lxsd or lfd) instead of a load into a gpr followed by a direct move.
2662static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
2663
2664 // If there are any uses other than scalar to vector, then we should
2665 // keep it as a scalar load -> direct move pattern to prevent multiple
2666 // loads.
2667 LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
2668 if (!LD)
2669 return false;
2670
2671 EVT MemVT = LD->getMemoryVT();
2672 if (!MemVT.isSimple())
2673 return false;
2674 switch(MemVT.getSimpleVT().SimpleTy) {
2675 case MVT::i64:
2676 break;
2677 case MVT::i32:
2678 if (!ST.hasP8Vector())
2679 return false;
2680 break;
2681 case MVT::i16:
2682 case MVT::i8:
2683 if (!ST.hasP9Vector())
2684 return false;
2685 break;
2686 default:
2687 return false;
2688 }
2689
2690 SDValue LoadedVal(N, 0);
2691 if (!LoadedVal.hasOneUse())
2692 return false;
2693
2694 for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
2695 UI != UE; ++UI)
2696 if (UI.getUse().get().getResNo() == 0 &&
2697 UI->getOpcode() != ISD::SCALAR_TO_VECTOR &&
2698 UI->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
2699 return false;
2700
2701 return true;
2702}
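// Conceptually, the pattern this accepts looks like:
//   t1 = load i64, ...                ; loaded value has exactly one use
//   t2 = scalar_to_vector t1          ; the only user of the loaded value
// in which case a single load directly into a vector register (such as the
// lxsd / lfd forms mentioned above) beats a GPR load followed by a direct move.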
2703
2704 /// getPreIndexedAddressParts - Returns true, and sets the base pointer,
2705 /// offset pointer and addressing mode by reference, if the node's address
2706 /// can be legally represented as a pre-indexed load / store address.
2707bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
2708 SDValue &Offset,
2709 ISD::MemIndexedMode &AM,
2710 SelectionDAG &DAG) const {
2711 if (DisablePPCPreinc) return false;
2712
2713 bool isLoad = true;
2714 SDValue Ptr;
2715 EVT VT;
2716 unsigned Alignment;
2717 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2718 Ptr = LD->getBasePtr();
2719 VT = LD->getMemoryVT();
2720 Alignment = LD->getAlignment();
2721 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
2722 Ptr = ST->getBasePtr();
2723 VT = ST->getMemoryVT();
2724 Alignment = ST->getAlignment();
2725 isLoad = false;
2726 } else
2727 return false;
2728
2729 // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
2730 // instructions because we can fold these into a more efficient instruction
2731 // instead (such as LXSD).
2732 if (isLoad && usePartialVectorLoads(N, Subtarget)) {
2733 return false;
2734 }
2735
2736 // PowerPC doesn't have preinc load/store instructions for vectors
2737 if (VT.isVector())
2738 return false;
2739
2740 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
2741 // Common code will reject creating a pre-inc form if the base pointer
2742 // is a frame index, or if N is a store and the base pointer is either
2743 // the same as or a predecessor of the value being stored. Check for
2744 // those situations here, and try with swapped Base/Offset instead.
2745 bool Swap = false;
2746
2747 if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
2748 Swap = true;
2749 else if (!isLoad) {
2750 SDValue Val = cast<StoreSDNode>(N)->getValue();
2751 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
2752 Swap = true;
2753 }
2754
2755 if (Swap)
2756 std::swap(Base, Offset);
2757
2758 AM = ISD::PRE_INC;
2759 return true;
2760 }
2761
2762 // LDU/STU can only handle immediates that are a multiple of 4.
2763 if (VT != MVT::i64) {
2764 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, None))
2765 return false;
2766 } else {
2767 // LDU/STU need an address with at least 4-byte alignment.
2768 if (Alignment < 4)
2769 return false;
2770
2771 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
2772 return false;
2773 }
2774
2775 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2776 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
2777 // sext i32 to i64 when addr mode is r+i.
2778 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
2779 LD->getExtensionType() == ISD::SEXTLOAD &&
2780 isa<ConstantSDNode>(Offset))
2781 return false;
2782 }
2783
2784 AM = ISD::PRE_INC;
2785 return true;
2786}
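// Illustrative outcome, assuming the address passes the checks above:
//   store i32 %v, (add %base, 16)
// can be selected as a pre-increment store such as "stwu %v, 16(%base)", which
// both stores %v and updates %base to %base + 16 in a single instruction.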
2787
2788//===----------------------------------------------------------------------===//
2789// LowerOperation implementation
2790//===----------------------------------------------------------------------===//
2791
2792 /// Set HiOpFlags and LoOpFlags to the target MO flags used for label
2793 /// references, adding the PIC flag when generating position-independent code.
2794static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
2795 unsigned &HiOpFlags, unsigned &LoOpFlags,
2796 const GlobalValue *GV = nullptr) {
2797 HiOpFlags = PPCII::MO_HA;
2798 LoOpFlags = PPCII::MO_LO;
2799
2800 // Don't use the pic base if not in PIC relocation model.
2801 if (IsPIC) {
2802 HiOpFlags |= PPCII::MO_PIC_FLAG;
2803 LoOpFlags |= PPCII::MO_PIC_FLAG;
2804 }
2805}
2806
2807static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
2808 SelectionDAG &DAG) {
2809 SDLoc DL(HiPart);
2810 EVT PtrVT = HiPart.getValueType();
2811 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
2812
2813 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
2814 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
2815
2816 // With PIC, the first instruction is actually "GR+hi(&G)".
2817 if (isPIC)
2818 Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
2819 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
2820
2821 // Generate non-pic code that has direct accesses to the constant pool.
2822 // The address of the global is just (hi(&g)+lo(&g)).
2823 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
2824}
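// On a non-PIC 32-bit target this typically becomes the familiar two
// instruction sequence (for a hypothetical symbol g):
//   lis  r3, g@ha
//   addi r3, r3, g@l
// i.e. exactly the ADD of the Hi and Lo halves constructed above.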
2825
2826static void setUsesTOCBasePtr(MachineFunction &MF) {
2827 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2828 FuncInfo->setUsesTOCBasePtr();
2829}
2830
2831static void setUsesTOCBasePtr(SelectionDAG &DAG) {
2832 setUsesTOCBasePtr(DAG.getMachineFunction());
2833}
2834
2835SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
2836 SDValue GA) const {
2837 const bool Is64Bit = Subtarget.isPPC64();
2838 EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
2839 SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT)
2840 : Subtarget.isAIXABI()
2841 ? DAG.getRegister(PPC::R2, VT)
2842 : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
2843 SDValue Ops[] = { GA, Reg };
2844 return DAG.getMemIntrinsicNode(
2845 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
2846 MachinePointerInfo::getGOT(DAG.getMachineFunction()), None,
2847 MachineMemOperand::MOLoad);
2848}
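// The TOC_ENTRY built here is a load through the TOC pointer; for a
// hypothetical symbol g on 64-bit ELF it typically lowers to something like:
//   addis r3, r2, g@toc@ha
//   ld    r3, g@toc@l(r3)
// or to a single "ld r3, g@toc(r2)" under the small code model.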
2849
2850SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
2851 SelectionDAG &DAG) const {
2852 EVT PtrVT = Op.getValueType();
2853 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2854 const Constant *C = CP->getConstVal();
2855
2856 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
2857 // The actual address of the constant pool entry is stored in the TOC.
2858 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
2859 if (Subtarget.isUsingPCRelativeCalls()) {
2860 SDLoc DL(CP);
2861 EVT Ty = getPointerTy(DAG.getDataLayout());
2862 SDValue ConstPool = DAG.getTargetConstantPool(
2863 C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
2864 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
2865 }
2866 setUsesTOCBasePtr(DAG);
2867 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
2868 return getTOCEntry(DAG, SDLoc(CP), GA);
2869 }
2870
2871 unsigned MOHiFlag, MOLoFlag;
2872 bool IsPIC = isPositionIndependent();
2873 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2874
2875 if (IsPIC && Subtarget.isSVR4ABI()) {
2876 SDValue GA =
2877 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
2878 return getTOCEntry(DAG, SDLoc(CP), GA);
2879 }
2880
2881 SDValue CPIHi =
2882 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
2883 SDValue CPILo =
2884 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
2885 return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
2886}
2887
2888// For 64-bit PowerPC, prefer the more compact relative encodings.
2889// This trades 32 bits per jump table entry for one or two instructions
2890 // at the jump site.
2891unsigned PPCTargetLowering::getJumpTableEncoding() const {
2892 if (isJumpTableRelative())
2893 return MachineJumpTableInfo::EK_LabelDifference32;
2894
2895 return TargetLowering::getJumpTableEncoding();
2896}
2897
2898bool PPCTargetLowering::isJumpTableRelative() const {
2899 if (UseAbsoluteJumpTables)
2900 return false;
2901 if (Subtarget.isPPC64() || Subtarget.isAIXABI())
2902 return true;
2903 return TargetLowering::isJumpTableRelative();
2904}
2905
2906SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
2907 SelectionDAG &DAG) const {
2908 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
2909 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2910
2911 switch (getTargetMachine().getCodeModel()) {
2912 case CodeModel::Small:
2913 case CodeModel::Medium:
2914 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2915 default:
2916 return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
2917 getPointerTy(DAG.getDataLayout()));
2918 }
2919}
2920
2921const MCExpr *
2922PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
2923 unsigned JTI,
2924 MCContext &Ctx) const {
2925 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
2926 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2927
2928 switch (getTargetMachine().getCodeModel()) {
2929 case CodeModel::Small:
2930 case CodeModel::Medium:
2931 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2932 default:
2933 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2934 }
2935}
2936
2937SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
2938 EVT PtrVT = Op.getValueType();
2939 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2940
2941 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
2942 if (Subtarget.isUsingPCRelativeCalls()) {
2943 SDLoc DL(JT);
2944 EVT Ty = getPointerTy(DAG.getDataLayout());
2945 SDValue GA =
2946 DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
2947 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
2948 return MatAddr;
2949 }
2950
2951 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
2952 // The actual address of the jump table is stored in the TOC.
2953 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
2954 setUsesTOCBasePtr(DAG);
2955 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
2956 return getTOCEntry(DAG, SDLoc(JT), GA);
2957 }
2958
2959 unsigned MOHiFlag, MOLoFlag;
2960 bool IsPIC = isPositionIndependent();
2961 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2962
2963 if (IsPIC && Subtarget.isSVR4ABI()) {
2964 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
2965 PPCII::MO_PIC_FLAG);
2966 return getTOCEntry(DAG, SDLoc(GA), GA);
2967 }
2968
2969 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
2970 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
2971 return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
2972}
2973
2974SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
2975 SelectionDAG &DAG) const {
2976 EVT PtrVT = Op.getValueType();
2977 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
2978 const BlockAddress *BA = BASDN->getBlockAddress();
2979
2980 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
2981 if (Subtarget.isUsingPCRelativeCalls()) {
2982 SDLoc DL(BASDN);
2983 EVT Ty = getPointerTy(DAG.getDataLayout());
2984 SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
2985 PPCII::MO_PCREL_FLAG);
2986 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
2987 return MatAddr;
2988 }
2989
2990 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
2991 // The actual BlockAddress is stored in the TOC.
2992 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
2993 setUsesTOCBasePtr(DAG);
2994 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
2995 return getTOCEntry(DAG, SDLoc(BASDN), GA);
2996 }
2997
2998 // 32-bit position-independent ELF stores the BlockAddress in the .got.
2999 if (Subtarget.is32BitELFABI() && isPositionIndependent())
3000 return getTOCEntry(
3001 DAG, SDLoc(BASDN),
3002 DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
3003
3004 unsigned MOHiFlag, MOLoFlag;
3005 bool IsPIC = isPositionIndependent();
3006 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3007 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
3008 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
3009 return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3010}
3011
3012SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3013 SelectionDAG &DAG) const {
3014 // FIXME: TLS addresses currently use medium model code sequences,
3015 // which is the most useful form. Eventually support for small and
3016 // large models could be added if users need it, at the cost of
3017 // additional complexity.
3018 if (Subtarget.isUsingPCRelativeCalls() && !EnablePPCPCRelTLS)
3019 report_fatal_error("Thread local storage is not supported with pc-relative"
3020 " addressing - please compile with -mno-pcrel");
3021 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3022 if (DAG.getTarget().useEmulatedTLS())
3023 return LowerToTLSEmulatedModel(GA, DAG);
3024
3025 SDLoc dl(GA);
3026 const GlobalValue *GV = GA->getGlobal();
3027 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3028 bool is64bit = Subtarget.isPPC64();
3029 const Module *M = DAG.getMachineFunction().getFunction().getParent();
3030 PICLevel::Level picLevel = M->getPICLevel();
3031
3032 const TargetMachine &TM = getTargetMachine();
3033 TLSModel::Model Model = TM.getTLSModel(GV);
3034
3035 if (Model == TLSModel::LocalExec) {
3036 if (Subtarget.isUsingPCRelativeCalls()) {
3037 SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3038 SDValue TGA = DAG.getTargetGlobalAddress(
3039 GV, dl, PtrVT, 0, (PPCII::MO_PCREL_FLAG | PPCII::MO_TPREL_FLAG));
3040 SDValue MatAddr =
3041 DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
3042 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3043 }
3044
3045 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3046 PPCII::MO_TPREL_HA);
3047 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3048 PPCII::MO_TPREL_LO);
3049 SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3050 : DAG.getRegister(PPC::R2, MVT::i32);
3051
3052 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3053 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3054 }
3055
3056 if (Model == TLSModel::InitialExec) {
3057 bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3058 SDValue TGA = DAG.getTargetGlobalAddress(
3059 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3060 SDValue TGATLS = DAG.getTargetGlobalAddress(
3061 GV, dl, PtrVT, 0,
3062 IsPCRel ? (PPCII::MO_TLS | PPCII::MO_PCREL_FLAG) : PPCII::MO_TLS);
3063 SDValue TPOffset;
3064 if (IsPCRel) {
3065 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
3066 TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3067 MachinePointerInfo());
3068 } else {
3069 SDValue GOTPtr;
3070 if (is64bit) {
3071 setUsesTOCBasePtr(DAG);
3072 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3073 GOTPtr =
3074 DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
3075 } else {
3076 if (!TM.isPositionIndependent())
3077 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3078 else if (picLevel == PICLevel::SmallPIC)
3079 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3080 else
3081 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3082 }
3083 TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3084 }
3085 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3086 }
3087
3088 if (Model == TLSModel::GeneralDynamic) {
3089 if (Subtarget.isUsingPCRelativeCalls()) {
3090 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3091 PPCII::MO_GOT_TLSGD_PCREL_FLAG);
3092 return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3093 }
3094
3095 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3096 SDValue GOTPtr;
3097 if (is64bit) {
3098 setUsesTOCBasePtr(DAG);
3099 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3100 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3101 GOTReg, TGA);
3102 } else {
3103 if (picLevel == PICLevel::SmallPIC)
3104 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3105 else
3106 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3107 }
3108 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3109 GOTPtr, TGA, TGA);
3110 }
3111
3112 if (Model == TLSModel::LocalDynamic) {
3113 if (Subtarget.isUsingPCRelativeCalls()) {
3114 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3115 PPCII::MO_GOT_TLSLD_PCREL_FLAG);
3116 SDValue MatPCRel =
3117 DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3118 return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3119 }
3120
3121 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3122 SDValue GOTPtr;
3123 if (is64bit) {
3124 setUsesTOCBasePtr(DAG);
3125 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3126 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3127 GOTReg, TGA);
3128 } else {
3129 if (picLevel == PICLevel::SmallPIC)
3130 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3131 else
3132 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3133 }
3134 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3135 PtrVT, GOTPtr, TGA, TGA);
3136 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3137 PtrVT, TLSAddr, TGA);
3138 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3139 }
3140
3141 llvm_unreachable("Unknown TLS model!");
3142}
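// For reference, the local-exec path above corresponds to the classic sequence
// for a hypothetical thread-local variable x on 64-bit targets:
//   addis r3, r13, x@tprel@ha
//   addi  r3, r3,  x@tprel@l
// where r13 is the thread pointer passed in as TLSReg.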
3143
3144SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3145 SelectionDAG &DAG) const {
3146 EVT PtrVT = Op.getValueType();
3147 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3148 SDLoc DL(GSDN);
3149 const GlobalValue *GV = GSDN->getGlobal();
3150
3151 // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3152 // The actual address of the GlobalValue is stored in the TOC.
3153 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3154 if (Subtarget.isUsingPCRelativeCalls()) {
3155 EVT Ty = getPointerTy(DAG.getDataLayout());
3156 if (isAccessedAsGotIndirect(Op)) {
3157 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3158 PPCII::MO_PCREL_FLAG |
3159 PPCII::MO_GOT_FLAG);
3160 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3161 SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3162 MachinePointerInfo());
3163 return Load;
3164 } else {
3165 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3166 PPCII::MO_PCREL_FLAG);
3167 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3168 }
3169 }
3170 setUsesTOCBasePtr(DAG);
3171 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3172 return getTOCEntry(DAG, DL, GA);
3173 }
3174
3175 unsigned MOHiFlag, MOLoFlag;
3176 bool IsPIC = isPositionIndependent();
3177 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3178
3179 if (IsPIC && Subtarget.isSVR4ABI()) {
3180 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3181 GSDN->getOffset(),
3182 PPCII::MO_PIC_FLAG);
3183 return getTOCEntry(DAG, DL, GA);
3184 }
3185
3186 SDValue GAHi =
3187 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3188 SDValue GALo =
3189 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3190
3191 return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3192}
3193
3194SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3195 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3196 SDLoc dl(Op);
3197
3198 if (Op.getValueType() == MVT::v2i64) {
3199 // When the operands themselves are v2i64 values, we need to do something
3200 // special because VSX has no underlying comparison operations for these.
3201 if (Op.getOperand(0).getValueType() == MVT::v2i64) {
3202 // Equality can be handled by casting to the legal type for Altivec
3203 // comparisons, everything else needs to be expanded.
3204 if (CC == ISD::SETEQ || CC == ISD::SETNE) {
3205 return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
3206 DAG.getSetCC(dl, MVT::v4i32,
3207 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
3208 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
3209 CC));
3210 }
3211
3212 return SDValue();
3213 }
3214
3215 // We handle most of these in the usual way.
3216 return Op;
3217 }
3218
3219 // If we're comparing for equality to zero, expose the fact that this is
3220 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3221 // fold the new nodes.
3222 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3223 return V;
3224
3225 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
3226 // Leave comparisons against 0 and -1 alone for now, since they're usually
3227 // optimized. FIXME: revisit this when we can custom lower all setcc
3228 // optimizations.
3229 if (C->isAllOnesValue() || C->isNullValue())
3230 return SDValue();
3231 }
3232
3233 // If we have an integer seteq/setne, turn it into a compare against zero
3234 // by xor'ing the rhs with the lhs, which is faster than setting a
3235 // condition register, reading it back out, and masking the correct bit. The
3236 // normal approach here uses sub to do this instead of xor. Using xor exposes
3237 // the result to other bit-twiddling opportunities.
3238 EVT LHSVT = Op.getOperand(0).getValueType();
3239 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3240 EVT VT = Op.getValueType();
3241 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
3242 Op.getOperand(1));
3243 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3244 }
3245 return SDValue();
3246}
3247
3248SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3249 SDNode *Node = Op.getNode();
3250 EVT VT = Node->getValueType(0);
3251 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3252 SDValue InChain = Node->getOperand(0);
3253 SDValue VAListPtr = Node->getOperand(1);
3254 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3255 SDLoc dl(Node);
3256
3257 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3258
3259 // gpr_index
3260 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3261 VAListPtr, MachinePointerInfo(SV), MVT::i8);
3262 InChain = GprIndex.getValue(1);
3263
3264 if (VT == MVT::i64) {
3265 // Check if GprIndex is even
3266 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3267 DAG.getConstant(1, dl, MVT::i32));
3268 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3269 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3270 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3271 DAG.getConstant(1, dl, MVT::i32));
3272 // Align GprIndex to be even if it isn't
3273 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3274 GprIndex);
3275 }
3276
3277 // fpr index is 1 byte after gpr
3278 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3279 DAG.getConstant(1, dl, MVT::i32));
3280
3281 // fpr
3282 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3283 FprPtr, MachinePointerInfo(SV), MVT::i8);
3284 InChain = FprIndex.getValue(1);
3285
3286 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3287 DAG.getConstant(8, dl, MVT::i32));
3288
3289 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3290 DAG.getConstant(4, dl, MVT::i32));
3291
3292 // areas
3293 SDValue OverflowArea =
3294 DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3295 InChain = OverflowArea.getValue(1);
3296
3297 SDValue RegSaveArea =
3298 DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3299 InChain = RegSaveArea.getValue(1);
3300
3301 // select overflow_area if index > 8
3302 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3303 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3304
3305 // adjustment constant gpr_index * 4/8
3306 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3307 VT.isInteger() ? GprIndex : FprIndex,
3308 DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3309 MVT::i32));
3310
3311 // OurReg = RegSaveArea + RegConstant
3312 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3313 RegConstant);
3314
3315 // Floating types are 32 bytes into RegSaveArea
3316 if (VT.isFloatingPoint())
3317 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3318 DAG.getConstant(32, dl, MVT::i32));
3319
3320 // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3321 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3322 VT.isInteger() ? GprIndex : FprIndex,
3323 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3324 MVT::i32));
3325
3326 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3327 VT.isInteger() ? VAListPtr : FprPtr,
3328 MachinePointerInfo(SV), MVT::i8);
3329
3330 // determine if we should load from reg_save_area or overflow_area
3331 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3332
3333 // increase overflow_area by 4/8 if gpr/fpr > 8
3334 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3335 DAG.getConstant(VT.isInteger() ? 4 : 8,
3336 dl, MVT::i32));
3337
3338 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3339 OverflowAreaPlusN);
3340
3341 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3342 MachinePointerInfo(), MVT::i32);
3343
3344 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3345}
3346
3347SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3348 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3349
3350 // We have to copy the entire va_list struct:
3351 // 2*sizeof(char) + 2 bytes of alignment padding + 2*sizeof(char*) = 12 bytes
3352 return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3353 DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3354 false, true, false, MachinePointerInfo(),
3355 MachinePointerInfo());
3356}
3357
3358SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3359 SelectionDAG &DAG) const {
3360 if (Subtarget.isAIXABI())
3361 report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");
3362
3363 return Op.getOperand(0);
3364}
3365
3366SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3367 SelectionDAG &DAG) const {
3368 if (Subtarget.isAIXABI())
3369 report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");
3370
3371 SDValue Chain = Op.getOperand(0);
3372 SDValue Trmp = Op.getOperand(1); // trampoline
3373 SDValue FPtr = Op.getOperand(2); // nested function
3374 SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3375 SDLoc dl(Op);
3376
3377 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3378 bool isPPC64 = (PtrVT == MVT::i64);
3379 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3380
3381 TargetLowering::ArgListTy Args;
3382 TargetLowering::ArgListEntry Entry;
3383
3384 Entry.Ty = IntPtrTy;
3385 Entry.Node = Trmp; Args.push_back(Entry);
3386
3387 // TrampSize == (isPPC64 ? 48 : 40);
3388 Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
3389 isPPC64 ? MVT::i64 : MVT::i32);
3390 Args.push_back(Entry);
3391
3392 Entry.Node = FPtr; Args.push_back(Entry);
3393 Entry.Node = Nest; Args.push_back(Entry);
3394
3395 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
3396 TargetLowering::CallLoweringInfo CLI(DAG);
3397 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3398 CallingConv::C, Type::getVoidTy(*DAG.getContext()),
3399 DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
3400
3401 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3402 return CallResult.second;
3403}
3404
3405SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3406 MachineFunction &MF = DAG.getMachineFunction();
3407 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3408 EVT PtrVT = getPointerTy(MF.getDataLayout());
3409
3410 SDLoc dl(Op);
3411
3412 if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
3413 // vastart just stores the address of the VarArgsFrameIndex slot into the
3414 // memory location argument.
3415 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3416 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3417 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3418 MachinePointerInfo(SV));
3419 }
3420
3421 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
3422 // We assume the given va_list is already allocated.
3423 //
3424 // typedef struct {
3425 // char gpr; /* index into the array of 8 GPRs
3426 // * stored in the register save area
3427 // * gpr=0 corresponds to r3,
3428 // * gpr=1 to r4, etc.
3429 // */
3430 // char fpr; /* index into the array of 8 FPRs
3431 // * stored in the register save area
3432 // * fpr=0 corresponds to f1,
3433 // * fpr=1 to f2, etc.
3434 // */
3435 // char *overflow_arg_area;
3436 // /* location on stack that holds
3437 // * the next overflow argument
3438 // */
3439 // char *reg_save_area;
3440 // /* where r3:r10 and f1:f8 (if saved)
3441 // * are stored
3442 // */
3443 // } va_list[1];
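// Resulting byte layout, assuming 32-bit pointers as in this ABI:
//   offset 0: gpr, offset 1: fpr, offset 4: overflow_arg_area,
//   offset 8: reg_save_area
// which matches the +1 / +3 / +4 pointer adjustments applied by the stores
// below (ConstFPROffset, ConstStackOffset and ConstFrameOffset).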
3444
3445 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
3446 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
3447 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
3448 PtrVT);
3449 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3450 PtrVT);
3451
3452 uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
3453 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
3454
3455 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
3456 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
3457
3458 uint64_t FPROffset = 1;
3459 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
3460
3461 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3462
3463 // Store first byte : number of int regs
3464 SDValue firstStore =
3465 DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
3466 MachinePointerInfo(SV), MVT::i8);
3467 uint64_t nextOffset = FPROffset;
3468 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
3469 ConstFPROffset);
3470
3471 // Store second byte : number of float regs
3472 SDValue secondStore =
3473 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
3474 MachinePointerInfo(SV, nextOffset), MVT::i8);
3475 nextOffset += StackOffset;
3476 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
3477
3478 // Store second word : arguments given on stack
3479 SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
3480 MachinePointerInfo(SV, nextOffset));
3481 nextOffset += FrameOffset;
3482 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
3483
3484 // Store third word : arguments given in registers
3485 return DAG.getStore(thirdStore, dl, FR, nextPtr,
3486 MachinePointerInfo(SV, nextOffset));
3487}
3488
3489/// FPR - The set of FP registers that should be allocated for arguments
3490/// on Darwin and AIX.
3491static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
3492 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
3493 PPC::F11, PPC::F12, PPC::F13};
3494
3495/// CalculateStackSlotSize - Calculates the size reserved for this argument on
3496/// the stack.
3497static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
3498 unsigned PtrByteSize) {
3499 unsigned ArgSize = ArgVT.getStoreSize();
3500 if (Flags.isByVal())
3501 ArgSize = Flags.getByValSize();
3502
3503 // Round up to multiples of the pointer size, except for array members,
3504 // which are always packed.
3505 if (!Flags.isInConsecutiveRegs())
3506 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3507
3508 return ArgSize;
3509}
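// For instance, with a hypothetical 8-byte pointer size, a 12-byte by-value
// aggregate reserves 16 bytes of stack, while an f32 member of an argument
// split across consecutive registers stays at 4 bytes, since such members are
// packed rather than rounded up to the pointer size.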
3510
3511/// CalculateStackSlotAlignment - Calculates the alignment of this argument
3512/// on the stack.
3513static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
3514 ISD::ArgFlagsTy Flags,
3515 unsigned PtrByteSize) {
3516 Align Alignment(PtrByteSize);
3517
3518 // Altivec parameters are padded to a 16 byte boundary.
3519 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3520 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3521 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3522 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3523 Alignment = Align(16);
3524
3525 // ByVal parameters are aligned as requested.
3526 if (Flags.isByVal()) {
3527 auto BVAlign = Flags.getNonZeroByValAlign();
3528 if (BVAlign > PtrByteSize) {
3529 if (BVAlign.value() % PtrByteSize != 0)
3530 llvm_unreachable(
3531 "ByVal alignment is not a multiple of the pointer size");
3532
3533 Alignment = BVAlign;
3534 }
3535 }
3536
3537 // Array members are always packed to their original alignment.
3538 if (Flags.isInConsecutiveRegs()) {
3539 // If the array member was split into multiple registers, the first
3540 // needs to be aligned to the size of the full type. (Except for
3541 // ppcf128, which is only aligned as its f64 components.)
3542 if (Flags.isSplit() && OrigVT != MVT::ppcf128)
3543 Alignment = Align(OrigVT.getStoreSize());
3544 else
3545 Alignment = Align(ArgVT.getStoreSize());
3546 }
3547
3548 return Alignment;
3549}
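// For example, on a hypothetical 64-bit target (PtrByteSize = 8): an f64
// argument keeps the default 8-byte alignment, a v4i32 argument is padded to
// 16 bytes, and a by-value argument requesting "align 32" keeps its 32-byte
// alignment because 32 is a multiple of the pointer size.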
3550
3551/// CalculateStackSlotUsed - Return whether this argument will use its
3552/// stack slot (instead of being passed in registers). ArgOffset,
3553/// AvailableFPRs, and AvailableVRs must hold the current argument
3554/// position, and will be updated to account for this argument.
3555static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
3556 unsigned PtrByteSize, unsigned LinkageSize,
3557 unsigned ParamAreaSize, unsigned &ArgOffset,
3558 unsigned &AvailableFPRs,
3559 unsigned &AvailableVRs) {
3560 bool UseMemory = false;
3561
3562 // Respect alignment of argument on the stack.
3563 Align Alignment =
3564 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
3565 ArgOffset = alignTo(ArgOffset, Alignment);
3566 // If there's no space left in the argument save area, we must
3567 // use memory (this check also catches zero-sized arguments).
3568 if (ArgOffset >= LinkageSize + ParamAreaSize)
3569 UseMemory = true;
3570
3571 // Allocate argument on the stack.
3572 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
3573 if (Flags.isInConsecutiveRegsLast())
3574 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3575 // If we overran the argument save area, we must use memory
3576 // (this check catches arguments passed partially in memory)
3577 if (ArgOffset > LinkageSize + ParamAreaSize)
3578 UseMemory = true;
3579
3580 // However, if the argument is actually passed in an FPR or a VR,
3581 // we don't use memory after all.
3582 if (!Flags.isByVal()) {
3583 if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
3584 if (AvailableFPRs > 0) {
3585 --AvailableFPRs;
3586 return false;
3587 }
3588 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3589 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3590 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3591 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3592 if (AvailableVRs > 0) {
3593 --AvailableVRs;
3594 return false;
3595 }
3596 }
3597
3598 return UseMemory;
3599}
3600
3601/// EnsureStackAlignment - Round stack frame size up from NumBytes to
3602/// ensure minimum alignment required for target.
3603static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
3604 unsigned NumBytes) {
3605 return alignTo(NumBytes, Lowering->getStackAlign());
3606}
3607
3608SDValue PPCTargetLowering::LowerFormalArguments(
3609 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3610 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3611 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3612 if (Subtarget.isAIXABI())
3613 return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
3614 InVals);
3615 if (Subtarget.is64BitELFABI())
3616 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
3617 InVals);
3618 if (Subtarget.is32BitELFABI())
3619 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
3620 InVals);
3621
3622 return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, dl, DAG,
3623 InVals);
3624}
3625
3626SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
3627 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3628 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3629 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3630
3631 // 32-bit SVR4 ABI Stack Frame Layout:
3632 // +-----------------------------------+
3633 // +--> | Back chain |
3634 // | +-----------------------------------+
3635 // | | Floating-point register save area |
3636 // | +-----------------------------------+
3637 // | | General register save area |
3638 // | +-----------------------------------+
3639 // | | CR save word |
3640 // | +-----------------------------------+
3641 // | | VRSAVE save word |
3642 // | +-----------------------------------+
3643 // | | Alignment padding |
3644 // | +-----------------------------------+
3645 // | | Vector register save area |
3646 // | +-----------------------------------+
3647 // | | Local variable space |
3648 // | +-----------------------------------+
3649 // | | Parameter list area |
3650 // | +-----------------------------------+
3651 // | | LR save word |
3652 // | +-----------------------------------+
3653 // SP--> +--- | Back chain |
3654 // +-----------------------------------+
3655 //
3656 // Specifications:
3657 // System V Application Binary Interface PowerPC Processor Supplement
3658 // AltiVec Technology Programming Interface Manual
3659
3660 MachineFunction &MF = DAG.getMachineFunction();
3661 MachineFrameInfo &MFI = MF.getFrameInfo();
3662 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3663
3664 EVT PtrVT = getPointerTy(MF.getDataLayout());
3665 // Potential tail calls could cause overwriting of argument stack slots.
3666 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3667 (CallConv == CallingConv::Fast));
3668 const Align PtrAlign(4);
3669
3670 // Assign locations to all of the incoming arguments.
3671 SmallVector<CCValAssign, 16> ArgLocs;
3672 PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3673 *DAG.getContext());
3674
3675 // Reserve space for the linkage area on the stack.
3676 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3677 CCInfo.AllocateStack(LinkageSize, PtrAlign);
3678 if (useSoftFloat())
3679 CCInfo.PreAnalyzeFormalArguments(Ins);
3680
3681 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
3682 CCInfo.clearWasPPCF128();
3683
3684 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3685 CCValAssign &VA = ArgLocs[i];
3686
3687 // Arguments stored in registers.
3688 if (VA.isRegLoc()) {
3689 const TargetRegisterClass *RC;
3690 EVT ValVT = VA.getValVT();
3691
3692 switch (ValVT.getSimpleVT().SimpleTy) {
3693 default:
3694 llvm_unreachable("ValVT not supported by formal arguments Lowering");
3695 case MVT::i1:
3696 case MVT::i32:
3697 RC = &PPC::GPRCRegClass;
3698 break;
3699 case MVT::f32:
3700 if (Subtarget.hasP8Vector())
3701 RC = &PPC::VSSRCRegClass;
3702 else if (Subtarget.hasSPE())
3703 RC = &PPC::GPRCRegClass;
3704 else
3705 RC = &PPC::F4RCRegClass;
3706 break;
3707 case MVT::f64:
3708 if (Subtarget.hasVSX())
3709 RC = &PPC::VSFRCRegClass;
3710 else if (Subtarget.hasSPE())
3711 // SPE passes doubles in GPR pairs.
3712 RC = &PPC::GPRCRegClass;
3713 else
3714 RC = &PPC::F8RCRegClass;
3715 break;
3716 case MVT::v16i8:
3717 case MVT::v8i16:
3718 case MVT::v4i32:
3719 RC = &PPC::VRRCRegClass;
3720 break;
3721 case MVT::v4f32:
3722 RC = &PPC::VRRCRegClass;
3723 break;
3724 case MVT::v2f64:
3725 case MVT::v2i64:
3726 RC = &PPC::VRRCRegClass;
3727 break;
3728 }
3729
3730 SDValue ArgValue;
3731 // Transform the arguments stored in physical registers into
3732 // virtual ones.
3733 if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
3734 assert(i + 1 < e && "No second half of double precision argument");
3735 unsigned RegLo = MF.addLiveIn(VA.getLocReg(), RC);
3736 unsigned RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
3737 SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
3738 SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
3739 if (!Subtarget.isLittleEndian())
3740 std::swap (ArgValueLo, ArgValueHi);
3741 ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
3742 ArgValueHi);
3743 } else {
3744 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3745 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
3746 ValVT == MVT::i1 ? MVT::i32 : ValVT);
3747 if (ValVT == MVT::i1)
3748 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
3749 }
3750
3751 InVals.push_back(ArgValue);
3752 } else {
3753 // Argument stored in memory.
3754 assert(VA.isMemLoc());
3755
3756 // Get the extended size of the argument type in stack
3757 unsigned ArgSize = VA.getLocVT().getStoreSize();
3758 // Get the actual size of the argument type
3759 unsigned ObjSize = VA.getValVT().getStoreSize();
3760 unsigned ArgOffset = VA.getLocMemOffset();
3761 // Stack objects in PPC32 are right justified.
3762 ArgOffset += ArgSize - ObjSize;
3763 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
3764
3765 // Create load nodes to retrieve arguments from the stack.
3766 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3767 InVals.push_back(
3768 DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
3769 }
3770 }
3771
3772 // Assign locations to all of the incoming aggregate by value arguments.
3773 // Aggregates passed by value are stored in the local variable space of the
3774 // caller's stack frame, right above the parameter list area.
3775 SmallVector<CCValAssign, 16> ByValArgLocs;
3776 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
3777 ByValArgLocs, *DAG.getContext());
3778
3779 // Reserve stack space for the allocations in CCInfo.
3780 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);
3781
3782 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
3783
3784 // Area that is at least reserved in the caller of this function.
3785 unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
3786 MinReservedArea = std::max(MinReservedArea, LinkageSize);
3787
3788 // Set the size that is at least reserved in caller of this function. Tail
3789 // call optimized function's reserved stack space needs to be aligned so that
3790 // taking the difference between two stack areas will result in an aligned
3791 // stack.
3792 MinReservedArea =
3793 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
3794 FuncInfo->setMinReservedArea(MinReservedArea);
3795
3796 SmallVector<SDValue, 8> MemOps;
3797
3798  // If the function takes a variable number of arguments, make a frame index
3799  // for the start of the first vararg value... for expansion of llvm.va_start.
3800 if (isVarArg) {
3801 static const MCPhysReg GPArgRegs[] = {
3802 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
3803 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
3804 };
3805 const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
3806
3807 static const MCPhysReg FPArgRegs[] = {
3808 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
3809 PPC::F8
3810 };
3811 unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
3812
3813 if (useSoftFloat() || hasSPE())
3814 NumFPArgRegs = 0;
3815
3816 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
3817 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
3818
3819 // Make room for NumGPArgRegs and NumFPArgRegs.
3820 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
3821 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
3822
3823 FuncInfo->setVarArgsStackOffset(
3824 MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
3825 CCInfo.getNextStackOffset(), true));
3826
3827 FuncInfo->setVarArgsFrameIndex(
3828 MFI.CreateStackObject(Depth, Align(8), false));
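                    // A quick check on the arithmetic above (rough editorial note): with all
                    // eight GPRs (4 bytes each on PPC32) and all eight FPRs (8 bytes each)
                    // potentially holding varargs, Depth = 8*4 + 8*8 = 96 bytes; when SPE or
                    // soft-float forces NumFPArgRegs to 0, the save area shrinks to 32 bytes.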
3829 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3830
3831 // The fixed integer arguments of a variadic function are stored to the
3832 // VarArgsFrameIndex on the stack so that they may be loaded by
3833 // dereferencing the result of va_next.
3834 for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
3835 // Get an existing live-in vreg, or add a new one.
3836 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
3837 if (!VReg)
3838 VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
3839
3840 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3841 SDValue Store =
3842 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3843 MemOps.push_back(Store);
3844 // Increment the address by four for the next argument to store
3845 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
3846 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3847 }
3848
3849 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
3850 // is set.
3851 // The double arguments are stored to the VarArgsFrameIndex
3852 // on the stack.
3853 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
3854 // Get an existing live-in vreg, or add a new one.
3855 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
3856 if (!VReg)
3857 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
3858
3859 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
3860 SDValue Store =
3861 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3862 MemOps.push_back(Store);
3863 // Increment the address by eight for the next argument to store
3864 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
3865 PtrVT);
3866 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3867 }
3868 }
3869
3870 if (!MemOps.empty())
3871 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3872
3873 return Chain;
3874}
3875
3876// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3877// value to MVT::i64 and then truncate to the correct register size.
3878SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
3879 EVT ObjectVT, SelectionDAG &DAG,
3880 SDValue ArgVal,
3881 const SDLoc &dl) const {
3882 if (Flags.isSExt())
3883 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
3884 DAG.getValueType(ObjectVT));
3885 else if (Flags.isZExt())
3886 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
3887 DAG.getValueType(ObjectVT));
3888
3889 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
3890}
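// A rough sketch (for illustration only; not verbatim SelectionDAG dump output)
// of what the helper above builds for a signed i32 argument arriving in an i64
// GPR:
//
//   t1: i64 = CopyFromReg %vreg        ; the value as it sits in the 64-bit register
//   t2: i64 = AssertSext t1, i32       ; asserts the top 32 bits are a sign-extension
//   t3: i32 = truncate t2              ; the i32 value handed back to the caller
//
// The zero-extended case is identical with AssertZext in place of AssertSext.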
3891
3892SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
3893 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3894 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3895 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3896 // TODO: add description of PPC stack frame format, or at least some docs.
3897 //
3898 bool isELFv2ABI = Subtarget.isELFv2ABI();
3899 bool isLittleEndian = Subtarget.isLittleEndian();
3900 MachineFunction &MF = DAG.getMachineFunction();
3901 MachineFrameInfo &MFI = MF.getFrameInfo();
3902 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3903
3904   assert(!(CallConv == CallingConv::Fast && isVarArg) &&
3905          "fastcc not supported on varargs functions");
3906
3907 EVT PtrVT = getPointerTy(MF.getDataLayout());
3908 // Potential tail calls could cause overwriting of argument stack slots.
3909 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3910 (CallConv == CallingConv::Fast));
3911 unsigned PtrByteSize = 8;
3912 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3913
3914 static const MCPhysReg GPR[] = {
3915 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
3916 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
3917 };
3918 static const MCPhysReg VR[] = {
3919 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
3920 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
3921 };
3922
3923 const unsigned Num_GPR_Regs = array_lengthof(GPR);
3924 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
3925 const unsigned Num_VR_Regs = array_lengthof(VR);
3926
3927 // Do a first pass over the arguments to determine whether the ABI
3928 // guarantees that our caller has allocated the parameter save area
3929 // on its stack frame. In the ELFv1 ABI, this is always the case;
3930 // in the ELFv2 ABI, it is true if this is a vararg function or if
3931 // any parameter is located in a stack slot.
3932
3933 bool HasParameterArea = !isELFv2ABI || isVarArg;
3934 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
3935 unsigned NumBytes = LinkageSize;
3936 unsigned AvailableFPRs = Num_FPR_Regs;
3937 unsigned AvailableVRs = Num_VR_Regs;
3938 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
3939 if (Ins[i].Flags.isNest())
3940 continue;
3941
3942 if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
3943 PtrByteSize, LinkageSize, ParamAreaSize,
3944 NumBytes, AvailableFPRs, AvailableVRs))
3945 HasParameterArea = true;
3946 }
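  // Worked example for the pass above (assuming the ELFv2 ABI): a non-variadic
  // callee taking nine i64 arguments overflows the eight GPRs (X3-X10), so
  // CalculateStackSlotUsed reports a stack slot for the ninth argument and
  // HasParameterArea becomes true; with eight or fewer register-passed
  // arguments it stays false and the caller may omit the parameter save area.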
3947
3948 // Add DAG nodes to load the arguments or copy them out of registers. On
3949 // entry to a function on PPC, the arguments start after the linkage area,
3950 // although the first ones are often in registers.
3951
3952 unsigned ArgOffset = LinkageSize;
3953 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
3954 SmallVector<SDValue, 8> MemOps;
3955 Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
3956 unsigned CurArgIdx = 0;
3957 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
3958 SDValue ArgVal;
3959 bool needsLoad = false;
3960 EVT ObjectVT = Ins[ArgNo].VT;
3961 EVT OrigVT = Ins[ArgNo].ArgVT;
3962 unsigned ObjSize = ObjectVT.getStoreSize();
3963 unsigned ArgSize = ObjSize;
3964 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
3965 if (Ins[ArgNo].isOrigArg()) {
3966 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
3967 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
3968 }
3969 // We re-align the argument offset for each argument, except when using the
3970 // fast calling convention, when we need to make sure we do that only when
3971 // we'll actually use a stack slot.
3972 unsigned CurArgOffset;
3973 Align Alignment;
3974 auto ComputeArgOffset = [&]() {
3975 /* Respect alignment of argument on the stack. */
3976 Alignment =
3977 CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
3978 ArgOffset = alignTo(ArgOffset, Alignment);
3979 CurArgOffset = ArgOffset;
3980 };
3981
3982 if (CallConv != CallingConv::Fast) {
3983 ComputeArgOffset();
3984
3985 /* Compute GPR index associated with argument offset. */
3986 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
3987 GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
3988 }
3989
3990 // FIXME the codegen can be much improved in some cases.
3991 // We do not have to keep everything in memory.
3992 if (Flags.isByVal()) {
3993       assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
3994
3995 if (CallConv == CallingConv::Fast)
3996 ComputeArgOffset();
3997
3998 // ObjSize is the true size, ArgSize rounded up to multiple of registers.
3999 ObjSize = Flags.getByValSize();
4000 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4001 // Empty aggregate parameters do not take up registers. Examples:
4002 // struct { } a;
4003 // union { } b;
4004 // int c[0];
4005 // etc. However, we have to provide a place-holder in InVals, so
4006 // pretend we have an 8-byte item at the current address for that
4007 // purpose.
4008 if (!ObjSize) {
4009 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4010 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4011 InVals.push_back(FIN);
4012 continue;
4013 }
4014
4015 // Create a stack object covering all stack doublewords occupied
4016 // by the argument. If the argument is (fully or partially) on
4017 // the stack, or if the argument is fully in registers but the
4018 // caller has allocated the parameter save anyway, we can refer
4019 // directly to the caller's stack frame. Otherwise, create a
4020 // local copy in our own frame.
4021 int FI;
4022 if (HasParameterArea ||
4023 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4024 FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4025 else
4026 FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4027 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4028
4029 // Handle aggregates smaller than 8 bytes.
4030 if (ObjSize < PtrByteSize) {
4031 // The value of the object is its address, which differs from the
4032 // address of the enclosing doubleword on big-endian systems.
4033 SDValue Arg = FIN;
4034 if (!isLittleEndian) {
4035 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4036 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4037 }
4038 InVals.push_back(Arg);
4039
4040 if (GPR_idx != Num_GPR_Regs) {
4041 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4042 FuncInfo->addLiveInAttr(VReg, Flags);
4043 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4044 SDValue Store;
4045
4046 if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
4047 EVT ObjType = (ObjSize == 1 ? MVT::i8 :
4048 (ObjSize == 2 ? MVT::i16 : MVT::i32));
4049 Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4050 MachinePointerInfo(&*FuncArg), ObjType);
4051 } else {
4052 // For sizes that don't fit a truncating store (3, 5, 6, 7),
4053 // store the whole register as-is to the parameter save area
4054 // slot.
4055 Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4056 MachinePointerInfo(&*FuncArg));
4057 }
4058
4059 MemOps.push_back(Store);
4060 }
4061 // Whether we copied from a register or not, advance the offset
4062 // into the parameter save area by a full doubleword.
4063 ArgOffset += PtrByteSize;
4064 continue;
4065 }
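      // Worked example for the block above (assuming a big-endian 64-bit ELF
      // target): a 3-byte byval struct occupies the last three bytes of its
      // doubleword, so the address pushed into InVals is FIN + (8 - 3) = FIN + 5,
      // and because 3 bytes do not fit a truncating store, the whole 8-byte GPR
      // image is spilled to FIN.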
4066
4067 // The value of the object is its address, which is the address of
4068 // its first stack doubleword.
4069 InVals.push_back(FIN);
4070
4071 // Store whatever pieces of the object are in registers to memory.
4072 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4073 if (GPR_idx == Num_GPR_Regs)
4074 break;
4075
4076 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4077 FuncInfo->addLiveInAttr(VReg, Flags);
4078 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4079 SDValue Addr = FIN;
4080 if (j) {
4081 SDValue Off = DAG.getConstant(j, dl, PtrVT);
4082 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4083 }
4084 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
4085 MachinePointerInfo(&*FuncArg, j));
4086 MemOps.push_back(Store);
4087 ++GPR_idx;
4088 }
4089 ArgOffset += ArgSize;
4090 continue;
4091 }
4092
4093 switch (ObjectVT.getSimpleVT().SimpleTy) {
4094     default: llvm_unreachable("Unhandled argument type!");
4095 case MVT::i1:
4096 case MVT::i32:
4097 case MVT::i64:
4098 if (Flags.isNest()) {
4099 // The 'nest' parameter, if any, is passed in R11.
4100 unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4101 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4102
4103 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4104 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4105
4106 break;
4107 }
4108
4109 // These can be scalar arguments or elements of an integer array type
4110 // passed directly. Clang may use those instead of "byval" aggregate
4111 // types to avoid forcing arguments to memory unnecessarily.
4112 if (GPR_idx != Num_GPR_Regs) {
4113 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4114 FuncInfo->addLiveInAttr(VReg, Flags);
4115 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4116
4117 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4118 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4119 // value to MVT::i64 and then truncate to the correct register size.
4120 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4121 } else {
4122 if (CallConv == CallingConv::Fast)
4123 ComputeArgOffset();
4124
4125 needsLoad = true;
4126 ArgSize = PtrByteSize;
4127 }
4128 if (CallConv != CallingConv::Fast || needsLoad)
4129 ArgOffset += 8;
4130 break;
4131
4132 case MVT::f32:
4133 case MVT::f64:
4134 // These can be scalar arguments or elements of a float array type
4135 // passed directly. The latter are used to implement ELFv2 homogenous
4136 // float aggregates.
4137 if (FPR_idx != Num_FPR_Regs) {
4138 unsigned VReg;
4139
4140 if (ObjectVT == MVT::f32)
4141 VReg = MF.addLiveIn(FPR[FPR_idx],
4142 Subtarget.hasP8Vector()
4143 ? &PPC::VSSRCRegClass
4144 : &PPC::F4RCRegClass);
4145 else
4146 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4147 ? &PPC::VSFRCRegClass
4148 : &PPC::F8RCRegClass);
4149
4150 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4151 ++FPR_idx;
4152 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4153 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4154 // once we support fp <-> gpr moves.
4155
4156 // This can only ever happen in the presence of f32 array types,
4157 // since otherwise we never run out of FPRs before running out
4158 // of GPRs.
4159 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4160 FuncInfo->addLiveInAttr(VReg, Flags);
4161 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4162
4163 if (ObjectVT == MVT::f32) {
4164 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4165 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4166 DAG.getConstant(32, dl, MVT::i32));
4167 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4168 }
4169
4170 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4171 } else {
4172 if (CallConv == CallingConv::Fast)
4173 ComputeArgOffset();
4174
4175 needsLoad = true;
4176 }
4177
4178 // When passing an array of floats, the array occupies consecutive
4179 // space in the argument area; only round up to the next doubleword
4180 // at the end of the array. Otherwise, each float takes 8 bytes.
4181 if (CallConv != CallingConv::Fast || needsLoad) {
4182 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4183 ArgOffset += ArgSize;
4184 if (Flags.isInConsecutiveRegsLast())
4185 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4186 }
4187 break;
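      // Worked example for the f32/f64 case above (default calling convention):
      // an ELFv2 homogeneous aggregate of three floats marked InConsecutiveRegs
      // advances ArgOffset by 4 bytes per element and rounds up only after the
      // last one, 3*4 = 12 -> 16 bytes, whereas a lone f32 scalar consumes a
      // full 8-byte doubleword.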
4188 case MVT::v4f32:
4189 case MVT::v4i32:
4190 case MVT::v8i16:
4191 case MVT::v16i8:
4192 case MVT::v2f64:
4193 case MVT::v2i64:
4194 case MVT::v1i128:
4195 case MVT::f128:
4196 // These can be scalar arguments or elements of a vector array type
4197 // passed directly. The latter are used to implement ELFv2 homogenous
4198 // vector aggregates.
4199 if (VR_idx != Num_VR_Regs) {
4200 unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4201 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4202 ++VR_idx;
4203 } else {
4204 if (CallConv == CallingConv::Fast)
4205 ComputeArgOffset();
4206 needsLoad = true;
4207 }
4208 if (CallConv != CallingConv::Fast || needsLoad)
4209 ArgOffset += 16;
4210 break;
4211 }
4212
4213 // We need to load the argument to a virtual register if we determined
4214 // above that we ran out of physical registers of the appropriate type.
4215 if (needsLoad) {
4216 if (ObjSize < ArgSize && !isLittleEndian)
4217 CurArgOffset += ArgSize - ObjSize;
4218 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4219 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4220 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4221 }
4222
4223 InVals.push_back(ArgVal);
4224 }
4225
4226 // Area that is at least reserved in the caller of this function.
4227 unsigned MinReservedArea;
4228 if (HasParameterArea)
4229 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4230 else
4231 MinReservedArea = LinkageSize;
4232
4233 // Set the size that is at least reserved in caller of this function. Tail
4234 // call optimized functions' reserved stack space needs to be aligned so that
4235 // taking the difference between two stack areas will result in an aligned
4236 // stack.
4237 MinReservedArea =
4238 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4239 FuncInfo->setMinReservedArea(MinReservedArea);
4240
4241  // If the function takes a variable number of arguments, make a frame index
4242  // for the start of the first vararg value... for expansion of llvm.va_start.
4243  // The ELFv2 ABI specification notes:
4244 // C programs that are intended to be *portable* across different compilers
4245 // and architectures must use the header file <stdarg.h> to deal with variable
4246 // argument lists.
4247 if (isVarArg && MFI.hasVAStart()) {
4248 int Depth = ArgOffset;
4249
4250 FuncInfo->setVarArgsFrameIndex(
4251 MFI.CreateFixedObject(PtrByteSize, Depth, true));
4252 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4253
4254 // If this function is vararg, store any remaining integer argument regs
4255 // to their spots on the stack so that they may be loaded by dereferencing
4256 // the result of va_next.
4257 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4258 GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4259 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4260 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4261 SDValue Store =
4262 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4263 MemOps.push_back(Store);
4264       // Increment the address by eight for the next argument to store
4265 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4266 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4267 }
4268 }
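    // Rough sketch of the spill loop above for a hypothetical prototype
    //   long sum(long n, ...);
    // n arrives in X3, so the loop starts at GPR_idx = 1 and stores X4..X10 into
    // consecutive doublewords of the fixed object:
    //
    //   store X4 -> FIN; store X5 -> FIN+8; ... ; store X10 -> FIN+48;
    //
    // va_arg can then walk the save area with plain 8-byte loads.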
4269
4270 if (!MemOps.empty())
4271 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4272
4273 return Chain;
4274}
4275
4276SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
4277 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4278 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4279 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4280 // TODO: add description of PPC stack frame format, or at least some docs.
4281 //
4282 MachineFunction &MF = DAG.getMachineFunction();
4283 MachineFrameInfo &MFI = MF.getFrameInfo();
4284 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4285
4286 EVT PtrVT = getPointerTy(MF.getDataLayout());
4287 bool isPPC64 = PtrVT == MVT::i64;
4288 // Potential tail calls could cause overwriting of argument stack slots.
4289 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4290 (CallConv == CallingConv::Fast));
4291 unsigned PtrByteSize = isPPC64 ? 8 : 4;
4292 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4293 unsigned ArgOffset = LinkageSize;
4294 // Area that is at least reserved in caller of this function.
4295 unsigned MinReservedArea = ArgOffset;
4296
4297 static const MCPhysReg GPR_32[] = { // 32-bit registers.
4298 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4299 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4300 };
4301 static const MCPhysReg GPR_64[] = { // 64-bit registers.
4302 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4303 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4304 };
4305 static const MCPhysReg VR[] = {
4306 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4307 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4308 };
4309
4310 const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
4311 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4312 const unsigned Num_VR_Regs = array_lengthof( VR);
4313
4314 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4315
4316 const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
4317
4318 // In 32-bit non-varargs functions, the stack space for vectors is after the
4319 // stack space for non-vectors. We do not use this space unless we have
4320 // too many vectors to fit in registers, something that only occurs in
4321 // constructed examples:), but we have to walk the arglist to figure
4322 // that out...for the pathological case, compute VecArgOffset as the
4323 // start of the vector parameter area. Computing VecArgOffset is the
4324 // entire point of the following loop.
4325 unsigned VecArgOffset = ArgOffset;
4326 if (!isVarArg && !isPPC64) {
4327 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
4328 ++ArgNo) {
4329 EVT ObjectVT = Ins[ArgNo].VT;
4330 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4331
4332 if (Flags.isByVal()) {
4333 // ObjSize is the true size, ArgSize rounded up to multiple of regs.
4334 unsigned ObjSize = Flags.getByValSize();
4335 unsigned ArgSize =
4336 ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4337 VecArgOffset += ArgSize;
4338 continue;
4339 }
4340
4341 switch(ObjectVT.getSimpleVT().SimpleTy) {
4342 default: llvm_unreachable("Unhandled argument type!")::llvm::llvm_unreachable_internal("Unhandled argument type!",
"/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 4342)
;
4343 case MVT::i1:
4344 case MVT::i32:
4345 case MVT::f32:
4346 VecArgOffset += 4;
4347 break;
4348 case MVT::i64: // PPC64
4349 case MVT::f64:
4350 // FIXME: We are guaranteed to be !isPPC64 at this point.
4351 // Does MVT::i64 apply?
4352 VecArgOffset += 8;
4353 break;
4354 case MVT::v4f32:
4355 case MVT::v4i32:
4356 case MVT::v8i16:
4357 case MVT::v16i8:
4358 // Nothing to do, we're only looking at Nonvector args here.
4359 break;
4360 }
4361 }
4362 }
4363 // We've found where the vector parameter area in memory is. Skip the
4364 // first 12 parameters; these don't use that memory.
4365 VecArgOffset = ((VecArgOffset+15)/16)*16;
4366 VecArgOffset += 12*16;
4367
4368 // Add DAG nodes to load the arguments or copy them out of registers. On
4369 // entry to a function on PPC, the arguments start after the linkage area,
4370 // although the first ones are often in registers.
4371
4372 SmallVector<SDValue, 8> MemOps;
4373 unsigned nAltivecParamsAtEnd = 0;
4374 Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4375 unsigned CurArgIdx = 0;
4376 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4377 SDValue ArgVal;
4378 bool needsLoad = false;
4379 EVT ObjectVT = Ins[ArgNo].VT;
4380 unsigned ObjSize = ObjectVT.getSizeInBits()/8;
4381 unsigned ArgSize = ObjSize;
4382 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4383 if (Ins[ArgNo].isOrigArg()) {
4384 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4385 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4386 }
4387 unsigned CurArgOffset = ArgOffset;
4388
4389 // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
4390 if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
4391 ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
4392 if (isVarArg || isPPC64) {
4393 MinReservedArea = ((MinReservedArea+15)/16)*16;
4394 MinReservedArea += CalculateStackSlotSize(ObjectVT,
4395 Flags,
4396 PtrByteSize);
4397 } else nAltivecParamsAtEnd++;
4398 } else
4399 // Calculate min reserved area.
4400 MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
4401 Flags,
4402 PtrByteSize);
4403
4404 // FIXME the codegen can be much improved in some cases.
4405 // We do not have to keep everything in memory.
4406 if (Flags.isByVal()) {
4407       assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4408
4409 // ObjSize is the true size, ArgSize rounded up to multiple of registers.
4410 ObjSize = Flags.getByValSize();
4411 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4412 // Objects of size 1 and 2 are right justified, everything else is
4413 // left justified. This means the memory address is adjusted forwards.
4414 if (ObjSize==1 || ObjSize==2) {
4415 CurArgOffset = CurArgOffset + (4 - ObjSize);
4416 }
4417 // The value of the object is its address.
4418 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, false, true);
4419 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4420 InVals.push_back(FIN);
4421 if (ObjSize==1 || ObjSize==2) {
4422 if (GPR_idx != Num_GPR_Regs) {
4423 unsigned VReg;
4424 if (isPPC64)
4425 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4426 else
4427 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4428 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4429 EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
4430 SDValue Store =
4431 DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
4432 MachinePointerInfo(&*FuncArg), ObjType);
4433 MemOps.push_back(Store);
4434 ++GPR_idx;
4435 }
4436
4437 ArgOffset += PtrByteSize;
4438
4439 continue;
4440 }
4441 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4442 // Store whatever pieces of the object are in registers
4443 // to memory. ArgOffset will be the address of the beginning
4444 // of the object.
4445 if (GPR_idx != Num_GPR_Regs) {
4446 unsigned VReg;
4447 if (isPPC64)
4448 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4449 else
4450 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4451 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4452 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4453 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4454 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4455 MachinePointerInfo(&*FuncArg, j));
4456 MemOps.push_back(Store);
4457 ++GPR_idx;
4458 ArgOffset += PtrByteSize;
4459 } else {
4460 ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
4461 break;
4462 }
4463 }
4464 continue;
4465 }
4466
4467 switch (ObjectVT.getSimpleVT().SimpleTy) {
4468     default: llvm_unreachable("Unhandled argument type!");
4469 case MVT::i1:
4470 case MVT::i32:
4471 if (!isPPC64) {
4472 if (GPR_idx != Num_GPR_Regs) {
4473 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4474 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
4475
4476 if (ObjectVT == MVT::i1)
4477 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);
4478
4479 ++GPR_idx;
4480 } else {
4481 needsLoad = true;
4482 ArgSize = PtrByteSize;
4483 }
4484 // All int arguments reserve stack space in the Darwin ABI.
4485 ArgOffset += PtrByteSize;
4486 break;
4487 }
4488       LLVM_FALLTHROUGH;
4489 case MVT::i64: // PPC64
4490 if (GPR_idx != Num_GPR_Regs) {
4491 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4492 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4493
4494 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4495 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4496 // value to MVT::i64 and then truncate to the correct register size.
4497 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4498
4499 ++GPR_idx;
4500 } else {
4501 needsLoad = true;
4502 ArgSize = PtrByteSize;
4503 }
4504 // All int arguments reserve stack space in the Darwin ABI.
4505 ArgOffset += 8;
4506 break;
4507
4508 case MVT::f32:
4509 case MVT::f64:
4510 // Every 4 bytes of argument space consumes one of the GPRs available for
4511 // argument passing.
4512 if (GPR_idx != Num_GPR_Regs) {
4513 ++GPR_idx;
4514 if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
4515 ++GPR_idx;
4516 }
4517 if (FPR_idx != Num_FPR_Regs) {
4518 unsigned VReg;
4519
4520 if (ObjectVT == MVT::f32)
4521 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
4522 else
4523 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
4524
4525 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4526 ++FPR_idx;
4527 } else {
4528 needsLoad = true;
4529 }
4530
4531 // All FP arguments reserve stack space in the Darwin ABI.
4532 ArgOffset += isPPC64 ? 8 : ObjSize;
4533 break;
4534 case MVT::v4f32:
4535 case MVT::v4i32:
4536 case MVT::v8i16:
4537 case MVT::v16i8:
4538 // Note that vector arguments in registers don't reserve stack space,
4539 // except in varargs functions.
4540 if (VR_idx != Num_VR_Regs) {
4541 unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4542 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4543 if (isVarArg) {
4544 while ((ArgOffset % 16) != 0) {
4545 ArgOffset += PtrByteSize;
4546 if (GPR_idx != Num_GPR_Regs)
4547 GPR_idx++;
4548 }
4549 ArgOffset += 16;
4550 GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
4551 }
4552 ++VR_idx;
4553 } else {
4554 if (!isVarArg && !isPPC64) {
4555 // Vectors go after all the nonvectors.
4556 CurArgOffset = VecArgOffset;
4557 VecArgOffset += 16;
4558 } else {
4559 // Vectors are aligned.
4560 ArgOffset = ((ArgOffset+15)/16)*16;
4561 CurArgOffset = ArgOffset;
4562 ArgOffset += 16;
4563 }
4564 needsLoad = true;
4565 }
4566 break;
4567 }
4568
4569 // We need to load the argument to a virtual register if we determined above
4570 // that we ran out of physical registers of the appropriate type.
4571 if (needsLoad) {
4572 int FI = MFI.CreateFixedObject(ObjSize,
4573 CurArgOffset + (ArgSize - ObjSize),
4574 isImmutable);
4575 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4576 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4577 }
4578
4579 InVals.push_back(ArgVal);
4580 }
4581
4582 // Allow for Altivec parameters at the end, if needed.
4583 if (nAltivecParamsAtEnd) {
4584 MinReservedArea = ((MinReservedArea+15)/16)*16;
4585 MinReservedArea += 16*nAltivecParamsAtEnd;
4586 }
4587
4588 // Area that is at least reserved in the caller of this function.
4589 MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);
4590
4591 // Set the size that is at least reserved in caller of this function. Tail
4592 // call optimized functions' reserved stack space needs to be aligned so that
4593 // taking the difference between two stack areas will result in an aligned
4594 // stack.
4595 MinReservedArea =
4596 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4597 FuncInfo->setMinReservedArea(MinReservedArea);
4598
4599  // If the function takes a variable number of arguments, make a frame index
4600  // for the start of the first vararg value... for expansion of llvm.va_start.
4601 if (isVarArg) {
4602 int Depth = ArgOffset;
4603
4604 FuncInfo->setVarArgsFrameIndex(
4605 MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
4606 Depth, true));
4607 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4608
4609 // If this function is vararg, store any remaining integer argument regs
4610 // to their spots on the stack so that they may be loaded by dereferencing
4611 // the result of va_next.
4612 for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
4613 unsigned VReg;
4614
4615 if (isPPC64)
4616 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4617 else
4618 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4619
4620 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4621 SDValue Store =
4622 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4623 MemOps.push_back(Store);
4624       // Increment the address by the pointer size for the next argument to store
4625 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4626 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4627 }
4628 }
4629
4630 if (!MemOps.empty())
4631 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4632
4633 return Chain;
4634}
4635
4636/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4637/// adjusted to accommodate the arguments for the tailcall.
4638static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4639 unsigned ParamSize) {
4640
4641 if (!isTailCall) return 0;
4642
4643 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4644 unsigned CallerMinReservedArea = FI->getMinReservedArea();
4645 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4646 // Remember only if the new adjustment is bigger.
4647 if (SPDiff < FI->getTailCallSPDelta())
4648 FI->setTailCallSPDelta(SPDiff);
4649
4650 return SPDiff;
4651}
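// Worked example: if the caller reserved 112 bytes of argument area
// (MinReservedArea) and the tail call needs only ParamSize = 96 bytes, then
// SPDiff = 112 - 96 = 16 and no extra room is required; a negative SPDiff means
// the callee needs more space than the caller reserved, and only such smaller
// (more negative) values update TailCallSPDelta.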
4652
4653static bool isFunctionGlobalAddress(SDValue Callee);
4654
4655static bool callsShareTOCBase(const Function *Caller, SDValue Callee,
4656 const TargetMachine &TM) {
4657 // It does not make sense to call callsShareTOCBase() with a caller that
4658 // is PC Relative since PC Relative callers do not have a TOC.
4659#ifndef NDEBUG
4660 const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4661   assert(!STICaller->isUsingPCRelativeCalls() &&
4662          "PC Relative callers do not have a TOC and cannot share a TOC Base");
4663#endif
4664
4665 // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4666 // don't have enough information to determine if the caller and callee share
4667 // the same TOC base, so we have to pessimistically assume they don't for
4668 // correctness.
4669 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4670 if (!G)
4671 return false;
4672
4673 const GlobalValue *GV = G->getGlobal();
4674
4675 // If the callee is preemptable, then the static linker will use a plt-stub
4676 // which saves the toc to the stack, and needs a nop after the call
4677 // instruction to convert to a toc-restore.
4678 if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
4679 return false;
4680
4681 // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4682 // We may need a TOC restore in the situation where the caller requires a
4683 // valid TOC but the callee is PC Relative and does not.
4684 const Function *F = dyn_cast<Function>(GV);
4685 const GlobalAlias *Alias = dyn_cast<GlobalAlias>(GV);
4686
4687 // If we have an Alias we can try to get the function from there.
4688 if (Alias) {
4689 const GlobalObject *GlobalObj = Alias->getBaseObject();
4690 F = dyn_cast<Function>(GlobalObj);
4691 }
4692
4693 // If we still have no valid function pointer we do not have enough
4694 // information to determine if the callee uses PC Relative calls so we must
4695 // assume that it does.
4696 if (!F)
4697 return false;
4698
4699 // If the callee uses PC Relative we cannot guarantee that the callee won't
4700 // clobber the TOC of the caller and so we must assume that the two
4701 // functions do not share a TOC base.
4702 const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4703 if (STICallee->isUsingPCRelativeCalls())
4704 return false;
4705
4706 // The medium and large code models are expected to provide a sufficiently
4707 // large TOC to provide all data addressing needs of a module with a
4708 // single TOC.
4709 if (CodeModel::Medium == TM.getCodeModel() ||
4710 CodeModel::Large == TM.getCodeModel())
4711 return true;
4712
4713 // Otherwise we need to ensure callee and caller are in the same section,
4714 // since the linker may allocate multiple TOCs, and we don't know which
4715 // sections will belong to the same TOC base.
4716 if (!GV->isStrongDefinitionForLinker())
4717 return false;
4718
4719 // Any explicitly-specified sections and section prefixes must also match.
4720 // Also, if we're using -ffunction-sections, then each function is always in
4721 // a different section (the same is true for COMDAT functions).
4722 if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
4723 GV->getSection() != Caller->getSection())
4724 return false;
4725 if (const auto *F = dyn_cast<Function>(GV)) {
4726 if (F->getSectionPrefix() != Caller->getSectionPrefix())
4727 return false;
4728 }
4729
4730 return true;
4731}
4732
4733static bool
4734needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4735 const SmallVectorImpl<ISD::OutputArg> &Outs) {
4736   assert(Subtarget.is64BitELFABI());
4737
4738 const unsigned PtrByteSize = 8;
4739 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4740
4741 static const MCPhysReg GPR[] = {
4742 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4743 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4744 };
4745 static const MCPhysReg VR[] = {
4746 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4747 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4748 };
4749
4750 const unsigned NumGPRs = array_lengthof(GPR);
4751 const unsigned NumFPRs = 13;
4752 const unsigned NumVRs = array_lengthof(VR);
4753 const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4754
4755 unsigned NumBytes = LinkageSize;
4756 unsigned AvailableFPRs = NumFPRs;
4757 unsigned AvailableVRs = NumVRs;
4758
4759 for (const ISD::OutputArg& Param : Outs) {
4760 if (Param.Flags.isNest()) continue;
4761
4762 if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
4763 LinkageSize, ParamAreaSize, NumBytes,
4764 AvailableFPRs, AvailableVRs))
4765 return true;
4766 }
4767 return false;
4768}
4769
4770static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
4771 if (CB.arg_size() != CallerFn->arg_size())
4772 return false;
4773
4774 auto CalleeArgIter = CB.arg_begin();
4775 auto CalleeArgEnd = CB.arg_end();
4776 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4777
4778 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4779 const Value* CalleeArg = *CalleeArgIter;
4780 const Value* CallerArg = &(*CallerArgIter);
4781 if (CalleeArg == CallerArg)
4782 continue;
4783
4784 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4785 // tail call @callee([4 x i64] undef, [4 x i64] %b)
4786 // }
4787 // 1st argument of callee is undef and has the same type as caller.
4788 if (CalleeArg->getType() == CallerArg->getType() &&
4789 isa<UndefValue>(CalleeArg))
4790 continue;
4791
4792 return false;
4793 }
4794
4795 return true;
4796}
4797
4798// Returns true if TCO is possible between the callers and callees
4799// calling conventions.
4800static bool
4801areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
4802 CallingConv::ID CalleeCC) {
4803 // Tail calls are possible with fastcc and ccc.
4804 auto isTailCallableCC = [] (CallingConv::ID CC){
4805 return CC == CallingConv::C || CC == CallingConv::Fast;
4806 };
4807 if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
4808 return false;
4809
4810 // We can safely tail call both fastcc and ccc callees from a c calling
4811 // convention caller. If the caller is fastcc, we may have less stack space
4812 // than a non-fastcc caller with the same signature so disable tail-calls in
4813 // that case.
4814 return CallerCC == CallingConv::C || CallerCC == CalleeCC;
4815}
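// The resulting caller/callee matrix: ccc->ccc, ccc->fastcc and fastcc->fastcc
// are eligible for TCO, while fastcc->ccc is rejected because a fastcc caller
// may have reserved less stack than a ccc caller with the same signature.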
4816
4817bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
4818 SDValue Callee, CallingConv::ID CalleeCC, const CallBase *CB, bool isVarArg,
4819 const SmallVectorImpl<ISD::OutputArg> &Outs,
4820 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
4821 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
4822
4823 if (DisableSCO && !TailCallOpt) return false;
4824
4825 // Variadic argument functions are not supported.
4826 if (isVarArg) return false;
4827
4828 auto &Caller = DAG.getMachineFunction().getFunction();
4829 // Check that the calling conventions are compatible for tco.
4830 if (!areCallingConvEligibleForTCO_64SVR4(Caller.getCallingConv(), CalleeCC))
4831 return false;
4832
4833   // A caller containing any byval parameter is not supported.
4834 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
4835 return false;
4836
4837   // A callee containing any byval parameter is not supported either.
4838   // Note: This is a quick workaround, because in some cases, e.g.
4839 // caller's stack size > callee's stack size, we are still able to apply
4840 // sibling call optimization. For example, gcc is able to do SCO for caller1
4841 // in the following example, but not for caller2.
4842 // struct test {
4843 // long int a;
4844 // char ary[56];
4845 // } gTest;
4846 // __attribute__((noinline)) int callee(struct test v, struct test *b) {
4847 // b->a = v.a;
4848 // return 0;
4849 // }
4850 // void caller1(struct test a, struct test c, struct test *b) {
4851 // callee(gTest, b); }
4852 // void caller2(struct test *b) { callee(gTest, b); }
4853 if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
4854 return false;
4855
4856 // If callee and caller use different calling conventions, we cannot pass
4857 // parameters on stack since offsets for the parameter area may be different.
4858 if (Caller.getCallingConv() != CalleeCC &&
4859 needStackSlotPassParameters(Subtarget, Outs))
4860 return false;
4861
4862 // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
4863 // the caller and callee share the same TOC for TCO/SCO. If the caller and
4864 // callee potentially have different TOC bases then we cannot tail call since
4865 // we need to restore the TOC pointer after the call.
4866 // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
4867 // We cannot guarantee this for indirect calls or calls to external functions.
4868 // When PC-Relative addressing is used, the concept of the TOC is no longer
4869 // applicable so this check is not required.
4870 // Check first for indirect calls.
4871 if (!Subtarget.isUsingPCRelativeCalls() &&
4872 !isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee))
4873 return false;
4874
4875 // Check if we share the TOC base.
4876 if (!Subtarget.isUsingPCRelativeCalls() &&
4877 !callsShareTOCBase(&Caller, Callee, getTargetMachine()))
4878 return false;
4879
4880 // TCO allows altering callee ABI, so we don't have to check further.
4881 if (CalleeCC == CallingConv::Fast && TailCallOpt)
4882 return true;
4883
4884 if (DisableSCO) return false;
4885
4886   // If the callee uses the same argument list that the caller is using, then
4887   // we can apply SCO in this case. If not, we need to check whether the callee
4888   // needs the stack for passing arguments.
4889 // PC Relative tail calls may not have a CallBase.
4890 // If there is no CallBase we cannot verify if we have the same argument
4891 // list so assume that we don't have the same argument list.
4892 if (CB && !hasSameArgumentList(&Caller, *CB) &&
4893 needStackSlotPassParameters(Subtarget, Outs))
4894 return false;
4895 else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
4896 return false;
4897
4898 return true;
4899}
4900
4901/// IsEligibleForTailCallOptimization - Check whether the call is eligible
4902/// for tail call optimization. Targets which want to do tail call
4903/// optimization should implement this function.
4904bool
4905PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
4906 CallingConv::ID CalleeCC,
4907 bool isVarArg,
4908 const SmallVectorImpl<ISD::InputArg> &Ins,
4909 SelectionDAG& DAG) const {
4910 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
4911 return false;
4912
4913 // Variable argument functions are not supported.
4914 if (isVarArg)
4915 return false;
4916
4917 MachineFunction &MF = DAG.getMachineFunction();
4918 CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
4919 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
4920 // Functions containing by val parameters are not supported.
4921 for (unsigned i = 0; i != Ins.size(); i++) {
4922 ISD::ArgFlagsTy Flags = Ins[i].Flags;
4923 if (Flags.isByVal()) return false;
4924 }
4925
4926 // Non-PIC/GOT tail calls are supported.
4927 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
4928 return true;
4929
4930 // At the moment we can only do local tail calls (in same module, hidden
4931 // or protected) if we are generating PIC.
4932 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
4933 return G->getGlobal()->hasHiddenVisibility()
4934 || G->getGlobal()->hasProtectedVisibility();
4935 }
4936
4937 return false;
4938}
4939
4940/// isBLACompatibleAddress - Return the immediate to use if the specified
4941/// 32-bit value is representable in the immediate field of a BxA instruction.
4942static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
4943 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
4944 if (!C) return nullptr;
4945
4946 int Addr = C->getZExtValue();
4947 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
4948 SignExtend32<26>(Addr) != Addr)
4949 return nullptr; // Top 6 bits have to be sext of immediate.
4950
4951 return DAG
4952 .getConstant(
4953 (int)C->getZExtValue() >> 2, SDLoc(Op),
4954 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
4955 .getNode();
4956}
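// Worked example: an absolute call target of 0x1000 has its low two bits clear
// and survives sign-extension from the 26-bit field, so it is returned as the
// immediate 0x1000 >> 2 = 0x400; a misaligned address such as 0x1001, or one
// too large for the field, yields nullptr and a BLA cannot be used.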
4957
4958namespace {
4959
4960struct TailCallArgumentInfo {
4961 SDValue Arg;
4962 SDValue FrameIdxOp;
4963 int FrameIdx = 0;
4964
4965 TailCallArgumentInfo() = default;
4966};
4967
4968} // end anonymous namespace
4969
4970/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
4971static void StoreTailCallArgumentsToStackSlot(
4972 SelectionDAG &DAG, SDValue Chain,
4973 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
4974 SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
4975 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
4976 SDValue Arg = TailCallArgs[i].Arg;
4977 SDValue FIN = TailCallArgs[i].FrameIdxOp;
4978 int FI = TailCallArgs[i].FrameIdx;
4979 // Store relative to framepointer.
4980 MemOpChains.push_back(DAG.getStore(
4981 Chain, dl, Arg, FIN,
4982 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
4983 }
4984}
4985
4986/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
4987/// the appropriate stack slot for the tail call optimized function call.
4988static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
4989 SDValue OldRetAddr, SDValue OldFP,
4990 int SPDiff, const SDLoc &dl) {
4991 if (SPDiff) {
4992 // Calculate the new stack slot for the return address.
4993 MachineFunction &MF = DAG.getMachineFunction();
4994 const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
4995 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
4996 bool isPPC64 = Subtarget.isPPC64();
4997 int SlotSize = isPPC64 ? 8 : 4;
4998 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
4999 int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
5000 NewRetAddrLoc, true);
5001 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5002 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
5003 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
5004 MachinePointerInfo::getFixedStack(MF, NewRetAddr));
5005 }
5006 return Chain;
5007}
5008
5009/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
5010/// the position of the argument.
5011static void
5012CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
5013 SDValue Arg, int SPDiff, unsigned ArgOffset,
5014 SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
5015 int Offset = ArgOffset + SPDiff;
5016 uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
5017 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
5018 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5019 SDValue FIN = DAG.getFrameIndex(FI, VT);
5020 TailCallArgumentInfo Info;
5021 Info.Arg = Arg;
5022 Info.FrameIdxOp = FIN;
5023 Info.FrameIdx = FI;
5024 TailCallArguments.push_back(Info);
5025}
5026
5027/// EmitTailCallLoadFPAndRetAddr - Emit load from frame pointer and return address
5028/// stack slot. Returns the chain as result and the loaded frame pointers in
5029/// LROpOut/FPOpout. Used when tail calling.
5030SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5031 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5032 SDValue &FPOpOut, const SDLoc &dl) const {
5033 if (SPDiff) {
5034 // Load the LR and FP stack slot for later adjusting.
5035 EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5036 LROpOut = getReturnAddrFrameIndex(DAG);
5037 LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
5038 Chain = SDValue(LROpOut.getNode(), 1);
5039 }
5040 return Chain;
5041}
5042
5043/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
5044/// by "Src" to address "Dst" of size "Size". Alignment information is
5045/// specified by the specific parameter attribute. The copy will be passed as
5046/// a byval function parameter.
5047/// Sometimes what we are copying is the end of a larger object, the part that
5048/// does not fit in registers.
5049static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
5050 SDValue Chain, ISD::ArgFlagsTy Flags,
5051 SelectionDAG &DAG, const SDLoc &dl) {
5052 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
5053 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
5054 Flags.getNonZeroByValAlign(), false, false, false,
5055 MachinePointerInfo(), MachinePointerInfo());
5056}
5057
5058/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5059/// tail calls.
5060static void LowerMemOpCallTo(
5061 SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5062 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5063 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5064 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5065 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5066 if (!isTailCall) {
5067 if (isVector) {
5068 SDValue StackPtr;
5069 if (isPPC64)
5070 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5071 else
5072 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5073 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5074 DAG.getConstant(ArgOffset, dl, PtrVT));
5075 }
5076 MemOpChains.push_back(
5077 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5078 // Calculate and remember argument location.
5079 } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
5080 TailCallArguments);
5081}
5082
5083static void
5084PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
5085 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
5086 SDValue FPOp,
5087 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5088 // Emit a sequence of copyto/copyfrom virtual registers for arguments that
5089 // might overwrite each other in case of tail call optimization.
5090 SmallVector<SDValue, 8> MemOpChains2;
5091 // Do not flag preceding copytoreg stuff together with the following stuff.
5092 InFlag = SDValue();
5093 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
5094 MemOpChains2, dl);
5095 if (!MemOpChains2.empty())
5096 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
5097
5098 // Store the return address to the appropriate stack slot.
5099 Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
5100
5101 // Emit callseq_end just before tailcall node.
5102 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
5103 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
5104 InFlag = Chain.getValue(1);
5105}
5106
5107// Is this global address that of a function that can be called by name? (as
5108// opposed to something that must hold a descriptor for an indirect call).
5109static bool isFunctionGlobalAddress(SDValue Callee) {
5110 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
5111 if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
5112 Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
5113 return false;
5114
5115 return G->getGlobal()->getValueType()->isFunctionTy();
5116 }
5117
5118 return false;
5119}
5120
5121SDValue PPCTargetLowering::LowerCallResult(
5122 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
5123 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5124 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5125 SmallVector<CCValAssign, 16> RVLocs;
5126 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5127 *DAG.getContext());
5128
5129 CCRetInfo.AnalyzeCallResult(
5130 Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5131 ? RetCC_PPC_Cold
5132 : RetCC_PPC);
5133
5134 // Copy all of the result registers out of their specified physreg.
5135 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5136 CCValAssign &VA = RVLocs[i];
5137     assert(VA.isRegLoc() && "Can only return in registers!");
5138
5139 SDValue Val;
5140
5141 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5142 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5143 InFlag);
5144 Chain = Lo.getValue(1);
5145 InFlag = Lo.getValue(2);
5146 VA = RVLocs[++i]; // skip ahead to next loc
5147 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5148 InFlag);
5149 Chain = Hi.getValue(1);
5150 InFlag = Hi.getValue(2);
5151 if (!Subtarget.isLittleEndian())
5152 std::swap (Lo, Hi);
5153 Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
5154 } else {
5155 Val = DAG.getCopyFromReg(Chain, dl,
5156 VA.getLocReg(), VA.getLocVT(), InFlag);
5157 Chain = Val.getValue(1);
5158 InFlag = Val.getValue(2);
5159 }
5160
5161 switch (VA.getLocInfo()) {
5162 default: llvm_unreachable("Unknown loc info!");
5163 case CCValAssign::Full: break;
5164 case CCValAssign::AExt:
5165 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5166 break;
5167 case CCValAssign::ZExt:
5168 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5169 DAG.getValueType(VA.getValVT()));
5170 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5171 break;
5172 case CCValAssign::SExt:
5173 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5174 DAG.getValueType(VA.getValVT()));
5175 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5176 break;
5177 }
5178
5179 InVals.push_back(Val);
5180 }
5181
5182 return Chain;
5183}
5184
5185static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5186 const PPCSubtarget &Subtarget, bool isPatchPoint) {
5187 // PatchPoint calls are not indirect.
5188 if (isPatchPoint)
5189 return false;
5190
5191 if (isFunctionGlobalAddress(Callee) || dyn_cast<ExternalSymbolSDNode>(Callee))
5192 return false;
5193
5194 // Darwin and 32-bit ELF can use a BLA. The descriptor-based ABIs cannot
5195 // because the immediate function pointer points to a descriptor instead of
5196 // a function entry point. The ELFv2 ABI cannot use a BLA because the function
5197 // pointer immediate points to the global entry point, while the BLA would
5198 // need to jump to the local entry point (see rL211174).
5199 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5200 isBLACompatibleAddress(Callee, DAG))
5201 return false;
5202
5203 return true;
5204}
5205
5206// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5207static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5208 return Subtarget.isAIXABI() ||
5209 (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5210}
5211
5212static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
5213 const Function &Caller,
5214 const SDValue &Callee,
5215 const PPCSubtarget &Subtarget,
5216 const TargetMachine &TM) {
5217 if (CFlags.IsTailCall)
5218 return PPCISD::TC_RETURN;
5219
5220 // This is a call through a function pointer.
5221 if (CFlags.IsIndirect) {
5222 // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
5223 // indirect calls. The save of the caller's TOC pointer to the stack will be
5224 // inserted into the DAG as part of call lowering. The restore of the TOC
5225 // pointer is modeled by using a pseudo instruction for the call opcode that
5226 // represents the two-instruction sequence of an indirect branch and link,
5227 // immediately followed by a load of the TOC pointer from the stack save
5228 // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
5229 // as it is not saved or used.
5230 return isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
5231 : PPCISD::BCTRL;
5232 }
5233
5234 if (Subtarget.isUsingPCRelativeCalls()) {
5235 assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5236 return PPCISD::CALL_NOTOC;
5237 }
5238
5239 // The ABIs that maintain a TOC pointer across calls need to have a nop
5240 // immediately following the call instruction if the caller and callee may
5241 // have different TOC bases. At link time if the linker determines the calls
5242 // may not share a TOC base, the call is redirected to a trampoline inserted
5243 // by the linker. The trampoline will (among other things) save the caller's
5244 // TOC pointer at an ABI-designated offset in the linkage area and the linker
5245 // will rewrite the nop to be a load of the TOC pointer from the linkage area
5246 // into gpr2.
5247 if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())
5248 return callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL
5249 : PPCISD::CALL_NOP;
5250
5251 return PPCISD::CALL;
5252}
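// Illustrative sketch (assuming the ELFv2 linkage layout with the TOC save
// doubleword at 24(r1)): a PPCISD::CALL_NOP is emitted as roughly
//   bl callee
//   nop      # may be rewritten by the linker to: ld r2, 24(r1)
// when the caller and callee cannot be proven to share a TOC base.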
5253
5254static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
5255 const SDLoc &dl, const PPCSubtarget &Subtarget) {
5256 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
5257 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
5258 return SDValue(Dest, 0);
5259
5260 // Returns true if the callee is local, and false otherwise.
5261 auto isLocalCallee = [&]() {
5262 const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
5263 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5264 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5265
5266 return DAG.getTarget().shouldAssumeDSOLocal(*Mod, GV) &&
5267 !dyn_cast_or_null<GlobalIFunc>(GV);
5268 };
5269
5270 // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
5271 // a static relocation model causes some versions of GNU LD (2.17.50, at
5272 // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5273 // built with secure-PLT.
5274 bool UsePlt =
5275 Subtarget.is32BitELFABI() && !isLocalCallee() &&
5276 Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;
5277
5278 const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5279 const TargetMachine &TM = Subtarget.getTargetMachine();
5280 const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
5281 MCSymbolXCOFF *S =
5282 cast<MCSymbolXCOFF>(TLOF->getFunctionEntryPointSymbol(GV, TM));
5283
5284 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5285 return DAG.getMCSymbol(S, PtrVT);
5286 };
5287
5288 if (isFunctionGlobalAddress(Callee)) {
5289 const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5290
5291 if (Subtarget.isAIXABI()) {
5292 assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
5293 return getAIXFuncEntryPointSymbolSDNode(GV);
5294 }
5295 return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
5296 UsePlt ? PPCII::MO_PLT : 0);
5297 }
5298
5299 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5300 const char *SymName = S->getSymbol();
5301 if (Subtarget.isAIXABI()) {
5302 // If there exists a user-declared function whose name is the same as the
5303 // ExternalSymbol's, then we pick up the user-declared version.
5304 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5305 if (const Function *F =
5306 dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
5307 return getAIXFuncEntryPointSymbolSDNode(F);
5308
5309 // On AIX, direct function calls reference the symbol for the function's
5310 // entry point, which is named by prepending a "." before the function's
5311 // C-linkage name. A Qualname is returned here because an external
5312 // function entry point is a csect with XTY_ER property.
5313 const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5314 auto &Context = DAG.getMachineFunction().getMMI().getContext();
5315 MCSectionXCOFF *Sec = Context.getXCOFFSection(
5316 (Twine(".") + Twine(SymName)).str(), XCOFF::XMC_PR, XCOFF::XTY_ER,
5317 SectionKind::getMetadata());
5318 return Sec->getQualNameSymbol();
5319 };
5320
5321 SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
5322 }
5323 return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
5324 UsePlt ? PPCII::MO_PLT : 0);
5325 }
5326
5327 // No transformation needed.
5328 assert(Callee.getNode() && "What no callee?");
5329 return Callee;
5330}
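// Illustrative note (using a hypothetical function name): on AIX a direct call
// to an external function "foo" references the entry-point csect ".foo"
// (qualified name ".foo[PR]", an XTY_ER csect), while the unprefixed "foo"
// names the function descriptor.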
5331
5332static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
5333 assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5334 "Expected a CALLSEQ_STARTSDNode.");
5335
5336 // The last operand is the chain, except when the node has glue. If the node
5337 // has glue, then the last operand is the glue, and the chain is the second
5338 // last operand.
5339 SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5340 if (LastValue.getValueType() != MVT::Glue)
5341 return LastValue;
5342
5343 return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5344}
5345
5346 // Creates the node that moves a function's address into the count register
5347// to prepare for an indirect call instruction.
5348static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5349 SDValue &Glue, SDValue &Chain,
5350 const SDLoc &dl) {
5351 SDValue MTCTROps[] = {Chain, Callee, Glue};
5352 EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5353 Chain = DAG.getNode(PPCISD::MTCTR, dl, makeArrayRef(ReturnTypes, 2),
5354 makeArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5355 // The glue is the second value produced.
5356 Glue = Chain.getValue(1);
5357}
5358
5359static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5360 SDValue &Glue, SDValue &Chain,
5361 SDValue CallSeqStart,
5362 const CallBase *CB, const SDLoc &dl,
5363 bool hasNest,
5364 const PPCSubtarget &Subtarget) {
5365 // Function pointers in the 64-bit SVR4 ABI do not point to the function
5366 // entry point, but to the function descriptor (the function entry point
5367 // address is part of the function descriptor though).
5368 // The function descriptor is a three doubleword structure with the
5369 // following fields: function entry point, TOC base address and
5370 // environment pointer.
5371 // Thus for a call through a function pointer, the following actions need
5372 // to be performed:
5373 // 1. Save the TOC of the caller in the TOC save area of its stack
5374 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
5375 // 2. Load the address of the function entry point from the function
5376 // descriptor.
5377 // 3. Load the TOC of the callee from the function descriptor into r2.
5378 // 4. Load the environment pointer from the function descriptor into
5379 // r11.
5380 // 5. Branch to the function entry point address.
5381 // 6. On return of the callee, the TOC of the caller needs to be
5382 // restored (this is done in FinishCall()).
5383 //
5384 // The loads are scheduled at the beginning of the call sequence, and the
5385 // register copies are flagged together to ensure that no other
5386 // operations can be scheduled in between. E.g. without flagging the
5387 // copies together, a TOC access in the caller could be scheduled between
5388 // the assignment of the callee TOC and the branch to the callee, which leads
5389 // to incorrect code.
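// Illustrative sketch (assuming 8-byte doublewords, i.e. 64-bit ELFv1/AIX):
// the descriptor can be pictured as
//   struct FunctionDescriptor {
//     void *EntryPoint;     // offset 0, moved into the CTR below
//     void *TOCBase;        // offset 8  (TOCAnchorOffset), copied into r2
//     void *EnvironmentPtr; // offset 16 (EnvPtrOffset), copied into r11
//   };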
5390
5391 // Start by loading the function address from the descriptor.
5392 SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
5393 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5394 ? (MachineMemOperand::MODereferenceable |
5395 MachineMemOperand::MOInvariant)
5396 : MachineMemOperand::MONone;
5397
5398 MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5399
5400 // Registers used in building the DAG.
5401 const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5402 const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5403
5404 // Offsets of descriptor members.
5405 const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5406 const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5407
5408 const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5409 const unsigned Alignment = Subtarget.isPPC64() ? 8 : 4;
5410
5411 // One load for the function's entry point address.
5412 SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
5413 Alignment, MMOFlags);
5414
5415 // One for loading the TOC anchor for the module that contains the called
5416 // function.
5417 SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
5418 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
5419 SDValue TOCPtr =
5420 DAG.getLoad(RegVT, dl, LDChain, AddTOC,
5421 MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
5422
5423 // One for loading the environment pointer.
5424 SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
5425 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
5426 SDValue LoadEnvPtr =
5427 DAG.getLoad(RegVT, dl, LDChain, AddPtr,
5428 MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
5429
5430
5431 // Then copy the newly loaded TOC anchor to the TOC pointer.
5432 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
5433 Chain = TOCVal.getValue(0);
5434 Glue = TOCVal.getValue(1);
5435
5436 // If the function call has an explicit 'nest' parameter, it takes the
5437 // place of the environment pointer.
5438 assert((!hasNest || !Subtarget.isAIXABI()) &&
5439 "Nest parameter is not supported on AIX.");
5440 if (!hasNest) {
5441 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
5442 Chain = EnvVal.getValue(0);
5443 Glue = EnvVal.getValue(1);
5444 }
5445
5446 // The rest of the indirect call sequence is the same as the non-descriptor
5447 // DAG.
5448 prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
5449}
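// Illustrative sketch (assuming the 64-bit ELFv1 register assignments): the
// DAG built above corresponds roughly to
//   ld rN, 0(ptr)    ; function entry point
//   ld r2, 8(ptr)    ; callee TOC anchor
//   ld r11, 16(ptr)  ; environment pointer (omitted when 'nest' is used)
//   mtctr rN
// with the bctrl itself emitted later as part of the call node.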
5450
5451static void
5452buildCallOperands(SmallVectorImpl<SDValue> &Ops,
5453 PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5454 SelectionDAG &DAG,
5455 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5456 SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5457 const PPCSubtarget &Subtarget) {
5458 const bool IsPPC64 = Subtarget.isPPC64();
5459 // MVT for a general purpose register.
5460 const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
5461
5462 // First operand is always the chain.
5463 Ops.push_back(Chain);
5464
5465 // If it's a direct call pass the callee as the second operand.
5466 if (!CFlags.IsIndirect)
5467 Ops.push_back(Callee);
5468 else {
5469 assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5470
5471 // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5472 // on the stack (this would have been done in `LowerCall_64SVR4` or
5473 // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5474 // represents both the indirect branch and a load that restores the TOC
5475 // pointer from the linkage area. The operand for the TOC restore is an add
5476 // of the TOC save offset to the stack pointer. This must be the second
5477 // operand: after the chain input but before any other variadic arguments.
5478 // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5479 // saved or used.
5480 if (isTOCSaveRestoreRequired(Subtarget)) {
5481 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5482
5483 SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
5484 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5485 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5486 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
5487 Ops.push_back(AddTOC);
5488 }
5489
5490 // Add the register used for the environment pointer.
5491 if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5492 Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
5493 RegVT));
5494
5495
5496 // Add CTR register as callee so a bctr can be emitted later.
5497 if (CFlags.IsTailCall)
5498 Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5499 }
5500
5501 // If this is a tail call add stack pointer delta.
5502 if (CFlags.IsTailCall)
5503 Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5504
5505 // Add argument registers to the end of the list so that they are known live
5506 // into the call.
5507 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
5508 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
5509 RegsToPass[i].second.getValueType()));
5510
5511 // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5512 // no way to mark dependencies as implicit here.
5513 // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5514 if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5515 !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5516 Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
5517
5518 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5519 if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5520 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5521
5522 // Add a register mask operand representing the call-preserved registers.
5523 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5524 const uint32_t *Mask =
5525 TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
5526 assert(Mask && "Missing call preserved mask for calling convention");
5527 Ops.push_back(DAG.getRegisterMask(Mask));
5528
5529 // If the glue is valid, it is the last operand.
5530 if (Glue.getNode())
5531 Ops.push_back(Glue);
5532}
5533
5534SDValue PPCTargetLowering::FinishCall(
5535 CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5536 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5537 SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5538 unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5539 SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5540
5541 if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5542 Subtarget.isAIXABI())
5543 setUsesTOCBasePtr(DAG);
5544
5545 unsigned CallOpc =
5546 getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
5547 Subtarget, DAG.getTarget());
5548
5549 if (!CFlags.IsIndirect)
5550 Callee = transformCallee(Callee, DAG, dl, Subtarget);
5551 else if (Subtarget.usesFunctionDescriptors())
5552 prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5553 dl, CFlags.HasNest, Subtarget);
5554 else
5555 prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5556
5557 // Build the operand list for the call instruction.
5558 SmallVector<SDValue, 8> Ops;
5559 buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5560 SPDiff, Subtarget);
5561
5562 // Emit tail call.
5563 if (CFlags.IsTailCall) {
5564 // Indirect tail calls when using PC Relative calls do not have the same
5565 // constraints.
5566 assert(((Callee.getOpcode() == ISD::Register &&
5567 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5568 Callee.getOpcode() == ISD::TargetExternalSymbol ||
5569 Callee.getOpcode() == ISD::TargetGlobalAddress ||
5570 isa<ConstantSDNode>(Callee) ||
5571 (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5572 "Expecting a global address, external symbol, absolute value, "
5573 "register or an indirect tail call when PC Relative calls are "
5574 "used.");
5575 // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5576 assert(CallOpc == PPCISD::TC_RETURN &&
5577 "Unexpected call opcode for a tail call.");
5578 DAG.getMachineFunction().getFrameInfo().setHasTailCall();
5579 return DAG.getNode(CallOpc, dl, MVT::Other, Ops);
5580 }
5581
5582 std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5583 Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
5584 DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
5585 Glue = Chain.getValue(1);
5586
5587 // When performing tail call optimization the callee pops its arguments off
5588 // the stack. Account for this here so these bytes can be pushed back on in
5589 // PPCFrameLowering::eliminateCallFramePseudoInstr.
5590 int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5591 getTargetMachine().Options.GuaranteedTailCallOpt)
5592 ? NumBytes
5593 : 0;
5594
5595 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
5596 DAG.getIntPtrConstant(BytesCalleePops, dl, true),
5597 Glue, dl);
5598 Glue = Chain.getValue(1);
5599
5600 return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
5601 DAG, InVals);
5602}
5603
5604SDValue
5605PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5606 SmallVectorImpl<SDValue> &InVals) const {
5607 SelectionDAG &DAG = CLI.DAG;
5608 SDLoc &dl = CLI.DL;
5609 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5610 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5611 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
5612 SDValue Chain = CLI.Chain;
5613 SDValue Callee = CLI.Callee;
5614 bool &isTailCall = CLI.IsTailCall;
5615 CallingConv::ID CallConv = CLI.CallConv;
5616 bool isVarArg = CLI.IsVarArg;
5617 bool isPatchPoint = CLI.IsPatchPoint;
5618 const CallBase *CB = CLI.CB;
5619
5620 if (isTailCall) {
5621 if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5622 isTailCall = false;
5623 else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5624 isTailCall = IsEligibleForTailCallOptimization_64SVR4(
5625 Callee, CallConv, CB, isVarArg, Outs, Ins, DAG);
5626 else
5627 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
5628 Ins, DAG);
5629 if (isTailCall) {
5630 ++NumTailCalls;
5631 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5632 ++NumSiblingCalls;
5633
5634 // PC Relative calls no longer guarantee that the callee is a Global
5635 // Address Node. The callee could be an indirect tail call in which
5636 // case the SDValue for the callee could be a load (to load the address
5637 // of a function pointer) or it may be a register copy (to move the
5638 // address of the callee from a function parameter into a virtual
5639 // register). It may also be an ExternalSymbolSDNode (e.g. memcpy).
5640 assert((Subtarget.isUsingPCRelativeCalls() ||
5641 isa<GlobalAddressSDNode>(Callee)) &&
5642 "Callee should be an llvm::Function object.");
5643
5644 LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5645 << "\nTCO callee: ");
5646 LLVM_DEBUG(Callee.dump());
5647 }
5648 }
5649
5650 if (!isTailCall && CB && CB->isMustTailCall())
5651 report_fatal_error("failed to perform tail call elimination on a call "
5652 "site marked musttail");
5653
5654 // When long calls (i.e. indirect calls) are always used, calls are always
5655 // made via function pointer. If we have a function name, first translate it
5656 // into a pointer.
5657 if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5658 !isTailCall)
5659 Callee = LowerGlobalAddress(Callee, DAG);
5660
5661 CallFlags CFlags(
5662 CallConv, isTailCall, isVarArg, isPatchPoint,
5663 isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
5664 // hasNest
5665 Subtarget.is64BitELFABI() &&
5666 any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
5667 CLI.NoMerge);
5668
5669 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5670 return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5671 InVals, CB);
5672
5673 if (Subtarget.isSVR4ABI())
5674 return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5675 InVals, CB);
5676
5677 if (Subtarget.isAIXABI())
5678 return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5679 InVals, CB);
5680
5681 return LowerCall_Darwin(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5682 InVals, CB);
5683}
5684
5685SDValue PPCTargetLowering::LowerCall_32SVR4(
5686 SDValue Chain, SDValue Callee, CallFlags CFlags,
5687 const SmallVectorImpl<ISD::OutputArg> &Outs,
5688 const SmallVectorImpl<SDValue> &OutVals,
5689 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5690 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5691 const CallBase *CB) const {
5692 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5693 // of the 32-bit SVR4 ABI stack frame layout.
5694
5695 const CallingConv::ID CallConv = CFlags.CallConv;
5696 const bool IsVarArg = CFlags.IsVarArg;
5697 const bool IsTailCall = CFlags.IsTailCall;
5698
5699 assert((CallConv == CallingConv::C ||
5700 CallConv == CallingConv::Cold ||
5701 CallConv == CallingConv::Fast) && "Unknown calling convention!");
5702
5703 const Align PtrAlign(4);
5704
5705 MachineFunction &MF = DAG.getMachineFunction();
5706
5707 // Mark this function as potentially containing a function that contains a
5708 // tail call. As a consequence the frame pointer will be used for dynamic
5709 // allocas and for restoring the caller's stack pointer in this function's
5710 // epilogue. This is done because, by tail calling, the called function might overwrite the value
5711 // in this function's (MF) stack pointer stack slot 0(SP).
5712 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5713 CallConv == CallingConv::Fast)
5714 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5715
5716 // Count how many bytes are to be pushed on the stack, including the linkage
5717 // area, parameter list area and the part of the local variable space which
5718 // contains copies of aggregates which are passed by value.
5719
5720 // Assign locations to all of the outgoing arguments.
5721 SmallVector<CCValAssign, 16> ArgLocs;
5722 PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5723
5724 // Reserve space for the linkage area on the stack.
5725 CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
5726 PtrAlign);
5727 if (useSoftFloat())
5728 CCInfo.PreAnalyzeCallOperands(Outs);
5729
5730 if (IsVarArg) {
5731 // Handle fixed and variable vector arguments differently.
5732 // Fixed vector arguments go into registers as long as registers are
5733 // available. Variable vector arguments always go into memory.
5734 unsigned NumArgs = Outs.size();
5735
5736 for (unsigned i = 0; i != NumArgs; ++i) {
5737 MVT ArgVT = Outs[i].VT;
5738 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5739 bool Result;
5740
5741 if (Outs[i].IsFixed) {
5742 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
5743 CCInfo);
5744 } else {
5745 Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
5746 ArgFlags, CCInfo);
5747 }
5748
5749 if (Result) {
5750#ifndef NDEBUG
5751 errs() << "Call operand #" << i << " has unhandled type "
5752 << EVT(ArgVT).getEVTString() << "\n";
5753#endif
5754 llvm_unreachable(nullptr);
5755 }
5756 }
5757 } else {
5758 // All arguments are treated the same.
5759 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
5760 }
5761 CCInfo.clearWasPPCF128();
5762
5763 // Assign locations to all of the outgoing aggregate by value arguments.
5764 SmallVector<CCValAssign, 16> ByValArgLocs;
5765 CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
5766
5767 // Reserve stack space for the allocations in CCInfo.
5768 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);
5769
5770 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
5771
5772 // Size of the linkage area, parameter list area and the part of the local
5773 // variable space where copies of aggregates which are passed by value are
5774 // stored.
5775 unsigned NumBytes = CCByValInfo.getNextStackOffset();
5776
5777 // Calculate by how many bytes the stack has to be adjusted in case of tail
5778 // call optimization.
5779 int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
5780
5781 // Adjust the stack pointer for the new arguments...
5782 // These operations are automatically eliminated by the prolog/epilog pass
5783 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5784 SDValue CallSeqStart = Chain;
5785
5786 // Load the return address and frame pointer so it can be moved somewhere else
5787 // later.
5788 SDValue LROp, FPOp;
5789 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5790
5791 // Set up a copy of the stack pointer for use loading and storing any
5792 // arguments that may not fit in the registers available for argument
5793 // passing.
5794 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5795
5796 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5797 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5798 SmallVector<SDValue, 8> MemOpChains;
5799
5800 bool seenFloatArg = false;
5801 // Walk the register/memloc assignments, inserting copies/loads.
5802 // i - Tracks the index into the list of registers allocated for the call
5803 // RealArgIdx - Tracks the index into the list of actual function arguments
5804 // j - Tracks the index into the list of byval arguments
5805 for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
5806 i != e;
5807 ++i, ++RealArgIdx) {
5808 CCValAssign &VA = ArgLocs[i];
5809 SDValue Arg = OutVals[RealArgIdx];
5810 ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
5811
5812 if (Flags.isByVal()) {
5813 // Argument is an aggregate which is passed by value, thus we need to
5814 // create a copy of it in the local variable space of the current stack
5815 // frame (which is the stack frame of the caller) and pass the address of
5816 // this copy to the callee.
5817 assert((j < ByValArgLocs.size()) && "Index out of bounds!");
5818 CCValAssign &ByValVA = ByValArgLocs[j++];
5819 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
5820
5821 // Memory reserved in the local variable space of the caller's stack frame.
5822 unsigned LocMemOffset = ByValVA.getLocMemOffset();
5823
5824 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5825 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5826 StackPtr, PtrOff);
5827
5828 // Create a copy of the argument in the local area of the current
5829 // stack frame.
5830 SDValue MemcpyCall =
5831 CreateCopyOfByValArgument(Arg, PtrOff,
5832 CallSeqStart.getNode()->getOperand(0),
5833 Flags, DAG, dl);
5834
5835 // This must go outside the CALLSEQ_START..END.
5836 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
5837 SDLoc(MemcpyCall));
5838 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5839 NewCallSeqStart.getNode());
5840 Chain = CallSeqStart = NewCallSeqStart;
5841
5842 // Pass the address of the aggregate copy on the stack either in a
5843 // physical register or in the parameter list area of the current stack
5844 // frame to the callee.
5845 Arg = PtrOff;
5846 }
5847
5848 // When useCRBits() is true, there can be i1 arguments.
5849 // This is because getRegisterType(MVT::i1) => MVT::i1,
5850 // and for other integer types getRegisterType() => MVT::i32.
5851 // Extend i1 and ensure callee will get i32.
5852 if (Arg.getValueType() == MVT::i1)
5853 Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
5854 dl, MVT::i32, Arg);
5855
5856 if (VA.isRegLoc()) {
5857 seenFloatArg |= VA.getLocVT().isFloatingPoint();
5858 // Put argument in a physical register.
5859 if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
5860 bool IsLE = Subtarget.isLittleEndian();
5861 SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5862 DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
5863 RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
5864 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5865 DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
5866 RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
5867 SVal.getValue(0)));
5868 } else
5869 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
5870 } else {
5871 // Put argument in the parameter list area of the current stack frame.
5872 assert(VA.isMemLoc());
5873 unsigned LocMemOffset = VA.getLocMemOffset();
5874
5875 if (!IsTailCall) {
5876 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5877 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5878 StackPtr, PtrOff);
5879
5880 MemOpChains.push_back(
5881 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5882 } else {
5883 // Calculate and remember argument location.
5884 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
5885 TailCallArguments);
5886 }
5887 }
5888 }
5889
5890 if (!MemOpChains.empty())
5891 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
5892
5893 // Build a sequence of copy-to-reg nodes chained together with token chain
5894 // and flag operands which copy the outgoing args into the appropriate regs.
5895 SDValue InFlag;
5896 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5897 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5898 RegsToPass[i].second, InFlag);
5899 InFlag = Chain.getValue(1);
5900 }
5901
5902 // Set CR bit 6 to true if this is a vararg call with floating args passed in
5903 // registers.
5904 if (IsVarArg) {
5905 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
5906 SDValue Ops[] = { Chain, InFlag };
5907
5908 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
5909 dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));
5910
5911 InFlag = Chain.getValue(1);
5912 }
5913
5914 if (IsTailCall)
5915 PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
5916 TailCallArguments);
5917
5918 return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
5919 Callee, SPDiff, NumBytes, Ins, InVals, CB);
5920}
5921
5922// Copy an argument into memory, being careful to do this outside the
5923// call sequence for the call to which the argument belongs.
5924SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
5925 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
5926 SelectionDAG &DAG, const SDLoc &dl) const {
5927 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
5928 CallSeqStart.getNode()->getOperand(0),
5929 Flags, DAG, dl);
5930 // The MEMCPY must go outside the CALLSEQ_START..END.
5931 int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
5932 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
5933 SDLoc(MemcpyCall));
5934 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5935 NewCallSeqStart.getNode());
5936 return NewCallSeqStart;
5937}
5938
5939SDValue PPCTargetLowering::LowerCall_64SVR4(
5940 SDValue Chain, SDValue Callee, CallFlags CFlags,
5941 const SmallVectorImpl<ISD::OutputArg> &Outs,
5942 const SmallVectorImpl<SDValue> &OutVals,
5943 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5944 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5945 const CallBase *CB) const {
5946 bool isELFv2ABI = Subtarget.isELFv2ABI();
5947 bool isLittleEndian = Subtarget.isLittleEndian();
5948 unsigned NumOps = Outs.size();
5949 bool IsSibCall = false;
5950 bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
5951
5952 EVT PtrVT = getPointerTy(DAG.getDataLayout());
5953 unsigned PtrByteSize = 8;
5954
5955 MachineFunction &MF = DAG.getMachineFunction();
5956
5957 if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
5958 IsSibCall = true;
5959
5960 // Mark this function as potentially containing a function that contains a
5961 // tail call. As a consequence the frame pointer will be used for dynamic
5962 // allocas and for restoring the caller's stack pointer in this function's
5963 // epilogue. This is done because, by tail calling, the called function might overwrite the value
5964 // in this function's (MF) stack pointer stack slot 0(SP).
5965 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
5966 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5967
5968 assert(!(IsFastCall && CFlags.IsVarArg) &&
5969 "fastcc not supported on varargs functions");
5970
5971 // Count how many bytes are to be pushed on the stack, including the linkage
5972 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
5973 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
5974 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
5975 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
5976 unsigned NumBytes = LinkageSize;
5977 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
5978
5979 static const MCPhysReg GPR[] = {
5980 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
5981 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
5982 };
5983 static const MCPhysReg VR[] = {
5984 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
5985 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
5986 };
5987
5988 const unsigned NumGPRs = array_lengthof(GPR);
5989 const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
5990 const unsigned NumVRs = array_lengthof(VR);
5991
5992 // On ELFv2, we can avoid allocating the parameter area if all the arguments
5993 // can be passed to the callee in registers.
5994 // For the fast calling convention, there is another check below.
5995 // Note: We should keep consistent with LowerFormalArguments_64SVR4()
5996 bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
5997 if (!HasParameterArea) {
5998 unsigned ParamAreaSize = NumGPRs * PtrByteSize;
5999 unsigned AvailableFPRs = NumFPRs;
6000 unsigned AvailableVRs = NumVRs;
6001 unsigned NumBytesTmp = NumBytes;
6002 for (unsigned i = 0; i != NumOps; ++i) {
6003 if (Outs[i].Flags.isNest()) continue;
6004 if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
6005 PtrByteSize, LinkageSize, ParamAreaSize,
6006 NumBytesTmp, AvailableFPRs, AvailableVRs))
6007 HasParameterArea = true;
6008 }
6009 }
6010
6011 // When using the fast calling convention, we don't provide backing for
6012 // arguments that will be in registers.
6013 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6014
6015 // Avoid allocating parameter area for fastcc functions if all the arguments
6016 // can be passed in the registers.
6017 if (IsFastCall)
6018 HasParameterArea = false;
6019
6020 // Add up all the space actually used.
6021 for (unsigned i = 0; i != NumOps; ++i) {
6022 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6023 EVT ArgVT = Outs[i].VT;
6024 EVT OrigVT = Outs[i].ArgVT;
6025
6026 if (Flags.isNest())
6027 continue;
6028
6029 if (IsFastCall) {
6030 if (Flags.isByVal()) {
6031 NumGPRsUsed += (Flags.getByValSize()+7)/8;
6032 if (NumGPRsUsed > NumGPRs)
6033 HasParameterArea = true;
6034 } else {
6035 switch (ArgVT.getSimpleVT().SimpleTy) {
6036 default: llvm_unreachable("Unexpected ValueType for argument!");
6037 case MVT::i1:
6038 case MVT::i32:
6039 case MVT::i64:
6040 if (++NumGPRsUsed <= NumGPRs)
6041 continue;
6042 break;
6043 case MVT::v4i32:
6044 case MVT::v8i16:
6045 case MVT::v16i8:
6046 case MVT::v2f64:
6047 case MVT::v2i64:
6048 case MVT::v1i128:
6049 case MVT::f128:
6050 if (++NumVRsUsed <= NumVRs)
6051 continue;
6052 break;
6053 case MVT::v4f32:
6054 if (++NumVRsUsed <= NumVRs)
6055 continue;
6056 break;
6057 case MVT::f32:
6058 case MVT::f64:
6059 if (++NumFPRsUsed <= NumFPRs)
6060 continue;
6061 break;
6062 }
6063 HasParameterArea = true;
6064 }
6065 }
6066
6067 /* Respect alignment of argument on the stack. */
6068 auto Alignment =
6069 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6070 NumBytes = alignTo(NumBytes, Alignment);
6071
6072 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6073 if (Flags.isInConsecutiveRegsLast())
6074 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6075 }
6076
6077 unsigned NumBytesActuallyUsed = NumBytes;
6078
6079 // In the old ELFv1 ABI,
6080 // the prolog code of the callee may store up to 8 GPR argument registers to
6081 // the stack, allowing va_start to index over them in memory if it is varargs.
6082 // Because we cannot tell if this is needed on the caller side, we have to
6083 // conservatively assume that it is needed. As such, make sure we have at
6084 // least enough stack space for the caller to store the 8 GPRs.
6085 // In the ELFv2 ABI, we allocate the parameter area iff a callee
6086 // really requires memory operands, e.g. a vararg function.
6087 if (HasParameterArea)
6088 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6089 else
6090 NumBytes = LinkageSize;
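  // Worked numbers (sketch): with PtrByteSize == 8, the conservative floor
  // above is LinkageSize + 64, i.e. at least 112 bytes on ELFv1 (48-byte
  // linkage area) and at least 96 bytes on ELFv2 (32-byte linkage area).
  // Without a parameter area, ELFv2 keeps the outgoing area at just the
  // 32-byte linkage area.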
6091
6092 // Tail call needs the stack to be aligned.
6093 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6094 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6095
6096 int SPDiff = 0;
6097
6098 // Calculate by how many bytes the stack has to be adjusted in case of tail
6099 // call optimization.
6100 if (!IsSibCall)
6101 SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6102
6103 // To protect arguments on the stack from being clobbered in a tail call,
6104 // force all the loads to happen before doing any other lowering.
6105 if (CFlags.IsTailCall)
6106 Chain = DAG.getStackArgumentTokenFactor(Chain);
6107
6108 // Adjust the stack pointer for the new arguments...
6109 // These operations are automatically eliminated by the prolog/epilog pass
6110 if (!IsSibCall)
6111 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6112 SDValue CallSeqStart = Chain;
6113
6114 // Load the return address and frame pointer so they can be moved somewhere else
6115 // later.
6116 SDValue LROp, FPOp;
6117 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6118
6119 // Set up a copy of the stack pointer for use loading and storing any
6120 // arguments that may not fit in the registers available for argument
6121 // passing.
6122 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6123
6124 // Figure out which arguments are going to go in registers, and which in
6125 // memory. Also, if this is a vararg function, floating point operations
6126 // must be stored to our stack, and loaded into integer regs as well, if
6127 // any integer regs are available for argument passing.
6128 unsigned ArgOffset = LinkageSize;
6129
6130 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6131 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6132
6133 SmallVector<SDValue, 8> MemOpChains;
6134 for (unsigned i = 0; i != NumOps; ++i) {
6135 SDValue Arg = OutVals[i];
6136 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6137 EVT ArgVT = Outs[i].VT;
6138 EVT OrigVT = Outs[i].ArgVT;
6139
6140 // PtrOff will be used to store the current argument to the stack if a
6141 // register cannot be found for it.
6142 SDValue PtrOff;
6143
6144 // We re-align the argument offset for each argument, except when using the
6145 // fast calling convention, in which case we re-align only when we will
6146 // actually use a stack slot.
6147 auto ComputePtrOff = [&]() {
6148 /* Respect alignment of argument on the stack. */
6149 auto Alignment =
6150 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6151 ArgOffset = alignTo(ArgOffset, Alignment);
6152
6153 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6154
6155 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6156 };
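  // Worked example (sketch): on ELFv2, LinkageSize is 32, so after one i64
  // argument ArgOffset is 40; a following 16-byte-aligned vector argument is
  // re-aligned to 48 before PtrOff is materialized as X1 + 48. With the fast
  // calling convention this lambda runs only when a stack slot is really used.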
6157
6158 if (!IsFastCall) {
6159 ComputePtrOff();
6160
6161 /* Compute GPR index associated with argument offset. */
6162 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6163 GPR_idx = std::min(GPR_idx, NumGPRs);
6164 }
6165
6166 // Promote integers to 64-bit values.
6167 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6168 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6169 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6170 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6171 }
6172
6173 // FIXME memcpy is used way more than necessary. Correctness first.
6174 // Note: "by value" is code for passing a structure by value, not
6175 // basic types.
6176 if (Flags.isByVal()) {
6177 // Note: Size includes alignment padding, so
6178 // struct x { short a; char b; }
6179 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
6180 // These are the proper values we need for right-justifying the
6181 // aggregate in a parameter register.
6182 unsigned Size = Flags.getByValSize();
6183
6184 // An empty aggregate parameter takes up no storage and no
6185 // registers.
6186 if (Size == 0)
6187 continue;
6188
6189 if (IsFastCall)
6190 ComputePtrOff();
6191
6192 // All aggregates smaller than 8 bytes must be passed right-justified.
6193 if (Size==1 || Size==2 || Size==4) {
6194 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6195 if (GPR_idx != NumGPRs) {
6196 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6197 MachinePointerInfo(), VT);
6198 MemOpChains.push_back(Load.getValue(1));
6199 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6200
6201 ArgOffset += PtrByteSize;
6202 continue;
6203 }
6204 }
6205
6206 if (GPR_idx == NumGPRs && Size < 8) {
6207 SDValue AddPtr = PtrOff;
6208 if (!isLittleEndian) {
6209 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6210 PtrOff.getValueType());
6211 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6212 }
6213 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6214 CallSeqStart,
6215 Flags, DAG, dl);
6216 ArgOffset += PtrByteSize;
6217 continue;
6218 }
6219 // Copy entire object into memory. There are cases where gcc-generated
6220 // code assumes it is there, even if it could be put entirely into
6221 // registers. (This is not what the doc says.)
6222
6223 // FIXME: The above statement is likely due to a misunderstanding of the
6224 // documents. All arguments must be copied into the parameter area BY
6225 // THE CALLEE in the event that the callee takes the address of any
6226 // formal argument. That has not yet been implemented. However, it is
6227 // reasonable to use the stack area as a staging area for the register
6228 // load.
6229
6230 // Skip this for small aggregates, as we will use the same slot for a
6231 // right-justified copy, below.
6232 if (Size >= 8)
6233 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6234 CallSeqStart,
6235 Flags, DAG, dl);
6236
6237 // When a register is available, pass a small aggregate right-justified.
6238 if (Size < 8 && GPR_idx != NumGPRs) {
6239 // The easiest way to get this right-justified in a register
6240 // is to copy the structure into the rightmost portion of a
6241 // local variable slot, then load the whole slot into the
6242 // register.
6243 // FIXME: The memcpy seems to produce pretty awful code for
6244 // small aggregates, particularly for packed ones.
6245 // FIXME: It would be preferable to use the slot in the
6246 // parameter save area instead of a new local variable.
6247 SDValue AddPtr = PtrOff;
6248 if (!isLittleEndian) {
6249 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6250 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6251 }
6252 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6253 CallSeqStart,
6254 Flags, DAG, dl);
6255
6256 // Load the slot into the register.
6257 SDValue Load =
6258 DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6259 MemOpChains.push_back(Load.getValue(1));
6260 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6261
6262 // Done with this argument.
6263 ArgOffset += PtrByteSize;
6264 continue;
6265 }
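  // Worked example (sketch): for a 3-byte aggregate passed by value on a
  // big-endian target with a GPR available, the memcpy above targets
  // PtrOff + (8 - 3) == PtrOff + 5, so the bytes land in the low-order end
  // of the doubleword slot; the following 8-byte load then yields the
  // aggregate right-justified in the GPR, as the ABI requires.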
6266
6267 // For aggregates larger than PtrByteSize, copy the pieces of the
6268 // object that fit into registers from the parameter save area.
6269 for (unsigned j=0; j<Size; j+=PtrByteSize) {
6270 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6271 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6272 if (GPR_idx != NumGPRs) {
6273 SDValue Load =
6274 DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
6275 MemOpChains.push_back(Load.getValue(1));
6276 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6277 ArgOffset += PtrByteSize;
6278 } else {
6279 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6280 break;
6281 }
6282 }
6283 continue;
6284 }
6285
6286 switch (Arg.getSimpleValueType().SimpleTy) {
6287 default: llvm_unreachable("Unexpected ValueType for argument!");
6288 case MVT::i1:
6289 case MVT::i32:
6290 case MVT::i64:
6291 if (Flags.isNest()) {
6292 // The 'nest' parameter, if any, is passed in R11.
6293 RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6294 break;
6295 }
6296
6297 // These can be scalar arguments or elements of an integer array type
6298 // passed directly. Clang may use those instead of "byval" aggregate
6299 // types to avoid forcing arguments to memory unnecessarily.
6300 if (GPR_idx != NumGPRs) {
6301 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6302 } else {
6303 if (IsFastCall)
6304 ComputePtrOff();
6305
6306 assert(HasParameterArea &&
6307 "Parameter area must exist to pass an argument in memory.");
6308 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6309 true, CFlags.IsTailCall, false, MemOpChains,
6310 TailCallArguments, dl);
6311 if (IsFastCall)
6312 ArgOffset += PtrByteSize;
6313 }
6314 if (!IsFastCall)
6315 ArgOffset += PtrByteSize;
6316 break;
6317 case MVT::f32:
6318 case MVT::f64: {
6319 // These can be scalar arguments or elements of a float array type
6320 // passed directly. The latter are used to implement ELFv2 homogeneous
6321 // float aggregates.
6322
6323 // Named arguments go into FPRs first, and once they overflow, the
6324 // remaining arguments go into GPRs and then the parameter save area.
6325 // Unnamed arguments for vararg functions always go to GPRs and
6326 // then the parameter save area. For now, put all arguments to vararg
6327 // routines always in both locations (FPR *and* GPR or stack slot).
6328 bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6329 bool NeededLoad = false;
6330
6331 // First load the argument into the next available FPR.
6332 if (FPR_idx != NumFPRs)
6333 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6334
6335 // Next, load the argument into GPR or stack slot if needed.
6336 if (!NeedGPROrStack)
6337 ;
6338 else if (GPR_idx != NumGPRs && !IsFastCall) {
6339 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6340 // once we support fp <-> gpr moves.
6341
6342 // In the non-vararg case, this can only ever happen in the
6343 // presence of f32 array types, since otherwise we never run
6344 // out of FPRs before running out of GPRs.
6345 SDValue ArgVal;
6346
6347 // Double values are always passed in a single GPR.
6348 if (Arg.getValueType() != MVT::f32) {
6349 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6350
6351 // Non-array float values are extended and passed in a GPR.
6352 } else if (!Flags.isInConsecutiveRegs()) {
6353 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6354 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6355
6356 // If we have an array of floats, we collect every odd element
6357 // together with its predecessor into one GPR.
6358 } else if (ArgOffset % PtrByteSize != 0) {
6359 SDValue Lo, Hi;
6360 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6361 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6362 if (!isLittleEndian)
6363 std::swap(Lo, Hi);
6364 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6365
6366 // The final element, if even, goes into the first half of a GPR.
6367 } else if (Flags.isInConsecutiveRegsLast()) {
6368 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6369 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6370 if (!isLittleEndian)
6371 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6372 DAG.getConstant(32, dl, MVT::i32));
6373
6374 // Non-final even elements are skipped; they will be handled
6375 // together with the subsequent argument on the next go-around.
6376 } else
6377 ArgVal = SDValue();
6378
6379 if (ArgVal.getNode())
6380 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
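        // Worked example (sketch): for an ELFv2 homogeneous float aggregate
        // {f0, f1, f2} that has run out of FPRs, f0 and f1 share one
        // doubleword. While lowering f1 (ArgOffset % 8 == 4), Lo/Hi are the
        // bitcasts of f0 and f1; on big-endian they are swapped so f0 lands
        // in the high word of the BUILD_PAIR, matching the in-memory layout.
        // f2, the final (even) element, is any-extended and, on big-endian,
        // shifted left by 32.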
6381 } else {
6382 if (IsFastCall)
6383 ComputePtrOff();
6384
6385 // Single-precision floating-point values are mapped to the
6386 // second (rightmost) word of the stack doubleword.
6387 if (Arg.getValueType() == MVT::f32 &&
6388 !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6389 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6390 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6391 }
6392
6393 assert(HasParameterArea &&
6394 "Parameter area must exist to pass an argument in memory.");
6395 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6396 true, CFlags.IsTailCall, false, MemOpChains,
6397 TailCallArguments, dl);
6398
6399 NeededLoad = true;
6400 }
6401 // When passing an array of floats, the array occupies consecutive
6402 // space in the argument area; only round up to the next doubleword
6403 // at the end of the array. Otherwise, each float takes 8 bytes.
6404 if (!IsFastCall || NeededLoad) {
6405 ArgOffset += (Arg.getValueType() == MVT::f32 &&
6406 Flags.isInConsecutiveRegs()) ? 4 : 8;
6407 if (Flags.isInConsecutiveRegsLast())
6408 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6409 }
6410 break;
6411 }
6412 case MVT::v4f32:
6413 case MVT::v4i32:
6414 case MVT::v8i16:
6415 case MVT::v16i8:
6416 case MVT::v2f64:
6417 case MVT::v2i64:
6418 case MVT::v1i128:
6419 case MVT::f128:
6420 // These can be scalar arguments or elements of a vector array type
6421 // passed directly. The latter are used to implement ELFv2 homogeneous
6422 // vector aggregates.
6423
6424 // For a varargs call, named arguments go into VRs or on the stack as
6425 // usual; unnamed arguments always go to the stack or the corresponding
6426 // GPRs when within range. For now, we always put the value in both
6427 // locations (or even all three).
6428 if (CFlags.IsVarArg) {
6429 assert(HasParameterArea &&
6430 "Parameter area must exist if we have a varargs call.");
6431 // We could elide this store in the case where the object fits
6432 // entirely in R registers. Maybe later.
6433 SDValue Store =
6434 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6435 MemOpChains.push_back(Store);
6436 if (VR_idx != NumVRs) {
6437 SDValue Load =
6438 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6439 MemOpChains.push_back(Load.getValue(1));
6440 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6441 }
6442 ArgOffset += 16;
6443 for (unsigned i=0; i<16; i+=PtrByteSize) {
6444 if (GPR_idx == NumGPRs)
6445 break;
6446 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6447 DAG.getConstant(i, dl, PtrVT));
6448 SDValue Load =
6449 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6450 MemOpChains.push_back(Load.getValue(1));
6451 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6452 }
6453 break;
6454 }
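      // Worked example (sketch): for a vararg v4i32 argument, the value is
      // stored once into its 16-byte parameter-save-area slot above, then
      // shadow-loaded into the next free VR (if any) and into up to two GPRs
      // covering the same 16 bytes, so the callee's va_arg finds it whichever
      // location it inspects.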
6455
6456 // Non-varargs Altivec params go into VRs or on the stack.
6457 if (VR_idx != NumVRs) {
6458 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6459 } else {
6460 if (IsFastCall)
6461 ComputePtrOff();
6462
6463 assert(HasParameterArea &&
6464 "Parameter area must exist to pass an argument in memory.");
6465 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6466 true, CFlags.IsTailCall, true, MemOpChains,
6467 TailCallArguments, dl);
6468 if (IsFastCall)
6469 ArgOffset += 16;
6470 }
6471
6472 if (!IsFastCall)
6473 ArgOffset += 16;
6474 break;
6475 }
6476 }
6477
6478 assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6479 "mismatch in size of parameter area");
6480 (void)NumBytesActuallyUsed;
6481
6482 if (!MemOpChains.empty())
6483 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6484
6485 // Check if this is an indirect call (MTCTR/BCTRL).
6486 // See prepareDescriptorIndirectCall and buildCallOperands for more
6487 // information about calls through function pointers in the 64-bit SVR4 ABI.
6488 if (CFlags.IsIndirect) {
6489 // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6490 // caller in the TOC save area.
6491 if (isTOCSaveRestoreRequired(Subtarget)) {
6492 assert(!CFlags.IsTailCall && "Indirect tail calls not supported");
6493 // Load r2 into a virtual register and store it to the TOC save area.
6494 setUsesTOCBasePtr(DAG);
6495 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6496 // TOC save area offset.
6497 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6498 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6499 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6500 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6501 MachinePointerInfo::getStack(
6502 DAG.getMachineFunction(), TOCSaveOffset));
6503 }
6504 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6505 // This does not mean the MTCTR instruction must use R12; it's easier
6506 // to model this as an extra parameter, so do that.
6507 if (isELFv2ABI && !CFlags.IsPatchPoint)
6508 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6509 }
6510
6511 // Build a sequence of copy-to-reg nodes chained together with token chain
6512 // and flag operands which copy the outgoing args into the appropriate regs.
6513 SDValue InFlag;
6514 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6515 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6516 RegsToPass[i].second, InFlag);
6517 InFlag = Chain.getValue(1);
6518 }
6519
6520 if (CFlags.IsTailCall && !IsSibCall)
6521 PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6522 TailCallArguments);
6523
6524 return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
6525 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6526}
6527
6528SDValue PPCTargetLowering::LowerCall_Darwin(
6529 SDValue Chain, SDValue Callee, CallFlags CFlags,
6530 const SmallVectorImpl<ISD::OutputArg> &Outs,
6531 const SmallVectorImpl<SDValue> &OutVals,
6532 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6533 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6534 const CallBase *CB) const {
6535 unsigned NumOps = Outs.size();
6536
6537 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6538 bool isPPC64 = PtrVT == MVT::i64;
6539 unsigned PtrByteSize = isPPC64 ? 8 : 4;
6540
6541 MachineFunction &MF = DAG.getMachineFunction();
6542
6543 // Mark this function as potentially containing a tail call. As a
6544 // consequence, the frame pointer will be used for dynamic alloca and for
6545 // restoring the caller's stack pointer in this function's epilog. This is
6546 // done because the tail-called function might overwrite the value in this
6547 // function's (MF) stack pointer stack slot 0(SP).
6548 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
6549 CFlags.CallConv == CallingConv::Fast)
6550 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6551
6552 // Count how many bytes are to be pushed on the stack, including the linkage
6553 // area, and parameter passing area. We start with 24/48 bytes, which is
6554 // prereserved space for [SP][CR][LR][3 x unused].
6555 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6556 unsigned NumBytes = LinkageSize;
6557
6558 // Add up all the space actually used.
6559 // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
6560 // they all go in registers, but we must reserve stack space for them for
6561 // possible use by the caller. In varargs or 64-bit calls, parameters are
6562 // assigned stack space in order, with padding so Altivec parameters are
6563 // 16-byte aligned.
6564 unsigned nAltivecParamsAtEnd = 0;
6565 for (unsigned i = 0; i != NumOps; ++i) {
6566 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6567 EVT ArgVT = Outs[i].VT;
6568 // Varargs Altivec parameters are padded to a 16 byte boundary.
6569 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
6570 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
6571 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
6572 if (!CFlags.IsVarArg && !isPPC64) {
6573 // Non-varargs Altivec parameters go after all the non-Altivec
6574 // parameters; handle those later so we know how much padding we need.
6575 nAltivecParamsAtEnd++;
6576 continue;
6577 }
6578 // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
6579 NumBytes = ((NumBytes+15)/16)*16;
6580 }
6581 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6582 }
6583
6584 // Allow for Altivec parameters at the end, if needed.
6585 if (nAltivecParamsAtEnd) {
6586 NumBytes = ((NumBytes+15)/16)*16;
6587 NumBytes += 16*nAltivecParamsAtEnd;
6588 }
6589
6590 // The prolog code of the callee may store up to 8 GPR argument registers to
6591 // the stack, allowing va_start to index over them in memory if it is varargs.
6592 // Because we cannot tell if this is needed on the caller side, we have to
6593 // conservatively assume that it is needed. As such, make sure we have at
6594 // least enough stack space for the caller to store the 8 GPRs.
6595 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6596
6597 // Tail call needs the stack to be aligned.
6598 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
6599 CFlags.CallConv == CallingConv::Fast)
6600 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6601
6602 // Calculate by how many bytes the stack has to be adjusted in case of tail
6603 // call optimization.
6604 int SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6605
6606 // To protect arguments on the stack from being clobbered in a tail call,
6607 // force all the loads to happen before doing any other lowering.
6608 if (CFlags.IsTailCall)
6609 Chain = DAG.getStackArgumentTokenFactor(Chain);
6610
6611 // Adjust the stack pointer for the new arguments...
6612 // These operations are automatically eliminated by the prolog/epilog pass
6613 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6614 SDValue CallSeqStart = Chain;
6615
6616 // Load the return address and frame pointer so they can be moved somewhere else
6617 // later.
6618 SDValue LROp, FPOp;
6619 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6620
6621 // Set up a copy of the stack pointer for use loading and storing any
6622 // arguments that may not fit in the registers available for argument
6623 // passing.
6624 SDValue StackPtr;
6625 if (isPPC64)
6626 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6627 else
6628 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
6629
6630 // Figure out which arguments are going to go in registers, and which in
6631 // memory. Also, if this is a vararg function, floating point operations
6632 // must be stored to our stack, and loaded into integer regs as well, if
6633 // any integer regs are available for argument passing.
6634 unsigned ArgOffset = LinkageSize;
6635 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6636
6637 static const MCPhysReg GPR_32[] = { // 32-bit registers.
6638 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6639 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
6640 };
6641 static const MCPhysReg GPR_64[] = { // 64-bit registers.
6642 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6643 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6644 };
6645 static const MCPhysReg VR[] = {
6646 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6647 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6648 };
6649 const unsigned NumGPRs = array_lengthof(GPR_32);
6650 const unsigned NumFPRs = 13;
6651 const unsigned NumVRs = array_lengthof(VR);
6652
6653 const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
6654
6655 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6656 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6657
6658 SmallVector<SDValue, 8> MemOpChains;
6659 for (unsigned i = 0; i != NumOps; ++i) {
6660 SDValue Arg = OutVals[i];
6661 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6662
6663 // PtrOff will be used to store the current argument to the stack if a
6664 // register cannot be found for it.
6665 SDValue PtrOff;
6666
6667 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6668
6669 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6670
6671 // On PPC64, promote integers to 64-bit values.
6672 if (isPPC64 && Arg.getValueType() == MVT::i32) {
6673 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6674 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6675 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6676 }
6677
6678 // FIXME memcpy is used way more than necessary. Correctness first.
6679 // Note: "by value" is code for passing a structure by value, not
6680 // basic types.
6681 if (Flags.isByVal()) {
6682 unsigned Size = Flags.getByValSize();
6683 // Very small objects are passed right-justified. Everything else is
6684 // passed left-justified.
6685 if (Size==1 || Size==2) {
6686 EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
6687 if (GPR_idx != NumGPRs) {
6688 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6689 MachinePointerInfo(), VT);
6690 MemOpChains.push_back(Load.getValue(1));
6691 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6692
6693 ArgOffset += PtrByteSize;
6694 } else {
6695 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6696 PtrOff.getValueType());
6697 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6698 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6699 CallSeqStart,
6700 Flags, DAG, dl);
6701 ArgOffset += PtrByteSize;
6702 }
6703 continue;
6704 }
6705 // Copy entire object into memory. There are cases where gcc-generated
6706 // code assumes it is there, even if it could be put entirely into
6707 // registers. (This is not what the doc says.)
6708 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6709 CallSeqStart,
6710 Flags, DAG, dl);
6711
6712 // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
6713 // copy the pieces of the object that fit into registers from the
6714 // parameter save area.
6715 for (unsigned j=0; j<Size; j+=PtrByteSize) {
6716 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6717 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6718 if (GPR_idx != NumGPRs) {
6719 SDValue Load =
6720 DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
6721 MemOpChains.push_back(Load.getValue(1));
6722 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6723 ArgOffset += PtrByteSize;
6724 } else {
6725 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6726 break;
6727 }
6728 }
6729 continue;
6730 }
6731
6732 switch (Arg.getSimpleValueType().SimpleTy) {
6733 default: llvm_unreachable("Unexpected ValueType for argument!");
6734 case MVT::i1:
6735 case MVT::i32:
6736 case MVT::i64:
6737 if (GPR_idx != NumGPRs) {
6738 if (Arg.getValueType() == MVT::i1)
6739 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);
6740
6741 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6742 } else {
6743 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6744 isPPC64, CFlags.IsTailCall, false, MemOpChains,
6745 TailCallArguments, dl);
6746 }
6747 ArgOffset += PtrByteSize;
6748 break;
6749 case MVT::f32:
6750 case MVT::f64:
6751 if (FPR_idx != NumFPRs) {
6752 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6753
6754 if (CFlags.IsVarArg) {
6755 SDValue Store =
6756 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6757 MemOpChains.push_back(Store);
6758
6759 // Float varargs are always shadowed in available integer registers
6760 if (GPR_idx != NumGPRs) {
6761 SDValue Load =
6762 DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
6763 MemOpChains.push_back(Load.getValue(1));
6764 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6765 }
6766 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
6767 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6768 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6769 SDValue Load =
6770 DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
6771 MemOpChains.push_back(Load.getValue(1));
6772 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6773 }
6774 } else {
6775 // If we have any FPRs remaining, we may also have GPRs remaining.
6776 // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
6777 // GPRs.
6778 if (GPR_idx != NumGPRs)
6779 ++GPR_idx;
6780 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
6781 !isPPC64) // PPC64 has 64-bit GPR's obviously :)
6782 ++GPR_idx;
6783 }
6784 } else
6785 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6786 isPPC64, CFlags.IsTailCall, false, MemOpChains,
6787 TailCallArguments, dl);
6788 if (isPPC64)
6789 ArgOffset += 8;
6790 else
6791 ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
6792 break;
6793 case MVT::v4f32:
6794 case MVT::v4i32:
6795 case MVT::v8i16:
6796 case MVT::v16i8:
6797 if (CFlags.IsVarArg) {
6798 // These go aligned on the stack, or in the corresponding R registers
6799 // when within range. The Darwin PPC ABI doc claims they also go in
6800 // V registers; in fact gcc does this only for arguments that are
6801 // prototyped, not for those that match the ... We do it for all
6802 // arguments, and it seems to work.
6803 while (ArgOffset % 16 !=0) {
6804 ArgOffset += PtrByteSize;
6805 if (GPR_idx != NumGPRs)
6806 GPR_idx++;
6807 }
6808 // We could elide this store in the case where the object fits
6809 // entirely in R registers. Maybe later.
6810 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
6811 DAG.getConstant(ArgOffset, dl, PtrVT));
6812 SDValue Store =
6813 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6814 MemOpChains.push_back(Store);
6815 if (VR_idx != NumVRs) {
6816 SDValue Load =
6817 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6818 MemOpChains.push_back(Load.getValue(1));
6819 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6820 }
6821 ArgOffset += 16;
6822 for (unsigned i=0; i<16; i+=PtrByteSize) {
6823 if (GPR_idx == NumGPRs)
6824 break;
6825 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6826 DAG.getConstant(i, dl, PtrVT));
6827 SDValue Load =
6828 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6829 MemOpChains.push_back(Load.getValue(1));
6830 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6831 }
6832 break;
6833 }
6834
6835 // Non-varargs Altivec params generally go in registers, but have
6836 // stack space allocated at the end.
6837 if (VR_idx != NumVRs) {
6838 // Doesn't have GPR space allocated.
6839 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6840 } else if (nAltivecParamsAtEnd==0) {
6841 // We are emitting Altivec params in order.
6842 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6843 isPPC64, CFlags.IsTailCall, true, MemOpChains,
6844 TailCallArguments, dl);
6845 ArgOffset += 16;
6846 }
6847 break;
6848 }
6849 }
6850 // If all Altivec parameters fit in registers, as they usually do,
6851 // they get stack space following the non-Altivec parameters. We
6852 // don't track this here because nobody below needs it.
6853 // If there are more Altivec parameters than fit in registers emit
6854 // the stores here.
6855 if (!CFlags.IsVarArg && nAltivecParamsAtEnd > NumVRs) {
6856 unsigned j = 0;
6857 // Offset is aligned; skip 1st 12 params which go in V registers.
6858 ArgOffset = ((ArgOffset+15)/16)*16;
6859 ArgOffset += 12*16;
6860 for (unsigned i = 0; i != NumOps; ++i) {
6861 SDValue Arg = OutVals[i];
6862 EVT ArgType = Outs[i].VT;
6863 if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
6864 ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
6865 if (++j > NumVRs) {
6866 SDValue PtrOff;
6867 // We are emitting Altivec params in order.
6868 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6869 isPPC64, CFlags.IsTailCall, true, MemOpChains,
6870 TailCallArguments, dl);
6871 ArgOffset += 16;
6872 }
6873 }
6874 }
6875 }
6876
6877 if (!MemOpChains.empty())
6878 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6879
6880 // On Darwin, R12 must contain the address of an indirect callee. This does
6881 // not mean the MTCTR instruction must use R12; it's easier to model this as
6882 // an extra parameter, so do that.
6883 if (CFlags.IsIndirect) {
6884 assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
6885 RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
6886 PPC::R12), Callee));
6887 }
6888
6889 // Build a sequence of copy-to-reg nodes chained together with token chain
6890 // and flag operands which copy the outgoing args into the appropriate regs.
6891 SDValue InFlag;
6892 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6893 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6894 RegsToPass[i].second, InFlag);
6895 InFlag = Chain.getValue(1);
6896 }
6897
6898 if (CFlags.IsTailCall)
6899 PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6900 TailCallArguments);
6901
6902 return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
6903 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6904}
6905
6906static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6907 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6908 CCState &State) {
6909
6910 const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6911 State.getMachineFunction().getSubtarget());
6912 const bool IsPPC64 = Subtarget.isPPC64();
6913 const Align PtrAlign = IsPPC64 ? Align(8) : Align(4);
6914 const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
6915
6916 assert((!ValVT.isInteger() ||
6917 (ValVT.getFixedSizeInBits() <= RegVT.getFixedSizeInBits())) &&
6918 "Integer argument exceeds register size: should have been legalized");
6919
6920 if (ValVT == MVT::f128)
6921 report_fatal_error("f128 is unimplemented on AIX.");
6922
6923 if (ArgFlags.isNest())
6924 report_fatal_error("Nest arguments are unimplemented.");
6925
6926 if (ValVT.isVector() || LocVT.isVector())
6927 report_fatal_error("Vector arguments are unimplemented on AIX.");
6928
6929 static const MCPhysReg GPR_32[] = {// 32-bit registers.
6930 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6931 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6932 static const MCPhysReg GPR_64[] = {// 64-bit registers.
6933 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6934 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6935
6936 if (ArgFlags.isByVal()) {
6937 if (ArgFlags.getNonZeroByValAlign() > PtrAlign)
6938 report_fatal_error("Pass-by-value arguments with alignment greater than "
6939 "register width are not supported.");
6940
6941 const unsigned ByValSize = ArgFlags.getByValSize();
6942
6943 // An empty aggregate parameter takes up no storage and no registers,
6944 // but needs a MemLoc for a stack slot for the formal arguments side.
6945 if (ByValSize == 0) {
6946 State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6947 State.getNextStackOffset(), RegVT,
6948 LocInfo));
6949 return false;
6950 }
6951
6952 const unsigned StackSize = alignTo(ByValSize, PtrAlign);
6953 unsigned Offset = State.AllocateStack(StackSize, PtrAlign);
6954 for (const unsigned E = Offset + StackSize; Offset < E;
6955 Offset += PtrAlign.value()) {
6956 if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
6957 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6958 else {
6959 State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6960 Offset, MVT::INVALID_SIMPLE_VALUE_TYPE,
6961 LocInfo));
6962 break;
6963 }
6964 }
6965 return false;
6966 }
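  // Worked example (a sketch of the loop above): a 12-byte by-value argument
  // on 32-bit AIX allocates StackSize == 12 and walks three word-sized
  // pieces. With three GPRs still free it produces three RegLocs; if only one
  // GPR is left, it produces one RegLoc, then a single MemLoc covering the
  // remainder and stops, leaving the rest of the aggregate in the parameter
  // save area.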
6967
6968 // Arguments always reserve parameter save area.
6969 switch (ValVT.SimpleTy) {
6970 default:
6971 report_fatal_error("Unhandled value type for argument.");
6972 case MVT::i64:
6973 // i64 arguments should have been split to i32 for PPC32.
6974 assert(IsPPC64 && "PPC32 should have split i64 values.");
6975 LLVM_FALLTHROUGH;
6976 case MVT::i1:
6977 case MVT::i32: {
6978 const unsigned Offset = State.AllocateStack(PtrAlign.value(), PtrAlign);
6979 // AIX integer arguments are always passed in register width.
6980 if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
6981 LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
6982 : CCValAssign::LocInfo::ZExt;
6983 if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
6984 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6985 else
6986 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
6987
6988 return false;
6989 }
6990 case MVT::f32:
6991 case MVT::f64: {
6992 // Parameter save area (PSA) is reserved even if the float passes in fpr.
6993 const unsigned StoreSize = LocVT.getStoreSize();
6994 // Floats are always 4-byte aligned in the PSA on AIX.
6995 // This includes f64 in 64-bit mode for ABI compatibility.
6996 const unsigned Offset =
6997 State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
6998 unsigned FReg = State.AllocateReg(FPR);
6999 if (FReg)
7000 State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
7001
7002 // Reserve and initialize GPRs or initialize the PSA as required.
7003 for (unsigned I = 0; I < StoreSize; I += PtrAlign.value()) {
7004 if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
7005 assert(FReg && "An FPR should be available when a GPR is reserved.");
7006 if (State.isVarArg()) {
7007 // Successfully reserved GPRs are only initialized for vararg calls.
7008 // Custom handling is required for:
7009 // f64 in PPC32 needs to be split into 2 GPRs.
7010 // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
7011 State.addLoc(
7012 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
7013 }
7014 } else {
7015 // If there are insufficient GPRs, the PSA needs to be initialized.
7016 // Initialization occurs even if an FPR was initialized for
7017 // compatibility with the AIX XL compiler. The full memory for the
7018 // argument will be initialized even if a prior word is saved in GPR.
7019 // A custom memLoc is used when the argument also passes in FPR so
7020 // that the callee handling can skip over it easily.
7021 State.addLoc(
7022 FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
7023 LocInfo)
7024 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7025 break;
7026 }
7027 }
7028
7029 return false;
7030 }
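  // Worked example (sketch): a vararg f64 on 32-bit AIX reserves 8 bytes in
  // the parameter save area at 4-byte alignment. The loop above then tries to
  // reserve two GPRs; for a vararg call each successfully reserved GPR also
  // gets a custom RegLoc so the callee can reassemble the double from the GPR
  // pair. If the GPRs run out partway, a MemLoc (custom when an FPR was
  // assigned) for the whole argument is added instead and the loop stops.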
7031 }
7032 return true;
7033}
7034
7035static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
7036 bool IsPPC64) {
7037 assert((IsPPC64 || SVT != MVT::i64) &&
7038 "i64 should have been split for 32-bit codegen.");
7039
7040 switch (SVT) {
7041 default:
7042 report_fatal_error("Unexpected value type for formal argument");
7043 case MVT::i1:
7044 case MVT::i32:
7045 case MVT::i64:
7046 return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7047 case MVT::f32:
7048 return &PPC::F4RCRegClass;
7049 case MVT::f64:
7050 return &PPC::F8RCRegClass;
7051 }
7052}
7053
7054static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
7055 SelectionDAG &DAG, SDValue ArgValue,
7056 MVT LocVT, const SDLoc &dl) {
7057 assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
7058 assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
7059
7060 if (Flags.isSExt())
7061 ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
7062 DAG.getValueType(ValVT));
7063 else if (Flags.isZExt())
7064 ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
7065 DAG.getValueType(ValVT));
7066
7067 return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
7068}
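// Usage sketch: an i1 or i32 formal argument that arrives widened in a
// 64-bit GPR is first wrapped in AssertSext/AssertZext (when the extension
// kind is known from the argument flags) and then truncated back to its
// declared type, e.g. (truncate i1 (AssertZext i64 %copy, i1)).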
7069
7070static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
7071 const unsigned LASize = FL->getLinkageSize();
7072
7073 if (PPC::GPRCRegClass.contains(Reg)) {
7074 assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
7075 "Reg must be a valid argument register!");
7076 return LASize + 4 * (Reg - PPC::R3);
7077 }
7078
7079 if (PPC::G8RCRegClass.contains(Reg)) {
7080 assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
7081 "Reg must be a valid argument register!");
7082 return LASize + 8 * (Reg - PPC::X3);
7083 }
7084
7085 llvm_unreachable("Only general purpose registers expected.");
7086}
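// Usage sketch: each AIX GPR argument register has a fixed home in the
// caller's parameter save area, immediately after the linkage area. Assuming
// the usual AIX linkage sizes (24 bytes for 32-bit, 48 for 64-bit), this
// maps, e.g., PPC::R5 to 24 + 4 * 2 == 32 and PPC::X7 to 48 + 8 * 4 == 80.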
7087
7088// AIX ABI Stack Frame Layout:
7089//
7090// Low Memory +--------------------------------------------+
7091// SP +---> | Back chain | ---+
7092// | +--------------------------------------------+ |
7093// | | Saved Condition Register | |
7094// | +--------------------------------------------+ |
7095// | | Saved Linkage Register | |
7096// | +--------------------------------------------+ | Linkage Area
7097// | | Reserved for compilers | |
7098// | +--------------------------------------------+ |
7099// | | Reserved for binders | |
7100// | +--------------------------------------------+ |
7101// | | Saved TOC pointer | ---+
7102// | +--------------------------------------------+
7103// | | Parameter save area |
7104// | +--------------------------------------------+
7105// | | Alloca space |
7106// | +--------------------------------------------+
7107// | | Local variable space |
7108// | +--------------------------------------------+
7109// | | Float/int conversion temporary |
7110// | +--------------------------------------------+
7111// | | Save area for AltiVec registers |
7112// | +--------------------------------------------+
7113// | | AltiVec alignment padding |
7114// | +--------------------------------------------+
7115// | | Save area for VRSAVE register |
7116// | +--------------------------------------------+
7117// | | Save area for General Purpose registers |
7118// | +--------------------------------------------+
7119// | | Save area for Floating Point registers |
7120// | +--------------------------------------------+
7121// +---- | Back chain |
7122// High Memory +--------------------------------------------+
7123//
7124// Specifications:
7125// AIX 7.2 Assembler Language Reference
7126// Subroutine linkage convention
7127
7128SDValue PPCTargetLowering::LowerFormalArguments_AIX(
7129 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
7130 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7131 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7132
7133 assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
7134 CallConv == CallingConv::Fast) &&
7135 "Unexpected calling convention!");
7136
7137 if (getTargetMachine().Options.GuaranteedTailCallOpt)
7138 report_fatal_error("Tail call support is unimplemented on AIX.");
7139
7140 if (useSoftFloat())
7141 report_fatal_error("Soft float support is unimplemented on AIX.");
7142
7143 const PPCSubtarget &Subtarget =
7144 static_cast<const PPCSubtarget &>(DAG.getSubtarget());
7145
7146 const bool IsPPC64 = Subtarget.isPPC64();
7147 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7148
7149 // Assign locations to all of the incoming arguments.
7150 SmallVector<CCValAssign, 16> ArgLocs;
7151 MachineFunction &MF = DAG.getMachineFunction();
7152 MachineFrameInfo &MFI = MF.getFrameInfo();
7153 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
7154
7155 const EVT PtrVT = getPointerTy(MF.getDataLayout());
7156 // Reserve space for the linkage area on the stack.
7157 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7158 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7159 CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
7160
7161 SmallVector<SDValue, 8> MemOps;
7162
7163 for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
7164 CCValAssign &VA = ArgLocs[I++];
7165 MVT LocVT = VA.getLocVT();
7166 ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
7167
7168 // For compatibility with the AIX XL compiler, the float args in the
7169 // parameter save area are initialized even if the argument is available
7170 // in register. The caller is required to initialize both the register
7171 // and memory, however, the callee can choose to expect it in either.
7172 // The memloc is dismissed here because the argument is retrieved from
7173 // the register.
7174 if (VA.isMemLoc() && VA.needsCustom())
7175 continue;
7176
7177 if (Flags.isByVal() && VA.isMemLoc()) {
7178 const unsigned Size =
7179 alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
7180 PtrByteSize);
7181 const int FI = MF.getFrameInfo().CreateFixedObject(
7182 Size, VA.getLocMemOffset(), /* IsImmutable */ false,
7183 /* IsAliased */ true);
7184 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7185 InVals.push_back(FIN);
7186
7187 continue;
7188 }
7189
7190 if (Flags.isByVal()) {
7191 assert(VA.isRegLoc() && "MemLocs should already be handled.");
7192
7193 const MCPhysReg ArgReg = VA.getLocReg();
7194 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
7195
7196 if (Flags.getNonZeroByValAlign() > PtrByteSize)
7197 report_fatal_error("Over aligned byvals not supported yet.");
7198
7199 const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
7200 const int FI = MF.getFrameInfo().CreateFixedObject(
7201 StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
7202 /* IsAliased */ true);
7203 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7204 InVals.push_back(FIN);
7205
7206 // Add live ins for all the RegLocs for the same ByVal.
7207 const TargetRegisterClass *RegClass =
7208 IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7209
7210 auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
7211 unsigned Offset) {
7212 const unsigned VReg = MF.addLiveIn(PhysReg, RegClass);
7213 // Since the caller side has left-justified the aggregate in the
7214 // register, we can simply store the entire register into the stack
7215 // slot.
7216 SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7217 // The store to the fixed stack object is needed because accessing a
7218 // field of the ByVal will use a gep and load. Ideally we will optimize
7219 // to extracting the value from the register directly, and elide the
7220 // stores when the argument's address is not taken, but that will need to
7221 // be future work.
7222 SDValue Store = DAG.getStore(
7223 CopyFrom.getValue(1), dl, CopyFrom,
7224 DAG.getObjectPtrOffset(dl, FIN, TypeSize::Fixed(Offset)),
7225 MachinePointerInfo::getFixedStack(MF, FI, Offset));
7226
7227 MemOps.push_back(Store);
7228 };
7229
7230 unsigned Offset = 0;
7231 HandleRegLoc(VA.getLocReg(), Offset);
7232 Offset += PtrByteSize;
7233 for (; Offset != StackSize && ArgLocs[I].isRegLoc();
7234 Offset += PtrByteSize) {
7235 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7236 "RegLocs should be for ByVal argument.");
7237
7238 const CCValAssign RL = ArgLocs[I++];
7239 HandleRegLoc(RL.getLocReg(), Offset);
7240 }
7241
7242 if (Offset != StackSize) {
7243 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7244 "Expected MemLoc for remaining bytes.");
7245 assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
7246 // Consume the MemLoc. The InVal has already been emitted, so nothing
7247 // more needs to be done.
7248 ++I;
7249 }
7250
7251 continue;
7252 }
7253
7254 EVT ValVT = VA.getValVT();
7255 if (VA.isRegLoc() && !VA.needsCustom()) {
7256 MVT::SimpleValueType SVT = ValVT.getSimpleVT().SimpleTy;
7257 unsigned VReg =
7258 MF.addLiveIn(VA.getLocReg(), getRegClassForSVT(SVT, IsPPC64));
7259 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7260 if (ValVT.isScalarInteger() &&
7261 (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
7262 ArgValue =
7263 truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
7264 }
7265 InVals.push_back(ArgValue);
7266 continue;
7267 }
7268 if (VA.isMemLoc()) {
7269 const unsigned LocSize = LocVT.getStoreSize();
7270 const unsigned ValSize = ValVT.getStoreSize();
7271 assert((ValSize <= LocSize) &&
7272 "Object size is larger than size of MemLoc");
7273 int CurArgOffset = VA.getLocMemOffset();
7274 // Objects are right-justified because AIX is big-endian.
7275 if (LocSize > ValSize)
7276 CurArgOffset += LocSize - ValSize;
7277 // Potential tail calls could cause overwriting of argument stack slots.
7278 const bool IsImmutable =
7279 !(getTargetMachine().Options.GuaranteedTailCallOpt &&
7280 (CallConv == CallingConv::Fast));
7281 int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
7282 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7283 SDValue ArgValue =
7284 DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
7285 InVals.push_back(ArgValue);
7286 continue;
7287 }
7288 }
7289
7290 // On AIX a minimum of 8 words is saved to the parameter save area.
7291 const unsigned MinParameterSaveArea = 8 * PtrByteSize;
7292 // Area that is at least reserved in the caller of this function.
7293 unsigned CallerReservedArea =
7294 std::max(CCInfo.getNextStackOffset(), LinkageSize + MinParameterSaveArea);
7295
7296 // Set the size that is at least reserved in caller of this function. Tail
7297 // call optimized function's reserved stack space needs to be aligned so
7298 // that taking the difference between two stack areas will result in an
7299 // aligned stack.
7300 CallerReservedArea =
7301 EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
7302 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
7303 FuncInfo->setMinReservedArea(CallerReservedArea);
7304
7305 if (isVarArg) {
7306 FuncInfo->setVarArgsFrameIndex(
7307 MFI.CreateFixedObject(PtrByteSize, CCInfo.getNextStackOffset(), true));
7308 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
7309
7310 static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7311 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7312
7313 static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7314 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7315 const unsigned NumGPArgRegs = array_lengthof(IsPPC64 ? GPR_64 : GPR_32);
7316
7317 // The fixed integer arguments of a variadic function are stored to the
7318 // VarArgsFrameIndex on the stack so that they may be loaded by
7319 // dereferencing the result of va_next.
7320 for (unsigned GPRIndex =
7321 (CCInfo.getNextStackOffset() - LinkageSize) / PtrByteSize;
7322 GPRIndex < NumGPArgRegs; ++GPRIndex) {
7323
7324 const unsigned VReg =
7325 IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
7326 : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
7327
7328 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
7329 SDValue Store =
7330 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
7331 MemOps.push_back(Store);
7332 // Increment the address for the next argument to store.
7333 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
7334 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
7335 }
7336 }
7337
7338 if (!MemOps.empty())
7339 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
7340
7341 return Chain;
7342}
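
The reserved-area computation that follows the argument loop above reduces to max(bytes actually used by the arguments, linkage area + 8 pointer-sized parameter slots), before being rounded up to the stack alignment by EnsureStackAlignment. A small standalone sketch of that minimum (illustrative only; the helper name is made up and the real linkage size comes from PPCFrameLowering::getLinkageSize()):

#include <algorithm>
#include <cstdio>

// Illustrative model of the minimum reserved area set by
// LowerFormalArguments_AIX: callers must reserve the linkage area plus at
// least eight pointer-sized parameter slots, or more if the argument list
// actually needs more stack bytes. Stack-alignment rounding is omitted.
static unsigned minReservedArea(unsigned NextStackOffset, bool IsPPC64) {
  const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
  const unsigned LinkageSize = 6 * PtrByteSize;          // [SP][CR][LR][2 x reserved][TOC]
  const unsigned MinParameterSaveArea = 8 * PtrByteSize; // 8 GPR argument slots
  return std::max(NextStackOffset, LinkageSize + MinParameterSaveArea);
}

int main() {
  // 64-bit: 48-byte linkage area + 64-byte parameter save area = 112 bytes
  // even when the arguments themselves need less than that.
  std::printf("%u\n", minReservedArea(/*NextStackOffset=*/64, /*IsPPC64=*/true));  // 112
  std::printf("%u\n", minReservedArea(/*NextStackOffset=*/160, /*IsPPC64=*/true)); // 160
  return 0;
}
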
7343
7344SDValue PPCTargetLowering::LowerCall_AIX(
7345 SDValue Chain, SDValue Callee, CallFlags CFlags,
7346 const SmallVectorImpl<ISD::OutputArg> &Outs,
7347 const SmallVectorImpl<SDValue> &OutVals,
7348 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7349 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
7350 const CallBase *CB) const {
7351 // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
7352 // AIX ABI stack frame layout.
7353
7354 assert((CFlags.CallConv == CallingConv::C ||
7355 CFlags.CallConv == CallingConv::Cold ||
7356 CFlags.CallConv == CallingConv::Fast) &&
7357 "Unexpected calling convention!");
7358
7359 if (CFlags.IsPatchPoint)
7360 report_fatal_error("This call type is unimplemented on AIX.");
7361
7362 const PPCSubtarget& Subtarget =
7363 static_cast<const PPCSubtarget&>(DAG.getSubtarget());
7364 if (Subtarget.hasAltivec())
7365 report_fatal_error("Altivec support is unimplemented on AIX.");
7366
7367 MachineFunction &MF = DAG.getMachineFunction();
7368 SmallVector<CCValAssign, 16> ArgLocs;
7369 CCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
7370 *DAG.getContext());
7371
7372 // Reserve space for the linkage save area (LSA) on the stack.
7373 // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7374 // [SP][CR][LR][2 x reserved][TOC].
7375 // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7376 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7377 const bool IsPPC64 = Subtarget.isPPC64();
7378 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
7379 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7380 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7381 CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
7382
7383 // The prolog code of the callee may store up to 8 GPR argument registers to
7384 // the stack, allowing va_start to index over them in memory if the callee
7385 // is variadic.
7386 // Because we cannot tell if this is needed on the caller side, we have to
7387 // conservatively assume that it is needed. As such, make sure we have at
7388 // least enough stack space for the caller to store the 8 GPRs.
7389 const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
7390 const unsigned NumBytes = std::max(LinkageSize + MinParameterSaveAreaSize,
7391 CCInfo.getNextStackOffset());
7392
7393 // Adjust the stack pointer for the new arguments...
7394 // These operations are automatically eliminated by the prolog/epilog pass.
7395 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
7396 SDValue CallSeqStart = Chain;
7397
7398 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
7399 SmallVector<SDValue, 8> MemOpChains;
7400
7401 // Set up a copy of the stack pointer for loading and storing any
7402 // arguments that may not fit in the registers available for argument
7403 // passing.
7404 const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
7405 : DAG.getRegister(PPC::R1, MVT::i32);
7406
7407 for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
7408 const unsigned ValNo = ArgLocs[I].getValNo();
7409 SDValue Arg = OutVals[ValNo];
7410 ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
7411
7412 if (Flags.isByVal()) {
7413 const unsigned ByValSize = Flags.getByValSize();
7414
7415 // Nothing to do for zero-sized ByVals on the caller side.
7416 if (!ByValSize) {
7417 ++I;
7418 continue;
7419 }
7420
7421 auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
7422 return DAG.getExtLoad(
7423 ISD::ZEXTLOAD, dl, PtrVT, Chain,
7424 (LoadOffset != 0)
7425 ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
7426 : Arg,
7427 MachinePointerInfo(), VT);
7428 };
7429
7430 unsigned LoadOffset = 0;
7431
7432 // Initialize the registers that are fully occupied by the by-val argument.
7433 while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
7434 SDValue Load = GetLoad(PtrVT, LoadOffset);
7435 MemOpChains.push_back(Load.getValue(1));
7436 LoadOffset += PtrByteSize;
7437 const CCValAssign &ByValVA = ArgLocs[I++];
7438 assert(ByValVA.getValNo() == ValNo &&
7439 "Unexpected location for pass-by-value argument.");
7440 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
7441 }
7442
7443 if (LoadOffset == ByValSize)
7444 continue;
7445
7446 // There must be one more loc to handle the remainder.
7447 assert(ArgLocs[I].getValNo() == ValNo &&
7448 "Expected additional location for by-value argument.");
7449
7450 if (ArgLocs[I].isMemLoc()) {
7451 assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
7452 const CCValAssign &ByValVA = ArgLocs[I++];
7453 ISD::ArgFlagsTy MemcpyFlags = Flags;
7454 // Only memcpy the bytes that are not passed in registers.
7455 MemcpyFlags.setByValSize(ByValSize - LoadOffset);
7456 Chain = CallSeqStart = createMemcpyOutsideCallSeq(
7457 (LoadOffset != 0)
7458 ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
7459 : Arg,
7460 DAG.getObjectPtrOffset(dl, StackPtr,
7461 TypeSize::Fixed(ByValVA.getLocMemOffset())),
7462 CallSeqStart, MemcpyFlags, DAG, dl);
7463 continue;
7464 }
7465
7466 // Initialize the final register residue.
7467 // Any residue that occupies the final by-val arg register must be
7468 // left-justified on AIX. Loads must be a power-of-2 size and cannot be
7469 // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
7470 // 2 and 1 byte loads.
7471 const unsigned ResidueBytes = ByValSize % PtrByteSize;
7472 assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
7473 "Unexpected register residue for by-value argument.");
7474 SDValue ResidueVal;
7475 for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
7476 const unsigned N = PowerOf2Floor(ResidueBytes - Bytes);
7477 const MVT VT =
7478 N == 1 ? MVT::i8
7479 : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
7480 SDValue Load = GetLoad(VT, LoadOffset);
7481 MemOpChains.push_back(Load.getValue(1));
7482 LoadOffset += N;
7483 Bytes += N;
7484
7485 // By-val arguments are passed left-justified in registers.
7486 // Every load here needs to be shifted, otherwise a full register load
7487 // should have been used.
7488 assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
7489 "Unexpected load emitted during handling of pass-by-value "
7490 "argument.");
7491 unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
7492 EVT ShiftAmountTy =
7493 getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
7494 SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
7495 SDValue ShiftedLoad =
7496 DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
7497 ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
7498 ShiftedLoad)
7499 : ShiftedLoad;
7500 }
7501
7502 const CCValAssign &ByValVA = ArgLocs[I++];
7503 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
7504 continue;
7505 }
7506
7507 CCValAssign &VA = ArgLocs[I++];
7508 const MVT LocVT = VA.getLocVT();
7509 const MVT ValVT = VA.getValVT();
7510
7511 switch (VA.getLocInfo()) {
7512 default:
7513 report_fatal_error("Unexpected argument extension type.");
7514 case CCValAssign::Full:
7515 break;
7516 case CCValAssign::ZExt:
7517 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7518 break;
7519 case CCValAssign::SExt:
7520 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7521 break;
7522 }
7523
7524 if (VA.isRegLoc() && !VA.needsCustom()) {
7525 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
7526 continue;
7527 }
7528
7529 if (VA.isMemLoc()) {
7530 SDValue PtrOff =
7531 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7532 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7533 MemOpChains.push_back(
7534 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
7535
7536 continue;
7537 }
7538
7539 // Custom handling is used for GPR initializations for vararg float
7540 // arguments.
7541 assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
7542 ValVT.isFloatingPoint() && LocVT.isInteger() &&
7543 "Unexpected register handling for calling convention.");
7544
7545 SDValue ArgAsInt =
7546 DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);
7547
7548 if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7549 // f32 in 32-bit GPR
7550 // f64 in 64-bit GPR
7551 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
7552 else if (Arg.getValueType().getFixedSizeInBits() <
7553 LocVT.getFixedSizeInBits())
7554 // f32 in 64-bit GPR.
7555 RegsToPass.push_back(std::make_pair(
7556 VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
7557 else {
7558 // f64 in two 32-bit GPRs
7559 // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7560 assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7561 "Unexpected custom register for argument!");
7562 CCValAssign &GPR1 = VA;
7563 SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
7564 DAG.getConstant(32, dl, MVT::i8));
7565 RegsToPass.push_back(std::make_pair(
7566 GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
7567
7568 if (I != E) {
7569 // If only 1 GPR was available, there will only be one custom GPR and
7570 // the argument will also be passed in memory.
7571 CCValAssign &PeekArg = ArgLocs[I];
7572 if (PeekArg.isRegLoc() && PeekArg.getValNo() == ValNo) {
7573 assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7574 CCValAssign &GPR2 = ArgLocs[I++];
7575 RegsToPass.push_back(std::make_pair(
7576 GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
7577 }
7578 }
7579 }
7580 }
7581
7582 if (!MemOpChains.empty())
7583 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
7584
7585 // For indirect calls, we need to save the TOC base to the stack for
7586 // restoration after the call.
7587 if (CFlags.IsIndirect) {
7588 assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7589 const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7590 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7591 const MVT PtrVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
7592 const unsigned TOCSaveOffset =
7593 Subtarget.getFrameLowering()->getTOCSaveOffset();
7594
7595 setUsesTOCBasePtr(DAG);
7596 SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
7597 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
7598 SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
7599 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7600 Chain = DAG.getStore(
7601 Val.getValue(1), dl, Val, AddPtr,
7602 MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7603 }
7604
7605 // Build a sequence of copy-to-reg nodes chained together with token chain
7606 // and flag operands which copy the outgoing args into the appropriate regs.
7607 SDValue InFlag;
7608 for (auto Reg : RegsToPass) {
7609 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InFlag);
7610 InFlag = Chain.getValue(1);
7611 }
7612
7613 const int SPDiff = 0;
7614 return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
7615 Callee, SPDiff, NumBytes, Ins, InVals, CB);
7616}
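
The residue handling near the end of the by-val path above reads the trailing ByValSize % PtrByteSize bytes with power-of-2 sized loads (largest first) and shifts each piece so the bytes land left-justified in the final argument register. A host-side sketch of that packing, assuming 64-bit GPRs and big-endian byte order (the function name and I/O are illustrative, not LLVM APIs):

#include <cstdint>
#include <cstdio>

// Model of the by-val residue packing: read the residue with 4-, 2- and
// 1-byte pieces and OR each piece into a 64-bit register so the aggregate
// bytes end up left-justified.
static uint64_t packResidue(const uint8_t *Src, unsigned ResidueBytes) {
  uint64_t Reg = 0;
  unsigned Bytes = 0;
  while (Bytes != ResidueBytes) {
    // Largest power of two not exceeding the remaining residue: 4, 2 or 1.
    unsigned N = 1;
    while (N * 2 <= ResidueBytes - Bytes)
      N *= 2;
    uint64_t Piece = 0;
    for (unsigned i = 0; i != N; ++i) // big-endian read of N bytes
      Piece = (Piece << 8) | Src[Bytes + i];
    Bytes += N;
    Reg |= Piece << (64 - Bytes * 8); // left-justify within the 64-bit GPR
  }
  return Reg;
}

int main() {
  // A 7-byte residue is read with a 4-, a 2- and a 1-byte load.
  const uint8_t Data[7] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77};
  std::printf("%016llx\n", (unsigned long long)packResidue(Data, 7)); // 1122334455667700
  return 0;
}
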
7617
7618bool
7619PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7620 MachineFunction &MF, bool isVarArg,
7621 const SmallVectorImpl<ISD::OutputArg> &Outs,
7622 LLVMContext &Context) const {
7623 SmallVector<CCValAssign, 16> RVLocs;
7624 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7625 return CCInfo.CheckReturn(
7626 Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7627 ? RetCC_PPC_Cold
7628 : RetCC_PPC);
7629}
7630
7631SDValue
7632PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7633 bool isVarArg,
7634 const SmallVectorImpl<ISD::OutputArg> &Outs,
7635 const SmallVectorImpl<SDValue> &OutVals,
7636 const SDLoc &dl, SelectionDAG &DAG) const {
7637 SmallVector<CCValAssign, 16> RVLocs;
7638 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7639 *DAG.getContext());
7640 CCInfo.AnalyzeReturn(Outs,
7641 (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7642 ? RetCC_PPC_Cold
7643 : RetCC_PPC);
7644
7645 SDValue Flag;
7646 SmallVector<SDValue, 4> RetOps(1, Chain);
7647
7648 // Copy the result values into the output registers.
7649 for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
7650 CCValAssign &VA = RVLocs[i];
7651 assert(VA.isRegLoc() && "Can only return in registers!");
7652
7653 SDValue Arg = OutVals[RealResIdx];
7654
7655 if (Subtarget.isAIXABI() &&
7656 (VA.getLocVT().isVector() || VA.getValVT().isVector()))
7657 report_fatal_error("Returning vector types not yet supported on AIX.");
7658
7659 switch (VA.getLocInfo()) {
7660 default: llvm_unreachable("Unknown loc info!");
7661 case CCValAssign::Full: break;
7662 case CCValAssign::AExt:
7663 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
7664 break;
7665 case CCValAssign::ZExt:
7666 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7667 break;
7668 case CCValAssign::SExt:
7669 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7670 break;
7671 }
7672 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7673 bool isLittleEndian = Subtarget.isLittleEndian();
7674 // Legalize ret f64 -> ret 2 x i32.
7675 SDValue SVal =
7676 DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7677 DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
7678 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
7679 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7680 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7681 DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
7682 Flag = Chain.getValue(1);
7683 VA = RVLocs[++i]; // skip ahead to next loc
7684 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
7685 } else
7686 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
7687 Flag = Chain.getValue(1);
7688 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7689 }
7690
7691 RetOps[0] = Chain; // Update chain.
7692
7693 // Add the flag if we have it.
7694 if (Flag.getNode())
7695 RetOps.push_back(Flag);
7696
7697 return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
7698}
7699
7700SDValue
7701PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7702 SelectionDAG &DAG) const {
7703 SDLoc dl(Op);
7704
7705 // Get the correct type for integers.
7706 EVT IntVT = Op.getValueType();
7707
7708 // Get the inputs.
7709 SDValue Chain = Op.getOperand(0);
7710 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7711 // Build a DYNAREAOFFSET node.
7712 SDValue Ops[2] = {Chain, FPSIdx};
7713 SDVTList VTs = DAG.getVTList(IntVT);
7714 return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
7715}
7716
7717SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7718 SelectionDAG &DAG) const {
7719 // When we pop the dynamic allocation we need to restore the SP link.
7720 SDLoc dl(Op);
7721
7722 // Get the correct type for pointers.
7723 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7724
7725 // Construct the stack pointer operand.
7726 bool isPPC64 = Subtarget.isPPC64();
7727 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7728 SDValue StackPtr = DAG.getRegister(SP, PtrVT);
7729
7730 // Get the operands for the STACKRESTORE.
7731 SDValue Chain = Op.getOperand(0);
7732 SDValue SaveSP = Op.getOperand(1);
7733
7734 // Load the old link SP.
7735 SDValue LoadLinkSP =
7736 DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
7737
7738 // Restore the stack pointer.
7739 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
7740
7741 // Store the old link SP.
7742 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
7743}
7744
7745SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7746 MachineFunction &MF = DAG.getMachineFunction();
7747 bool isPPC64 = Subtarget.isPPC64();
7748 EVT PtrVT = getPointerTy(MF.getDataLayout());
7749
7750 // Get the current return address save index. This mirrors the frame
7751 // pointer save index handling in getFramePointerFrameIndex below.
7752 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7753 int RASI = FI->getReturnAddrSaveIndex();
7754
7755 // If the return address save index hasn't been defined yet.
7756 if (!RASI) {
7757 // Find out the fixed offset of the return address save area.
7758 int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
7759 // Allocate the frame index for the return address save area.
7760 RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
7761 // Save the result.
7762 FI->setReturnAddrSaveIndex(RASI);
7763 }
7764 return DAG.getFrameIndex(RASI, PtrVT);
7765}
7766
7767SDValue
7768PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
7769 MachineFunction &MF = DAG.getMachineFunction();
7770 bool isPPC64 = Subtarget.isPPC64();
7771 EVT PtrVT = getPointerTy(MF.getDataLayout());
7772
7773 // Get current frame pointer save index. The users of this index will be
7774 // primarily DYNALLOC instructions.
7775 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7776 int FPSI = FI->getFramePointerSaveIndex();
7777
7778 // If the frame pointer save index hasn't been defined yet.
7779 if (!FPSI) {
7780 // Find out the fixed offset of the frame pointer save area.
7781 int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
7782 // Allocate the frame index for the frame pointer save area.
7783 FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
7784 // Save the result.
7785 FI->setFramePointerSaveIndex(FPSI);
7786 }
7787 return DAG.getFrameIndex(FPSI, PtrVT);
7788}
7789
7790SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
7791 SelectionDAG &DAG) const {
7792 MachineFunction &MF = DAG.getMachineFunction();
7793 // Get the inputs.
7794 SDValue Chain = Op.getOperand(0);
7795 SDValue Size = Op.getOperand(1);
7796 SDLoc dl(Op);
7797
7798 // Get the correct type for pointers.
7799 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7800 // Negate the size.
7801 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
7802 DAG.getConstant(0, dl, PtrVT), Size);
7803 // Construct a node for the frame pointer save index.
7804 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7805 SDValue Ops[3] = { Chain, NegSize, FPSIdx };
7806 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
7807 if (hasInlineStackProbe(MF))
7808 return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
7809 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
7810}
7811
7812SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
7813 SelectionDAG &DAG) const {
7814 MachineFunction &MF = DAG.getMachineFunction();
7815
7816 bool isPPC64 = Subtarget.isPPC64();
7817 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7818
7819 int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
7820 return DAG.getFrameIndex(FI, PtrVT);
7821}
7822
7823SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
7824 SelectionDAG &DAG) const {
7825 SDLoc DL(Op);
7826 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
7827 DAG.getVTList(MVT::i32, MVT::Other),
7828 Op.getOperand(0), Op.getOperand(1));
7829}
7830
7831SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
7832 SelectionDAG &DAG) const {
7833 SDLoc DL(Op);
7834 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
7835 Op.getOperand(0), Op.getOperand(1));
7836}
7837
7838SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
7839 if (Op.getValueType().isVector())
7840 return LowerVectorLoad(Op, DAG);
7841
7842 assert(Op.getValueType() == MVT::i1 &&
7843 "Custom lowering only for i1 loads");
7844
7845 // First, load 8 bits into 32 bits, then truncate to 1 bit.
7846
7847 SDLoc dl(Op);
7848 LoadSDNode *LD = cast<LoadSDNode>(Op);
7849
7850 SDValue Chain = LD->getChain();
7851 SDValue BasePtr = LD->getBasePtr();
7852 MachineMemOperand *MMO = LD->getMemOperand();
7853
7854 SDValue NewLD =
7855 DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
7856 BasePtr, MVT::i8, MMO);
7857 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
7858
7859 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
7860 return DAG.getMergeValues(Ops, dl);
7861}
7862
7863SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
7864 if (Op.getOperand(1).getValueType().isVector())
7865 return LowerVectorStore(Op, DAG);
7866
7867 assert(Op.getOperand(1).getValueType() == MVT::i1 &&
7868 "Custom lowering only for i1 stores");
7869
7870 // First, zero extend to 32 bits, then use a truncating store to 8 bits.
7871
7872 SDLoc dl(Op);
7873 StoreSDNode *ST = cast<StoreSDNode>(Op);
7874
7875 SDValue Chain = ST->getChain();
7876 SDValue BasePtr = ST->getBasePtr();
7877 SDValue Value = ST->getValue();
7878 MachineMemOperand *MMO = ST->getMemOperand();
7879
7880 Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
7881 Value);
7882 return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
7883}
7884
7885// FIXME: Remove this once the ANDI glue bug is fixed:
7886SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
7887 assert(Op.getValueType() == MVT::i1 &&
7888 "Custom lowering only for i1 results");
7889
7890 SDLoc DL(Op);
7891 return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
7892}
7893
7894SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
7895 SelectionDAG &DAG) const {
7896
7897 // Implements a vector truncate that fits in a vector register as a shuffle.
7898 // We want to legalize vector truncates down to where the source fits in
7899 // a vector register (and target is therefore smaller than vector register
7900 // size). At that point legalization will try to custom lower the sub-legal
7901 // result and get here - where we can contain the truncate as a single target
7902 // operation.
7903
7904 // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
7905 // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
7906 //
7907 // We will implement it for big-endian ordering as this (where x denotes
7908 // undefined):
7909 // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
7910 // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
7911 //
7912 // The same operation in little-endian ordering will be:
7913 // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
7914 // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
7915
7916 EVT TrgVT = Op.getValueType();
7917 assert(TrgVT.isVector() && "Vector type expected.");
7918 unsigned TrgNumElts = TrgVT.getVectorNumElements();
7919 EVT EltVT = TrgVT.getVectorElementType();
7920 if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
7921 TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
7922 !isPowerOf2_32(EltVT.getSizeInBits()))
7923 return SDValue();
7924
7925 SDValue N1 = Op.getOperand(0);
7926 EVT SrcVT = N1.getValueType();
7927 unsigned SrcSize = SrcVT.getSizeInBits();
7928 if (SrcSize > 256 ||
7929 !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
7930 !isPowerOf2_32(SrcVT.getVectorElementType().getSizeInBits()))
7931 return SDValue();
7932 if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
7933 return SDValue();
7934
7935 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
7936 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
7937
7938 SDLoc DL(Op);
7939 SDValue Op1, Op2;
7940 if (SrcSize == 256) {
7941 EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
7942 EVT SplitVT =
7943 N1.getValueType().getHalfNumVectorElementsVT(*DAG.getContext());
7944 unsigned SplitNumElts = SplitVT.getVectorNumElements();
7945 Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
7946 DAG.getConstant(0, DL, VecIdxTy));
7947 Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
7948 DAG.getConstant(SplitNumElts, DL, VecIdxTy));
7949 }
7950 else {
7951 Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
7952 Op2 = DAG.getUNDEF(WideVT);
7953 }
7954
7955 // First list the elements we want to keep.
7956 unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
7957 SmallVector<int, 16> ShuffV;
7958 if (Subtarget.isLittleEndian())
7959 for (unsigned i = 0; i < TrgNumElts; ++i)
7960 ShuffV.push_back(i * SizeMult);
7961 else
7962 for (unsigned i = 1; i <= TrgNumElts; ++i)
7963 ShuffV.push_back(i * SizeMult - 1);
7964
7965 // Populate the remaining elements with undefs.
7966 for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
7967 // ShuffV.push_back(i + WideNumElts);
7968 ShuffV.push_back(WideNumElts + 1);
7969
7970 Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
7971 Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
7972 return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
7973}
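
For the trunc <2 x i16> to <2 x i8> case described in the comment at the top of LowerTRUNCATEVector, SizeMult is 2 and WideNumElts is 16, so the mask built above keeps byte 0 of each halfword on little-endian and byte 1 on big-endian. A standalone sketch that only reproduces the ShuffV construction and prints it (illustrative only):

#include <cstdio>
#include <initializer_list>
#include <vector>

// Model of the ShuffV construction for trunc <2 x i16> -> <2 x i8>:
// SrcSize = 32, TrgVT size = 16, so SizeMult = 2; WideNumElts = 128 / 8 = 16.
int main() {
  const unsigned TrgNumElts = 2, SizeMult = 2, WideNumElts = 16;
  for (bool IsLittleEndian : {true, false}) {
    std::vector<int> ShuffV;
    if (IsLittleEndian)
      for (unsigned i = 0; i < TrgNumElts; ++i)
        ShuffV.push_back(i * SizeMult);     // keep the low byte of each i16
    else
      for (unsigned i = 1; i <= TrgNumElts; ++i)
        ShuffV.push_back(i * SizeMult - 1); // keep the last byte of each i16
    for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
      ShuffV.push_back(WideNumElts + 1);    // don't-care lanes
    std::printf("%s:", IsLittleEndian ? "LE" : "BE");
    for (int Idx : ShuffV)
      std::printf(" %d", Idx);
    std::printf("\n");
  }
  return 0;
}
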
7974
7975/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
7976/// possible.
7977SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
7978 // Not FP, or using SPE? Not a fsel.
7979 if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
7980 !Op.getOperand(2).getValueType().isFloatingPoint() || Subtarget.hasSPE())
7981 return Op;
7982
7983 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
7984
7985 EVT ResVT = Op.getValueType();
7986 EVT CmpVT = Op.getOperand(0).getValueType();
7987 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
7988 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
7989 SDLoc dl(Op);
7990 SDNodeFlags Flags = Op.getNode()->getFlags();
7991
7992 // We have xsmaxcdp/xsmincdp which are OK to emit even in the
7993 // presence of infinities.
7994 if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
7995 switch (CC) {
7996 default:
7997 break;
7998 case ISD::SETOGT:
7999 case ISD::SETGT:
8000 return DAG.getNode(PPCISD::XSMAXCDP, dl, Op.getValueType(), LHS, RHS);
8001 case ISD::SETOLT:
8002 case ISD::SETLT:
8003 return DAG.getNode(PPCISD::XSMINCDP, dl, Op.getValueType(), LHS, RHS);
8004 }
8005 }
8006
8007 // We might be able to do better than this under some circumstances, but in
8008 // general, fsel-based lowering of select is a finite-math-only optimization.
8009 // For more information, see section F.3 of the 2.06 ISA specification.
8010 // With ISA 3.0
8011 if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
8012 (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()))
8013 return Op;
8014
8015 // If the RHS of the comparison is a 0.0, we don't need to do the
8016 // subtraction at all.
8017 SDValue Sel1;
8018 if (isFloatingPointZero(RHS))
8019 switch (CC) {
8020 default: break; // SETUO etc aren't handled by fsel.
8021 case ISD::SETNE:
8022 std::swap(TV, FV);
8023 LLVM_FALLTHROUGH;
8024 case ISD::SETEQ:
8025 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8026 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8027 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8028 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8029 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8030 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8031 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
8032 case ISD::SETULT:
8033 case ISD::SETLT:
8034 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
8035 LLVM_FALLTHROUGH;
8036 case ISD::SETOGE:
8037 case ISD::SETGE:
8038 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8039 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8040 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8041 case ISD::SETUGT:
8042 case ISD::SETGT:
8043 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
8044 LLVM_FALLTHROUGH;
8045 case ISD::SETOLE:
8046 case ISD::SETLE:
8047 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8048 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8049 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8050 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
8051 }
8052
8053 SDValue Cmp;
8054 switch (CC) {
8055 default: break; // SETUO etc aren't handled by fsel.
8056 case ISD::SETNE:
8057 std::swap(TV, FV);
8058 LLVM_FALLTHROUGH;
8059 case ISD::SETEQ:
8060 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8061 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8062 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8063 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8064 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8065 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8066 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8067 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
8068 case ISD::SETULT:
8069 case ISD::SETLT:
8070 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8071 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8072 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8073 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8074 case ISD::SETOGE:
8075 case ISD::SETGE:
8076 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8077 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8078 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8079 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8080 case ISD::SETUGT:
8081 case ISD::SETGT:
8082 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8083 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8084 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8085 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8086 case ISD::SETOLE:
8087 case ISD::SETLE:
8088 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8089 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8090 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8091 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8092 }
8093 return Op;
8094}
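
All of the fsel cases above rely on FSEL(A, B, C) selecting B when A >= 0.0 and C otherwise; because NaN compares as "not >= 0", the transform is restricted to the no-NaNs/no-infs case checked earlier. A scalar model of the two subtraction-based patterns used for SETGE and SETLE (illustrative only):

#include <cstdio>

// Scalar model of PPCISD::FSEL: FSEL(A, B, C) == (A >= 0.0) ? B : C.
// NaN inputs fall through to C, which is why this is finite-math-only.
static double fsel(double A, double B, double C) { return A >= 0.0 ? B : C; }

int main() {
  double LHS = 1.0, RHS = 3.0, TV = 10.0, FV = 20.0;
  // select_cc (LHS >= RHS) TV FV  ==>  fsel(LHS - RHS, TV, FV)
  std::printf("%g\n", fsel(LHS - RHS, TV, FV)); // 20, since LHS < RHS
  // select_cc (LHS <= RHS) TV FV  ==>  fsel(RHS - LHS, TV, FV)
  std::printf("%g\n", fsel(RHS - LHS, TV, FV)); // 10, since LHS <= RHS
  return 0;
}
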
8095
8096static unsigned getPPCStrictOpcode(unsigned Opc) {
8097 switch (Opc) {
8098 default:
8099 llvm_unreachable("No strict version of this opcode!");
8100 case PPCISD::FCTIDZ:
8101 return PPCISD::STRICT_FCTIDZ;
8102 case PPCISD::FCTIWZ:
8103 return PPCISD::STRICT_FCTIWZ;
8104 case PPCISD::FCTIDUZ:
8105 return PPCISD::STRICT_FCTIDUZ;
8106 case PPCISD::FCTIWUZ:
8107 return PPCISD::STRICT_FCTIWUZ;
8108 case PPCISD::FCFID:
8109 return PPCISD::STRICT_FCFID;
8110 case PPCISD::FCFIDU:
8111 return PPCISD::STRICT_FCFIDU;
8112 case PPCISD::FCFIDS:
8113 return PPCISD::STRICT_FCFIDS;
8114 case PPCISD::FCFIDUS:
8115 return PPCISD::STRICT_FCFIDUS;
8116 }
8117}
8118
8119static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
8120 const PPCSubtarget &Subtarget) {
8121 SDLoc dl(Op);
8122 bool IsStrict = Op->isStrictFPOpcode();
8123 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8124 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8125
8126 // TODO: Any other flags to propagate?
8127 SDNodeFlags Flags;
8128 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8129
8130 // For strict nodes, source is the second operand.
8131 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8132 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8133 assert(Src.getValueType().isFloatingPoint());
8134 if (Src.getValueType() == MVT::f32) {
8135 if (IsStrict) {
8136 Src =
8137 DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
8138 DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
8139 Chain = Src.getValue(1);
8140 } else
8141 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
8142 }
8143 SDValue Conv;
8144 unsigned Opc = ISD::DELETED_NODE;
8145 switch (Op.getSimpleValueType().SimpleTy) {
8146 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
8147 case MVT::i32:
8148 Opc = IsSigned ? PPCISD::FCTIWZ
8149 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
8150 break;
8151 case MVT::i64:
8152 assert((IsSigned || Subtarget.hasFPCVT()) &&
8153 "i64 FP_TO_UINT is supported only with FPCVT");
8154 Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
8155 }
8156 if (IsStrict) {
8157 Opc = getPPCStrictOpcode(Opc);
8158 Conv = DAG.getNode(Opc, dl, DAG.getVTList(MVT::f64, MVT::Other),
8159 {Chain, Src}, Flags);
8160 } else {
8161 Conv = DAG.getNode(Opc, dl, MVT::f64, Src);
8162 }
8163 return Conv;
8164}
8165
8166void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
8167 SelectionDAG &DAG,
8168 const SDLoc &dl) const {
8169 SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
8170 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8171 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8172 bool IsStrict = Op->isStrictFPOpcode();
8173
8174 // Convert the FP value to an int value through memory.
8175 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
8176 (IsSigned || Subtarget.hasFPCVT());
8177 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
8178 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
8179 MachinePointerInfo MPI =
8180 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
8181
8182 // Emit a store to the stack slot.
8183 SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
8184 Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
8185 if (i32Stack) {
8186 MachineFunction &MF = DAG.getMachineFunction();
8187 Alignment = Align(4);
8188 MachineMemOperand *MMO =
8189 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
8190 SDValue Ops[] = { Chain, Tmp, FIPtr };
8191 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
8192 DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
8193 } else
8194 Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
8195
8196 // Result is a load from the stack slot. If loading 4 bytes, make sure to
8197 // add in a bias on big endian.
8198 if (Op.getValueType() == MVT::i32 && !i32Stack) {
8199 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
8200 DAG.getConstant(4, dl, FIPtr.getValueType()));
8201 MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
8202 }
8203
8204 RLI.Chain = Chain;
8205 RLI.Ptr = FIPtr;
8206 RLI.MPI = MPI;
8207 RLI.Alignment = Alignment;
8208}
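
The "+4 bias" note above reflects that the converted value sits in the low 32 bits of the f64-sized stack temporary, so a 4-byte reload has to start 4 bytes into the slot on big-endian targets. A minimal sketch of just that offset choice (illustrative only):

#include <cstdio>

// Model of the reload bias in LowerFP_TO_INTForReuse: the converted value
// occupies the low 32 bits of an 8-byte slot, so the i32 lives at byte
// offset 4 on big-endian targets and byte offset 0 on little-endian ones.
static unsigned i32ReloadOffset(bool IsLittleEndian) {
  return IsLittleEndian ? 0 : 4;
}

int main() {
  std::printf("little-endian reload offset: %u\n", i32ReloadOffset(true));  // 0
  std::printf("big-endian reload offset:    %u\n", i32ReloadOffset(false)); // 4
  return 0;
}
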
8209
8210/// Custom lowers floating point to integer conversions to use
8211/// the direct move instructions available in ISA 2.07 to avoid the
8212/// need for load/store combinations.
8213SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8214 SelectionDAG &DAG,
8215 const SDLoc &dl) const {
8216 SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8217 SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
8218 if (Op->isStrictFPOpcode())
8219 return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
8220 else
8221 return Mov;
8222}
8223
8224SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
8225 const SDLoc &dl) const {
8226 bool IsStrict = Op->isStrictFPOpcode();
8227 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8228 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8229 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8230 EVT SrcVT = Src.getValueType();
8231 EVT DstVT = Op.getValueType();
8232
8233 // FP to INT conversions are legal for f128.
8234 if (SrcVT == MVT::f128)
8235 return Op;
8236
8237 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
8238 // PPC (the libcall is not available).
8239 if (SrcVT == MVT::ppcf128) {
8240 if (DstVT == MVT::i32) {
8241 // TODO: Conservatively pass only nofpexcept flag here. Need to check and
8242 // set other fast-math flags to FP operations in both strict and
8243 // non-strict cases. (FP_TO_SINT, FSUB)
8244 SDNodeFlags Flags;
8245 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8246
8247 if (IsSigned) {
8248 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src,
8249 DAG.getIntPtrConstant(0, dl));
8250 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src,
8251 DAG.getIntPtrConstant(1, dl));
8252
8253 // Add the two halves of the long double in round-to-zero mode, and use
8254 // a smaller FP_TO_SINT.
8255 if (IsStrict) {
8256 SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
8257 DAG.getVTList(MVT::f64, MVT::Other),
8258 {Op.getOperand(0), Lo, Hi}, Flags);
8259 return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8260 DAG.getVTList(MVT::i32, MVT::Other),
8261 {Res.getValue(1), Res}, Flags);
8262 } else {
8263 SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
8264 return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
8265 }
8266 } else {
8267 const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
8268 APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
8269 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8270 SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
8271 if (IsStrict) {
8272 // Sel = Src < 0x80000000
8273 // FltOfs = select Sel, 0.0, 0x80000000
8274 // IntOfs = select Sel, 0, 0x80000000
8275 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8276 SDValue Chain = Op.getOperand(0);
8277 EVT SetCCVT =
8278 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8279 EVT DstSetCCVT =
8280 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8281 SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8282 Chain, true);
8283 Chain = Sel.getValue(1);
8284
8285 SDValue FltOfs = DAG.getSelect(
8286 dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8287 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8288
8289 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
8290 DAG.getVTList(SrcVT, MVT::Other),
8291 {Chain, Src, FltOfs}, Flags);
8292 Chain = Val.getValue(1);
8293 SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8294 DAG.getVTList(DstVT, MVT::Other),
8295 {Chain, Val}, Flags);
8296 Chain = SInt.getValue(1);
8297 SDValue IntOfs = DAG.getSelect(
8298 dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
8299 SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8300 return DAG.getMergeValues({Result, Chain}, dl);
8301 } else {
8302 // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
8303 // FIXME: generated code sucks.
8304 SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
8305 True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
8306 True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
8307 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
8308 return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
8309 }
8310 }
8311 }
8312
8313 return SDValue();
8314 }
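As an aside on the unsigned ppcf128 path above: the Sel/FltOfs/IntOfs scheme in the comments is the standard way to express fp_to_uint in terms of fp_to_sint. A minimal standalone sketch, assuming a hypothetical helper fptoui_via_signed and plain C++ doubles rather than DAG nodes: values below 2^31 convert directly, larger ones are biased down by 2^31 before the signed conversion and bit 31 is restored afterwards.

#include <cstdint>
#include <cstdio>

static uint32_t fptoui_via_signed(double X) {
  const double TwoE31 = 2147483648.0;        // same value as the 0x41e0000000000000 constant above
  if (X < TwoE31)
    return (uint32_t)(int32_t)X;             // small values: plain signed conversion
  // Large values: bias down by 2^31, convert signed, then restore bit 31.
  return (uint32_t)(int32_t)(X - TwoE31) ^ 0x80000000u;
}

int main() {
  printf("%u\n", fptoui_via_signed(123.0));          // 123
  printf("%u\n", fptoui_via_signed(3000000000.0));   // 3000000000
}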
8315
8316 if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
8317 return LowerFP_TO_INTDirectMove(Op, DAG, dl);
8318
8319 ReuseLoadInfo RLI;
8320 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8321
8322 return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8323 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8324}
8325
8326// We're trying to insert a regular store, S, and then a load, L. If the
8327// incoming value, O, is a load, we might just be able to have our load use the
8328// address used by O. However, we don't know if anything else will store to
8329// that address before we can load from it. To prevent this situation, we need
8330// to insert our load, L, into the chain as a peer of O. To do this, we give L
8331// the same chain operand as O, we create a token factor from the chain results
8332// of O and L, and we replace all uses of O's chain result with that token
8333// factor (see spliceIntoChain below for this last part).
8334bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
8335 ReuseLoadInfo &RLI,
8336 SelectionDAG &DAG,
8337 ISD::LoadExtType ET) const {
8338 // Conservatively skip reusing for constrained FP nodes.
8339 if (Op->isStrictFPOpcode())
8340 return false;
8341
8342 SDLoc dl(Op);
8343 bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
8344 (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
8345 if (ET == ISD::NON_EXTLOAD &&
8346 (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
8347 isOperationLegalOrCustom(Op.getOpcode(),
8348 Op.getOperand(0).getValueType())) {
8349
8350 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8351 return true;
8352 }
8353
8354 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
8355 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
8356 LD->isNonTemporal())
8357 return false;
8358 if (LD->getMemoryVT() != MemVT)
8359 return false;
8360
8361 RLI.Ptr = LD->getBasePtr();
8362 if (LD->isIndexed() && !LD->getOffset().isUndef()) {
8363     assert(LD->getAddressingMode() == ISD::PRE_INC &&
8364            "Non-pre-inc AM on PPC?");
8365 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
8366 LD->getOffset());
8367 }
8368
8369 RLI.Chain = LD->getChain();
8370 RLI.MPI = LD->getPointerInfo();
8371 RLI.IsDereferenceable = LD->isDereferenceable();
8372 RLI.IsInvariant = LD->isInvariant();
8373 RLI.Alignment = LD->getAlign();
8374 RLI.AAInfo = LD->getAAInfo();
8375 RLI.Ranges = LD->getRanges();
8376
8377 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
8378 return true;
8379}
8380
8381// Given the head of the old chain, ResChain, insert a token factor containing
8382// it and NewResChain, and make users of ResChain now be users of that token
8383// factor.
8384// TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
8385void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
8386 SDValue NewResChain,
8387 SelectionDAG &DAG) const {
8388 if (!ResChain)
8389 return;
8390
8391 SDLoc dl(NewResChain);
8392
8393 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
8394 NewResChain, DAG.getUNDEF(MVT::Other));
8395   assert(TF.getNode() != NewResChain.getNode() &&
8396          "A new TF really is required here");
8397
8398 DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
8399 DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
8400}
8401
8402/// Analyze the profitability of a direct move:
8403/// prefer a float load over an int load plus a direct move
8404/// when there is no integer use of the int load.
8405bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8406 SDNode *Origin = Op.getOperand(0).getNode();
8407 if (Origin->getOpcode() != ISD::LOAD)
8408 return true;
8409
8410 // If there is no LXSIBZX/LXSIHZX, like Power8,
8411 // prefer direct move if the memory size is 1 or 2 bytes.
8412 MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8413 if (!Subtarget.hasP9Vector() && MMO->getSize() <= 2)
8414 return true;
8415
8416 for (SDNode::use_iterator UI = Origin->use_begin(),
8417 UE = Origin->use_end();
8418 UI != UE; ++UI) {
8419
8420 // Only look at the users of the loaded value.
8421 if (UI.getUse().get().getResNo() != 0)
8422 continue;
8423
8424 if (UI->getOpcode() != ISD::SINT_TO_FP &&
8425 UI->getOpcode() != ISD::UINT_TO_FP &&
8426 UI->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8427 UI->getOpcode() != ISD::STRICT_UINT_TO_FP)
8428 return true;
8429 }
8430
8431 return false;
8432}
8433
8434static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
8435 const PPCSubtarget &Subtarget,
8436 SDValue Chain = SDValue()) {
8437 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8438 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8439 SDLoc dl(Op);
8440
8441 // TODO: Any other flags to propagate?
8442 SDNodeFlags Flags;
8443 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8444
8445 // If we have FCFIDS, then use it when converting to single-precision.
8446 // Otherwise, convert to double-precision and then round.
8447 bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
8448 unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
8449 : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
8450 EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
8451 if (Op->isStrictFPOpcode()) {
8452 if (!Chain)
8453 Chain = Op.getOperand(0);
8454 return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
8455 DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
8456 } else
8457 return DAG.getNode(ConvOpc, dl, ConvTy, Src);
8458}
8459
8460/// Custom lowers integer to floating point conversions to use
8461/// the direct move instructions available in ISA 2.07 to avoid the
8462/// need for load/store combinations.
8463SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8464 SelectionDAG &DAG,
8465 const SDLoc &dl) const {
8466   assert((Op.getValueType() == MVT::f32 ||
8467           Op.getValueType() == MVT::f64) &&
8468          "Invalid floating point type as target of conversion");
8469   assert(Subtarget.hasFPCVT() &&
8470          "Int to FP conversions with direct moves require FPCVT");
8471 SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
8472 bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8473 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8474 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8475 unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8476 SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
8477 return convertIntToFP(Op, Mov, DAG, Subtarget);
8478}
8479
8480static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8481
8482 EVT VecVT = Vec.getValueType();
8483   assert(VecVT.isVector() && "Expected a vector type.");
8484   assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8485
8486 EVT EltVT = VecVT.getVectorElementType();
8487 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8488 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8489
8490 unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8491 SmallVector<SDValue, 16> Ops(NumConcat);
8492 Ops[0] = Vec;
8493 SDValue UndefVec = DAG.getUNDEF(VecVT);
8494 for (unsigned i = 1; i < NumConcat; ++i)
8495 Ops[i] = UndefVec;
8496
8497 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
8498}
8499
8500SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
8501 const SDLoc &dl) const {
8502 bool IsStrict = Op->isStrictFPOpcode();
8503 unsigned Opc = Op.getOpcode();
8504 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8505   assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP ||
8506           Opc == ISD::STRICT_UINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP) &&
8507          "Unexpected conversion type");
8508   assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
8509          "Supports conversions to v2f64/v4f32 only.");
8510
8511 // TODO: Any other flags to propagate?
8512 SDNodeFlags Flags;
8513 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8514
8515 bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
8516 bool FourEltRes = Op.getValueType() == MVT::v4f32;
8517
8518 SDValue Wide = widenVec(DAG, Src, dl);
8519 EVT WideVT = Wide.getValueType();
8520 unsigned WideNumElts = WideVT.getVectorNumElements();
8521 MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
8522
8523 SmallVector<int, 16> ShuffV;
8524 for (unsigned i = 0; i < WideNumElts; ++i)
8525 ShuffV.push_back(i + WideNumElts);
8526
8527 int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
8528 int SaveElts = FourEltRes ? 4 : 2;
8529 if (Subtarget.isLittleEndian())
8530 for (int i = 0; i < SaveElts; i++)
8531 ShuffV[i * Stride] = i;
8532 else
8533 for (int i = 1; i <= SaveElts; i++)
8534 ShuffV[i * Stride - 1] = i - 1;
8535
8536 SDValue ShuffleSrc2 =
8537 SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
8538 SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
8539
8540 SDValue Extend;
8541 if (SignedConv) {
8542 Arrange = DAG.getBitcast(IntermediateVT, Arrange);
8543 EVT ExtVT = Src.getValueType();
8544 if (Subtarget.hasP9Altivec())
8545 ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
8546 IntermediateVT.getVectorNumElements());
8547
8548 Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
8549 DAG.getValueType(ExtVT));
8550 } else
8551 Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
8552
8553 if (IsStrict)
8554 return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
8555 {Op.getOperand(0), Extend}, Flags);
8556
8557 return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
8558}
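The shuffle mask construction in LowerINT_TO_FPVector above is easiest to see with concrete numbers. A small illustrative program (hypothetical v8i16-to-v4f32 case, little-endian assumed) that mirrors the ShuffV loop and prints the resulting mask:

#include <cstdio>

int main() {
  const unsigned WideNumElts = 8;           // v8i16 after widening
  const bool FourEltRes = true;             // converting to v4f32
  const bool LittleEndian = true;           // assumed target endianness
  int ShuffV[WideNumElts];
  for (unsigned i = 0; i < WideNumElts; ++i)
    ShuffV[i] = i + WideNumElts;            // start with every lane taken from the second source
  int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
  int SaveElts = FourEltRes ? 4 : 2;
  if (LittleEndian)
    for (int i = 0; i < SaveElts; i++) ShuffV[i * Stride] = i;
  else
    for (int i = 1; i <= SaveElts; i++) ShuffV[i * Stride - 1] = i - 1;
  for (unsigned i = 0; i < WideNumElts; ++i)
    printf("%d ", ShuffV[i]);               // prints: 0 9 1 11 2 13 3 15
  printf("\n");
}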
8559
8560SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
8561 SelectionDAG &DAG) const {
8562 SDLoc dl(Op);
8563 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8564 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8565 bool IsStrict = Op->isStrictFPOpcode();
8566 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8567 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
8568
8569 // TODO: Any other flags to propagate?
8570 SDNodeFlags Flags;
8571 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8572
8573 EVT InVT = Src.getValueType();
8574 EVT OutVT = Op.getValueType();
8575 if (OutVT.isVector() && OutVT.isFloatingPoint() &&
8576 isOperationCustom(Op.getOpcode(), InVT))
8577 return LowerINT_TO_FPVector(Op, DAG, dl);
8578
8579 // Conversions to f128 are legal.
8580 if (Op.getValueType() == MVT::f128)
8581 return Op;
8582
8583 // Don't handle ppc_fp128 here; let it be lowered to a libcall.
8584 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8585 return SDValue();
8586
8587 if (Src.getValueType() == MVT::i1)
8588 return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
8589 DAG.getConstantFP(1.0, dl, Op.getValueType()),
8590 DAG.getConstantFP(0.0, dl, Op.getValueType()));
8591
8592   // If we have direct moves, we can do all the conversion and skip the
8593   // store/load; however, without FPCVT we can't do most conversions.
8594 if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8595 Subtarget.isPPC64() && Subtarget.hasFPCVT())
8596 return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8597
8598   assert((IsSigned || Subtarget.hasFPCVT()) &&
8599          "UINT_TO_FP is supported only with FPCVT");
8600
8601 if (Src.getValueType() == MVT::i64) {
8602 SDValue SINT = Src;
8603 // When converting to single-precision, we actually need to convert
8604 // to double-precision first and then round to single-precision.
8605 // To avoid double-rounding effects during that operation, we have
8606 // to prepare the input operand. Bits that might be truncated when
8607 // converting to double-precision are replaced by a bit that won't
8608 // be lost at this stage, but is below the single-precision rounding
8609 // position.
8610 //
8611 // However, if -enable-unsafe-fp-math is in effect, accept double
8612 // rounding to avoid the extra overhead.
8613 if (Op.getValueType() == MVT::f32 &&
8614 !Subtarget.hasFPCVT() &&
8615 !DAG.getTarget().Options.UnsafeFPMath) {
8616
8617 // Twiddle input to make sure the low 11 bits are zero. (If this
8618 // is the case, we are guaranteed the value will fit into the 53 bit
8619 // mantissa of an IEEE double-precision value without rounding.)
8620 // If any of those low 11 bits were not zero originally, make sure
8621 // bit 12 (value 2048) is set instead, so that the final rounding
8622 // to single-precision gets the correct result.
8623 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8624 SINT, DAG.getConstant(2047, dl, MVT::i64));
8625 Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
8626 Round, DAG.getConstant(2047, dl, MVT::i64));
8627 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
8628 Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8629 Round, DAG.getConstant(-2048, dl, MVT::i64));
8630
8631 // However, we cannot use that value unconditionally: if the magnitude
8632 // of the input value is small, the bit-twiddling we did above might
8633 // end up visibly changing the output. Fortunately, in that case, we
8634 // don't need to twiddle bits since the original input will convert
8635 // exactly to double-precision floating-point already. Therefore,
8636 // construct a conditional to use the original value if the top 11
8637 // bits are all sign-bit copies, and use the rounded value computed
8638 // above otherwise.
8639 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
8640 SINT, DAG.getConstant(53, dl, MVT::i32));
8641 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
8642 Cond, DAG.getConstant(1, dl, MVT::i64));
8643 Cond = DAG.getSetCC(
8644 dl,
8645 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
8646 Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
8647
8648 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
8649 }
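A scalar sketch of the double-rounding guard above, using a hypothetical helper prepareForSingleRounding on a plain int64_t instead of DAG nodes (the arithmetic right shift on the signed value is assumed to behave as on common compilers):

#include <cstdint>
#include <cstdio>

int64_t prepareForSingleRounding(int64_t SINT) {
  int64_t Round = SINT & 2047;   // low 11 bits
  Round += 2047;                 // carries into bit 11 iff any low bit was set
  Round |= SINT;
  Round &= ~int64_t(2047);       // clear the low 11 bits
  // Use the twiddled value only when the top 11 bits are not all sign-bit copies.
  int64_t Cond = (SINT >> 53) + 1;
  return (uint64_t)Cond > 1 ? Round : SINT;
}

int main() {
  // Small magnitudes pass through untouched; large ones get bit 11 as a sticky bit.
  printf("%lld\n", (long long)prepareForSingleRounding(12345));                       // 12345
  printf("%llx\n", (unsigned long long)prepareForSingleRounding(0x40000000000007FFLL)); // 4000000000000800
}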
8650
8651 ReuseLoadInfo RLI;
8652 SDValue Bits;
8653
8654 MachineFunction &MF = DAG.getMachineFunction();
8655 if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
8656 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8657 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8658 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8659 } else if (Subtarget.hasLFIWAX() &&
8660 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
8661 MachineMemOperand *MMO =
8662 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8663 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8664 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8665 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
8666 DAG.getVTList(MVT::f64, MVT::Other),
8667 Ops, MVT::i32, MMO);
8668 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8669 } else if (Subtarget.hasFPCVT() &&
8670 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
8671 MachineMemOperand *MMO =
8672 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8673 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8674 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8675 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
8676 DAG.getVTList(MVT::f64, MVT::Other),
8677 Ops, MVT::i32, MMO);
8678 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8679 } else if (((Subtarget.hasLFIWAX() &&
8680 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
8681 (Subtarget.hasFPCVT() &&
8682 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8683 SINT.getOperand(0).getValueType() == MVT::i32) {
8684 MachineFrameInfo &MFI = MF.getFrameInfo();
8685 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8686
8687 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8688 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8689
8690 SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
8691 MachinePointerInfo::getFixedStack(
8692 DAG.getMachineFunction(), FrameIdx));
8693 Chain = Store;
8694
8695       assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8696              "Expected an i32 store");
8697
8698 RLI.Ptr = FIdx;
8699 RLI.Chain = Chain;
8700 RLI.MPI =
8701 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8702 RLI.Alignment = Align(4);
8703
8704 MachineMemOperand *MMO =
8705 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8706 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8707 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8708 Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
8709 PPCISD::LFIWZX : PPCISD::LFIWAX,
8710 dl, DAG.getVTList(MVT::f64, MVT::Other),
8711 Ops, MVT::i32, MMO);
8712 Chain = Bits.getValue(1);
8713 } else
8714 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
8715
8716 SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
8717 if (IsStrict)
8718 Chain = FP.getValue(1);
8719
8720 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8721 if (IsStrict)
8722 FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
8723 DAG.getVTList(MVT::f32, MVT::Other),
8724 {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8725 else
8726 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8727 DAG.getIntPtrConstant(0, dl));
8728 }
8729 return FP;
8730 }
8731
8732   assert(Src.getValueType() == MVT::i32 &&
8733          "Unhandled INT_TO_FP type in custom expander!");
8734 // Since we only generate this in 64-bit mode, we can take advantage of
8735 // 64-bit registers. In particular, sign extend the input value into the
8736 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
8737 // then lfd it and fcfid it.
8738 MachineFunction &MF = DAG.getMachineFunction();
8739 MachineFrameInfo &MFI = MF.getFrameInfo();
8740 EVT PtrVT = getPointerTy(MF.getDataLayout());
8741
8742 SDValue Ld;
8743 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
8744 ReuseLoadInfo RLI;
8745 bool ReusingLoad;
8746 if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
8747 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8748 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8749
8750 SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
8751 MachinePointerInfo::getFixedStack(
8752 DAG.getMachineFunction(), FrameIdx));
8753 Chain = Store;
8754
8755       assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8756              "Expected an i32 store");
8757
8758 RLI.Ptr = FIdx;
8759 RLI.Chain = Chain;
8760 RLI.MPI =
8761 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8762 RLI.Alignment = Align(4);
8763 }
8764
8765 MachineMemOperand *MMO =
8766 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8767 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8768 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8769 Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
8770 DAG.getVTList(MVT::f64, MVT::Other), Ops,
8771 MVT::i32, MMO);
8772 Chain = Ld.getValue(1);
8773 if (ReusingLoad)
8774 spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
8775 } else {
8776     assert(Subtarget.isPPC64() &&
8777            "i32->FP without LFIWAX supported only on PPC64");
8778
8779 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
8780 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8781
8782 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);
8783
8784 // STD the extended value into the stack slot.
8785 SDValue Store = DAG.getStore(
8786 Chain, dl, Ext64, FIdx,
8787 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
8788 Chain = Store;
8789
8790 // Load the value as a double.
8791 Ld = DAG.getLoad(
8792 MVT::f64, dl, Chain, FIdx,
8793 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
8794 Chain = Ld.getValue(1);
8795 }
8796
8797 // FCFID it and return it.
8798 SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
8799 if (IsStrict)
8800 Chain = FP.getValue(1);
8801 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8802 if (IsStrict)
8803 FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
8804 DAG.getVTList(MVT::f32, MVT::Other),
8805 {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8806 else
8807 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8808 DAG.getIntPtrConstant(0, dl));
8809 }
8810 return FP;
8811}
8812
8813SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
8814 SelectionDAG &DAG) const {
8815 SDLoc dl(Op);
8816 /*
8817 The rounding mode is in bits 30:31 of FPSR, and has the following
8818 settings:
8819 00 Round to nearest
8820 01 Round to 0
8821 10 Round to +inf
8822 11 Round to -inf
8823
8824 FLT_ROUNDS, on the other hand, expects the following:
8825 -1 Undefined
8826 0 Round to 0
8827 1 Round to nearest
8828 2 Round to +inf
8829 3 Round to -inf
8830
8831 To perform the conversion, we do:
8832 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
8833 */
8834
8835 MachineFunction &MF = DAG.getMachineFunction();
8836 EVT VT = Op.getValueType();
8837 EVT PtrVT = getPointerTy(MF.getDataLayout());
8838
8839 // Save FP Control Word to register
8840 SDValue Chain = Op.getOperand(0);
8841 SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
8842 Chain = MFFS.getValue(1);
8843
8844 // Save FP register to stack slot
8845 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
8846 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
8847 Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
8848
8849 // Load FP Control Word from low 32 bits of stack slot.
8850 SDValue Four = DAG.getConstant(4, dl, PtrVT);
8851 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
8852 SDValue CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
8853 Chain = CWD.getValue(1);
8854
8855 // Transform as necessary
8856 SDValue CWD1 =
8857 DAG.getNode(ISD::AND, dl, MVT::i32,
8858 CWD, DAG.getConstant(3, dl, MVT::i32));
8859 SDValue CWD2 =
8860 DAG.getNode(ISD::SRL, dl, MVT::i32,
8861 DAG.getNode(ISD::AND, dl, MVT::i32,
8862 DAG.getNode(ISD::XOR, dl, MVT::i32,
8863 CWD, DAG.getConstant(3, dl, MVT::i32)),
8864 DAG.getConstant(3, dl, MVT::i32)),
8865 DAG.getConstant(1, dl, MVT::i32));
8866
8867 SDValue RetVal =
8868 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
8869
8870 RetVal =
8871 DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
8872 dl, VT, RetVal);
8873
8874 return DAG.getMergeValues({RetVal, Chain}, dl);
8875}
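The FPSCR-to-FLT_ROUNDS formula in the comment at the top of LowerFLT_ROUNDS_ can be checked exhaustively; this small standalone snippet (illustrative only) prints the mapping of the RN field 00/01/10/11 to the FLT_ROUNDS values 1/0/2/3:

#include <cstdio>

int main() {
  for (unsigned RN = 0; RN < 4; ++RN)
    printf("RN=%u -> FLT_ROUNDS=%u\n", RN, (RN & 0x3) ^ ((~RN & 0x3) >> 1));
}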
8876
8877SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
8878 EVT VT = Op.getValueType();
8879 unsigned BitWidth = VT.getSizeInBits();
8880 SDLoc dl(Op);
8881   assert(Op.getNumOperands() == 3 &&
8882          VT == Op.getOperand(1).getValueType() &&
8883          "Unexpected SHL!");
8884
8885 // Expand into a bunch of logical ops. Note that these ops
8886 // depend on the PPC behavior for oversized shift amounts.
8887 SDValue Lo = Op.getOperand(0);
8888 SDValue Hi = Op.getOperand(1);
8889 SDValue Amt = Op.getOperand(2);
8890 EVT AmtVT = Amt.getValueType();
8891
8892 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8893 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8894 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
8895 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
8896 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
8897 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8898 DAG.getConstant(-BitWidth, dl, AmtVT));
8899 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
8900 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
8901 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
8902 SDValue OutOps[] = { OutLo, OutHi };
8903 return DAG.getMergeValues(OutOps, dl);
8904}
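A standalone sketch of the double-word left-shift expansion above, with hypothetical helpers shl64/srl64 that emulate the PPC property the comment relies on: a shift amount of BitWidth or more (but below 2*BitWidth) produces zero instead of being undefined.

#include <cstdint>
#include <cstdio>

static uint64_t shl64(uint64_t V, unsigned Amt) { return Amt < 64 ? V << Amt : 0; }
static uint64_t srl64(uint64_t V, unsigned Amt) { return Amt < 64 ? V >> Amt : 0; }

static void shl128(uint64_t Lo, uint64_t Hi, unsigned Amt,
                   uint64_t &OutLo, uint64_t &OutHi) {
  const unsigned BW = 64;
  uint64_t Tmp4 = shl64(Hi, Amt) | srl64(Lo, BW - Amt);   // high word built from both inputs
  uint64_t Tmp6 = shl64(Lo, Amt - BW);                    // covers Amt >= 64 (wraps to a huge amount otherwise)
  OutHi = Tmp4 | Tmp6;
  OutLo = shl64(Lo, Amt);
}

int main() {
  uint64_t Lo, Hi;
  shl128(0x8000000000000001ULL, 0, 1, Lo, Hi);
  printf("%016llx %016llx\n", (unsigned long long)Hi, (unsigned long long)Lo); // 0000000000000001 0000000000000002
  shl128(1, 0, 64, Lo, Hi);
  printf("%016llx %016llx\n", (unsigned long long)Hi, (unsigned long long)Lo); // 0000000000000001 0000000000000000
}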
8905
8906SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
8907 EVT VT = Op.getValueType();
8908 SDLoc dl(Op);
8909 unsigned BitWidth = VT.getSizeInBits();
8910   assert(Op.getNumOperands() == 3 &&
8911          VT == Op.getOperand(1).getValueType() &&
8912          "Unexpected SRL!");
8913
8914 // Expand into a bunch of logical ops. Note that these ops
8915 // depend on the PPC behavior for oversized shift amounts.
8916 SDValue Lo = Op.getOperand(0);
8917 SDValue Hi = Op.getOperand(1);
8918 SDValue Amt = Op.getOperand(2);
8919 EVT AmtVT = Amt.getValueType();
8920
8921 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8922 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8923 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
8924 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
8925 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
8926 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8927 DAG.getConstant(-BitWidth, dl, AmtVT));
8928 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
8929 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
8930 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
8931 SDValue OutOps[] = { OutLo, OutHi };
8932 return DAG.getMergeValues(OutOps, dl);
8933}
8934
8935SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
8936 SDLoc dl(Op);
8937 EVT VT = Op.getValueType();
8938 unsigned BitWidth = VT.getSizeInBits();
8939   assert(Op.getNumOperands() == 3 &&
8940          VT == Op.getOperand(1).getValueType() &&
8941          "Unexpected SRA!");
8942
8943 // Expand into a bunch of logical ops, followed by a select_cc.
8944 SDValue Lo = Op.getOperand(0);
8945 SDValue Hi = Op.getOperand(1);
8946 SDValue Amt = Op.getOperand(2);
8947 EVT AmtVT = Amt.getValueType();
8948
8949 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8950 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8951 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
8952 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
8953 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
8954 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8955 DAG.getConstant(-BitWidth, dl, AmtVT));
8956 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
8957 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
8958 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
8959 Tmp4, Tmp6, ISD::SETLE);
8960 SDValue OutOps[] = { OutLo, OutHi };
8961 return DAG.getMergeValues(OutOps, dl);
8962}
8963
8964SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
8965 SelectionDAG &DAG) const {
8966 SDLoc dl(Op);
8967 EVT VT = Op.getValueType();
8968 unsigned BitWidth = VT.getSizeInBits();
8969
8970 bool IsFSHL = Op.getOpcode() == ISD::FSHL;
8971 SDValue X = Op.getOperand(0);
8972 SDValue Y = Op.getOperand(1);
8973 SDValue Z = Op.getOperand(2);
8974 EVT AmtVT = Z.getValueType();
8975
8976 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
8977 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
8978 // This is simpler than TargetLowering::expandFunnelShift because we can rely
8979 // on PowerPC shift by BW being well defined.
8980 Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
8981 DAG.getConstant(BitWidth - 1, dl, AmtVT));
8982 SDValue SubZ =
8983 DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
8984 X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
8985 Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
8986 return DAG.getNode(ISD::OR, dl, VT, X, Y);
8987}
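Scalar illustration of the fshl formula above (hypothetical fshl64 helper; the explicit SubZ < 64 check emulates the PPC shift-by-BitWidth-gives-zero behaviour that makes the Z == 0 case fall out naturally):

#include <cstdint>
#include <cstdio>

static uint64_t fshl64(uint64_t X, uint64_t Y, unsigned Z) {
  Z &= 63;                                   // Z % BW
  unsigned SubZ = 64 - Z;                    // BW - (Z % BW), equal to BW when Z == 0
  uint64_t Hi = X << Z;                      // X << (Z % BW)
  uint64_t Lo = SubZ < 64 ? Y >> SubZ : 0;   // Y >> (BW - (Z % BW)), zero when Z == 0
  return Hi | Lo;
}

int main() {
  printf("%016llx\n", (unsigned long long)fshl64(0x0123456789ABCDEFULL, 0xFEDCBA9876543210ULL, 8));
  // 23456789abcdeffe : top byte of Y shifted in at the bottom
  printf("%016llx\n", (unsigned long long)fshl64(0x0123456789ABCDEFULL, 0xFEDCBA9876543210ULL, 0));
  // 0123456789abcdef : Z == 0 returns X unchanged
}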
8988
8989//===----------------------------------------------------------------------===//
8990// Vector related lowering.
8991//
8992
8993/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
8994/// element size of SplatSize. Cast the result to VT.
8995static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
8996 SelectionDAG &DAG, const SDLoc &dl) {
8997 static const MVT VTys[] = { // canonical VT to use for each size.
8998 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
8999 };
9000
9001 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
9002
9003 // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
9004 if (Val == ((1LU << (SplatSize * 8)) - 1)) {
9005 SplatSize = 1;
9006 Val = 0xFF;
9007 }
9008
9009 EVT CanonicalVT = VTys[SplatSize-1];
9010
9011 // Build a canonical splat for this value.
9012 return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
9013}
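A quick worked example (hypothetical values) of the all-ones canonicalization in getCanonicalConstSplat: a 2-byte splat of 0xFFFF matches (1 << 16) - 1 and is re-expressed as a 1-byte splat of 0xFF, i.e. vspltisb -1.

#include <cstdio>

int main() {
  unsigned long Val = 0xFFFF;                  // assumed 2-byte splat value
  unsigned SplatSize = 2;
  if (Val == ((1LU << (SplatSize * 8)) - 1)) { // all-ones pattern detected
    SplatSize = 1;
    Val = 0xFF;                                // canonical vspltisb form
  }
  printf("SplatSize=%u Val=0x%lX\n", SplatSize, Val); // SplatSize=1 Val=0xFF
}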
9014
9015/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
9016/// specified intrinsic ID.
9017static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
9018 const SDLoc &dl, EVT DestVT = MVT::Other) {
9019 if (DestVT == MVT::Other) DestVT = Op.getValueType();
9020 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9021 DAG.getConstant(IID, dl, MVT::i32), Op);
9022}
9023
9024/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
9025/// specified intrinsic ID.
9026static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
9027 SelectionDAG &DAG, const SDLoc &dl,
9028 EVT DestVT = MVT::Other) {
9029 if (DestVT == MVT::Other) DestVT = LHS.getValueType();
9030 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9031 DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
9032}
9033
9034/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
9035/// specified intrinsic ID.
9036static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
9037 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
9038 EVT DestVT = MVT::Other) {
9039 if (DestVT == MVT::Other) DestVT = Op0.getValueType();
9040 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9041 DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
9042}
9043
9044/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
9045/// amount. The result has the specified value type.
9046static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
9047 SelectionDAG &DAG, const SDLoc &dl) {
9048 // Force LHS/RHS to be the right type.
9049 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
9050 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
9051
9052 int Ops[16];
9053 for (unsigned i = 0; i != 16; ++i)
9054 Ops[i] = i + Amt;
9055 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
9056 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9057}
9058
9059/// Do we have an efficient pattern in a .td file for this node?
9060///
9061/// \param V - pointer to the BuildVectorSDNode being matched
9062/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
9063///
9064/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
9065/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
9066/// the opposite is true (expansion is beneficial) are:
9067/// - The node builds a vector out of integers that are not 32 or 64-bits
9068/// - The node builds a vector out of constants
9069/// - The node is a "load-and-splat"
9070/// In all other cases, we will choose to keep the BUILD_VECTOR.
9071static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
9072 bool HasDirectMove,
9073 bool HasP8Vector) {
9074 EVT VecVT = V->getValueType(0);
9075 bool RightType = VecVT == MVT::v2f64 ||
9076 (HasP8Vector && VecVT == MVT::v4f32) ||
9077 (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
9078 if (!RightType)
9079 return false;
9080
9081 bool IsSplat = true;
9082 bool IsLoad = false;
9083 SDValue Op0 = V->getOperand(0);
9084
9085 // This function is called in a block that confirms the node is not a constant
9086 // splat. So a constant BUILD_VECTOR here means the vector is built out of
9087 // different constants.
9088 if (V->isConstant())
9089 return false;
9090 for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
9091 if (V->getOperand(i).isUndef())
9092 return false;
9093 // We want to expand nodes that represent load-and-splat even if the
9094 // loaded value is a floating point truncation or conversion to int.
9095 if (V->getOperand(i).getOpcode() == ISD::LOAD ||
9096 (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
9097 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9098 (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
9099 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9100 (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
9101 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
9102 IsLoad = true;
9103 // If the operands are different or the input is not a load and has more
9104 // uses than just this BV node, then it isn't a splat.
9105 if (V->getOperand(i) != Op0 ||
9106 (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
9107 IsSplat = false;
9108 }
9109 return !(IsSplat && IsLoad);
9110}
9111
9112// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9113SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9114
9115 SDLoc dl(Op);
9116 SDValue Op0 = Op->getOperand(0);
9117
9118 if ((Op.getValueType() != MVT::f128) ||
9119 (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9120 (Op0.getOperand(0).getValueType() != MVT::i64) ||
9121 (Op0.getOperand(1).getValueType() != MVT::i64))
9122 return SDValue();
9123
9124 return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0),
9125 Op0.getOperand(1));
9126}
9127
9128static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9129 const SDValue *InputLoad = &Op;
9130 if (InputLoad->getOpcode() == ISD::BITCAST)
9131 InputLoad = &InputLoad->getOperand(0);
9132 if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
9133 InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
9134 IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9135 InputLoad = &InputLoad->getOperand(0);
9136 }
9137 if (InputLoad->getOpcode() != ISD::LOAD)
9138 return nullptr;
9139 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9140 return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
9141}
9142
9143// Convert the argument APFloat to a single precision APFloat if there is no
9144// loss in information during the conversion to single precision APFloat and the
9145// resulting number is not a denormal number. Return true if successful.
9146bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
9147 APFloat APFloatToConvert = ArgAPFloat;
9148 bool LosesInfo = true;
9149 APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9150 &LosesInfo);
9151 bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
9152 if (Success)
9153 ArgAPFloat = APFloatToConvert;
9154 return Success;
9155}
9156
9157// Bitcast the argument APInt to a double and convert it to a single precision
9158// APFloat, bitcast the APFloat to an APInt and assign it to the original
9159// argument if there is no loss in information during the conversion from
9160// double to single precision APFloat and the resulting number is not a denormal
9161// number. Return true if successful.
9162bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
9163 double DpValue = ArgAPInt.bitsToDouble();
9164 APFloat APFloatDp(DpValue);
9165 bool Success = convertToNonDenormSingle(APFloatDp);
9166 if (Success)
9167 ArgAPInt = APFloatDp.bitcastToAPInt();
9168 return Success;
9169}
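The two convertToNonDenormSingle overloads above boil down to one question: does the double survive a round trip through single precision without becoming a denormal? A plain C++ sketch of that check (hypothetical fitsNonDenormSingle helper, using host float arithmetic instead of APFloat):

#include <cmath>
#include <cstdio>

bool fitsNonDenormSingle(double D) {
  float F = static_cast<float>(D);                  // round to single precision
  bool NoLoss = static_cast<double>(F) == D;        // exact round trip back to double
  bool Denorm = std::fpclassify(F) == FP_SUBNORMAL; // single-precision denormal
  return NoLoss && !Denorm;
}

int main() {
  printf("%d\n", fitsNonDenormSingle(1.0));   // 1: exactly representable, normal
  printf("%d\n", fitsNonDenormSingle(0.1));   // 0: 0.1 is not exactly representable as float
  printf("%d\n", fitsNonDenormSingle(1e-40)); // 0: would be a single-precision denormal
}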
9170
9171// If this is a case we can't handle, return null and let the default
9172// expansion code take care of it. If we CAN select this case, and if it
9173// selects to a single instruction, return Op. Otherwise, if we can codegen
9174// this case more efficiently than a constant pool load, lower it to the
9175// sequence of ops that should be used.
9176SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9177 SelectionDAG &DAG) const {
9178 SDLoc dl(Op);
9179 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
9180   assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
9181
9182 // Check if this is a splat of a constant value.
9183 APInt APSplatBits, APSplatUndef;
9184 unsigned SplatBitSize;
9185 bool HasAnyUndefs;
9186 bool BVNIsConstantSplat =
9187 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
9188 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
9189
9190 // If it is a splat of a double, check if we can shrink it to a 32 bit
9191 // non-denormal float which when converted back to double gives us the same
9192 // double. This is to exploit the XXSPLTIDP instruction.
9193 if (BVNIsConstantSplat && Subtarget.hasPrefixInstrs() &&
9194 (SplatBitSize == 64) && (Op->getValueType(0) == MVT::v2f64) &&
9195 convertToNonDenormSingle(APSplatBits)) {
9196 SDValue SplatNode = DAG.getNode(
9197 PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
9198 DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
9199 return DAG.getBitcast(Op.getValueType(), SplatNode);
9200 }
9201
9202 if (!BVNIsConstantSplat || SplatBitSize > 32) {
9203
9204 bool IsPermutedLoad = false;
9205 const SDValue *InputLoad =
9206 getNormalLoadInput(Op.getOperand(0), IsPermutedLoad);
9207 // Handle load-and-splat patterns as we have instructions that will do this
9208 // in one go.
9209 if (InputLoad && DAG.isSplatValue(Op, true)) {
9210 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9211
9212 // We have handling for 4 and 8 byte elements.
9213 unsigned ElementSize = LD->getMemoryVT().getScalarSizeInBits();
9214
9215 // Checking for a single use of this load, we have to check for vector
9216 // width (128 bits) / ElementSize uses (since each operand of the
9217       // BUILD_VECTOR is a separate use of the value).
9218 if (InputLoad->getNode()->hasNUsesOfValue(128 / ElementSize, 0) &&
9219 ((Subtarget.hasVSX() && ElementSize == 64) ||
9220 (Subtarget.hasP9Vector() && ElementSize == 32))) {
9221 SDValue Ops[] = {
9222 LD->getChain(), // Chain
9223 LD->getBasePtr(), // Ptr
9224 DAG.getValueType(Op.getValueType()) // VT
9225 };
9226 return
9227 DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl,
9228 DAG.getVTList(Op.getValueType(), MVT::Other),
9229 Ops, LD->getMemoryVT(), LD->getMemOperand());
9230 }
9231 }
9232
9233 // BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be
9234 // lowered to VSX instructions under certain conditions.
9235 // Without VSX, there is no pattern more efficient than expanding the node.
9236 if (Subtarget.hasVSX() &&
9237 haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
9238 Subtarget.hasP8Vector()))
9239 return Op;
9240 return SDValue();
9241 }
9242
9243 uint64_t SplatBits = APSplatBits.getZExtValue();
9244 uint64_t SplatUndef = APSplatUndef.getZExtValue();
9245 unsigned SplatSize = SplatBitSize / 8;
9246
9247 // First, handle single instruction cases.
9248
9249 // All zeros?
9250 if (SplatBits == 0) {
9251 // Canonicalize all zero vectors to be v4i32.
9252 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
9253 SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
9254 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
9255 }
9256 return Op;
9257 }
9258
9259 // We have XXSPLTIW for constant splats four bytes wide.
9260 // Given vector length is a multiple of 4, 2-byte splats can be replaced
9261 // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
9262 // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
9263 // turned into a 4-byte splat of 0xABABABAB.
9264 if (Subtarget.hasPrefixInstrs() && SplatSize == 2)
9265 return getCanonicalConstSplat((SplatBits |= SplatBits << 16), SplatSize * 2,
Although the value stored to 'SplatBits' is used in the enclosing expression, the value is never actually read from 'SplatBits'
9266 Op.getValueType(), DAG, dl);
9267
9268 if (Subtarget.hasPrefixInstrs() && SplatSize == 4)
9269 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9270 dl);
9271
9272 // We have XXSPLTIB for constant splats one byte wide.
9273 if (Subtarget.hasP9Vector() && SplatSize == 1)
9274 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9275 dl);
9276
9277 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
9278 int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
9279 (32-SplatBitSize));
9280 if (SextVal >= -16 && SextVal <= 15)
9281 return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
9282 dl);
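Worked example (hypothetical input) of the SextVal computation above: an 8-bit splat pattern of 0xF0 sign-extends to -16, which falls in [-16,15] and so can be emitted as a single vspltisb.

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t SplatBits = 0xF0;       // assumed 1-byte splat pattern
  unsigned SplatBitSize = 8;
  int32_t SextVal = int32_t(SplatBits << (32 - SplatBitSize)) >> (32 - SplatBitSize);
  printf("%d\n", SextVal);         // prints -16
}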
9283
9284 // Two instruction sequences.
9285
9286 // If this value is in the range [-32,30] and is even, use:
9287 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
9288 // If this value is in the range [17,31] and is odd, use:
9289 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
9290 // If this value is in the range [-31,-17] and is odd, use:
9291 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
9292 // Note the last two are three-instruction sequences.
9293 if (SextVal >= -32 && SextVal <= 31) {
9294 // To avoid having these optimizations undone by constant folding,
9295 // we convert to a pseudo that will be expanded later into one of
9296 // the above forms.
9297 SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
9298 EVT VT = (SplatSize == 1 ? MVT::v16i8 :
9299 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
9300 SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
9301 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
9302 if (VT == Op.getValueType())
9303 return RetVal;
9304 else
9305 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
9306 }
9307
9308 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
9309 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
9310 // for fneg/fabs.
9311 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
9312 // Make -1 and vspltisw -1:
9313 SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
9314
9315 // Make the VSLW intrinsic, computing 0x8000_0000.
9316 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
9317 OnesV, DAG, dl);
9318
9319 // xor by OnesV to invert it.
9320 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
9321 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9322 }
9323
9324 // Check to see if this is one of a wide variety of 'vsplti* + binop self' cases.
9325 static const signed char SplatCsts[] = {
9326 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
9327 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
9328 };
9329
9330 for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
9331 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
9332 // cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1'
9333 int i = SplatCsts[idx];
9334
9335 // Figure out what shift amount will be used by altivec if shifted by i in
9336 // this splat size.
9337 unsigned TypeShiftAmt = i & (SplatBitSize-1);
9338
9339 // vsplti + shl self.
9340 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
9341 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9342 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9343 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
9344 Intrinsic::ppc_altivec_vslw
9345 };
9346 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9347 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9348 }
9349
9350 // vsplti + srl self.
9351 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9352 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9353 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9354 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
9355 Intrinsic::ppc_altivec_vsrw
9356 };
9357 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9358 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9359 }
9360
9361 // vsplti + sra self.
9362 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9363 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9364 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9365 Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
9366 Intrinsic::ppc_altivec_vsraw
9367 };
9368 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9369 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9370 }
9371
9372 // vsplti + rol self.
9373 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
9374 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
9375 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9376 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9377 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
9378 Intrinsic::ppc_altivec_vrlw
9379 };
9380 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9381 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9382 }
9383
9384 // t = vsplti c, result = vsldoi t, t, 1
9385 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
9386 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9387 unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
9388 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9389 }
9390 // t = vsplti c, result = vsldoi t, t, 2
9391 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
9392 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9393 unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
9394 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9395 }
9396 // t = vsplti c, result = vsldoi t, t, 3
9397 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
9398 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9399 unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
9400 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9401 }
9402 }
9403
9404 return SDValue();
9405}
9406
9407/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
9408/// the specified operations to build the shuffle.
9409static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
9410 SDValue RHS, SelectionDAG &DAG,
9411 const SDLoc &dl) {
9412 unsigned OpNum = (PFEntry >> 26) & 0x0F;
9413 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
9414 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
9415
9416 enum {
9417 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
9418 OP_VMRGHW,
9419 OP_VMRGLW,
9420 OP_VSPLTISW0,
9421 OP_VSPLTISW1,
9422 OP_VSPLTISW2,
9423 OP_VSPLTISW3,
9424 OP_VSLDOI4,
9425 OP_VSLDOI8,
9426 OP_VSLDOI12
9427 };
9428
9429 if (OpNum == OP_COPY) {
9430 if (LHSID == (1*9+2)*9+3) return LHS;
9431 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
9432 return RHS;
9433 }
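Note (illustrative, not part of the source): LHSID and RHSID are four-digit base-9 encodings of element indices, where a digit 0-7 selects a 4-byte element of the two inputs and 8 means undef (see the "Start out undef" initialization further below). For example, (1*9+2)*9+3 == 102 encodes <0,1,2,3> (a copy of LHS) and ((4*9+5)*9+6)*9+7 == 3382 encodes <4,5,6,7> (a copy of RHS).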
9434
9435 SDValue OpLHS, OpRHS;
9436 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
9437 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
9438
9439 int ShufIdxs[16];
9440 switch (OpNum) {
9441 default: llvm_unreachable("Unknown i32 permute!");
9442 case OP_VMRGHW:
9443 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
9444 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
9445 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
9446 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
9447 break;
9448 case OP_VMRGLW:
9449 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
9450 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
9451 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
9452 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
9453 break;
9454 case OP_VSPLTISW0:
9455 for (unsigned i = 0; i != 16; ++i)
9456 ShufIdxs[i] = (i&3)+0;
9457 break;
9458 case OP_VSPLTISW1:
9459 for (unsigned i = 0; i != 16; ++i)
9460 ShufIdxs[i] = (i&3)+4;
9461 break;
9462 case OP_VSPLTISW2:
9463 for (unsigned i = 0; i != 16; ++i)
9464 ShufIdxs[i] = (i&3)+8;
9465 break;
9466 case OP_VSPLTISW3:
9467 for (unsigned i = 0; i != 16; ++i)
9468 ShufIdxs[i] = (i&3)+12;
9469 break;
9470 case OP_VSLDOI4:
9471 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
9472 case OP_VSLDOI8:
9473 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
9474 case OP_VSLDOI12:
9475 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
9476 }
9477 EVT VT = OpLHS.getValueType();
9478 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
9479 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
9480 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
9481 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9482}
9483
9484/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
9485/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
9486/// SDValue.
9487SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
9488 SelectionDAG &DAG) const {
9489 const unsigned BytesInVector = 16;
9490 bool IsLE = Subtarget.isLittleEndian();
9491 SDLoc dl(N);
9492 SDValue V1 = N->getOperand(0);
9493 SDValue V2 = N->getOperand(1);
9494 unsigned ShiftElts = 0, InsertAtByte = 0;
9495 bool Swap = false;
9496
9497 // Shifts required to get the byte we want at element 7.
9498 unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
9499 0, 15, 14, 13, 12, 11, 10, 9};
9500 unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
9501 1, 2, 3, 4, 5, 6, 7, 8};
9502
9503 ArrayRef<int> Mask = N->getMask();
9504 int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
9505
9506 // For each mask element, find out if we're just inserting something
9507 // from V2 into V1 or vice versa.
9508 // Possible permutations inserting an element from V2 into V1:
9509 // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9510 // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9511 // ...
9512 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
9513 // Inserting from V1 into V2 will be similar, except mask range will be
9514 // [16,31].
9515
9516 bool FoundCandidate = false;
9517 // If both vector operands for the shuffle are the same vector, the mask
9518 // will contain only elements from the first one and the second one will be
9519 // undef.
9520 unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
9521 // Go through the mask of bytes to find an element that's being moved
9522 // from one vector to the other.
9523 for (unsigned i = 0; i < BytesInVector; ++i) {
9524 unsigned CurrentElement = Mask[i];
9525 // If 2nd operand is undefined, we should only look for element 7 in the
9526 // Mask.
9527 if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
9528 continue;
9529
9530 bool OtherElementsInOrder = true;
9531 // Examine the other elements in the Mask to see if they're in original
9532 // order.
9533 for (unsigned j = 0; j < BytesInVector; ++j) {
9534 if (j == i)
9535 continue;
9536 // If CurrentElement is from V1 [0,15], then we expect the rest of the Mask
9537 // to be from V2 [16,31] and vice versa. Unless the 2nd operand is undefined,
9538 // in which case we assume we're picking from the 1st operand.
9539 int MaskOffset =
9540 (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
9541 if (Mask[j] != OriginalOrder[j] + MaskOffset) {
9542 OtherElementsInOrder = false;
9543 break;
9544 }
9545 }
9546 // If other elements are in original order, we record the number of shifts
9547 // we need to get the element we want into element 7. Also record which byte
9548 // in the vector we should insert into.
9549 if (OtherElementsInOrder) {
9550 // If 2nd operand is undefined, we assume no shifts and no swapping.
9551 if (V2.isUndef()) {
9552 ShiftElts = 0;
9553 Swap = false;
9554 } else {
9555 // Only need the last 4 bits for the shift because operands will be swapped if CurrentElement is >= 2^4.
9556 ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
9557 : BigEndianShifts[CurrentElement & 0xF];
9558 Swap = CurrentElement < BytesInVector;
9559 }
9560 InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
9561 FoundCandidate = true;
9562 break;
9563 }
9564 }
9565
9566 if (!FoundCandidate)
9567 return SDValue();
9568
9569 // Candidate found, construct the proper SDAG sequence with VINSERTB,
9570 // optionally with VECSHL if shift is required.
9571 if (Swap)
9572 std::swap(V1, V2);
9573 if (V2.isUndef())
9574 V2 = V1;
9575 if (ShiftElts) {
9576 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
9577 DAG.getConstant(ShiftElts, dl, MVT::i32));
9578 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
9579 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9580 }
9581 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
9582 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9583}
9584
9585/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
9586/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
9587/// SDValue.
9588SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
9589 SelectionDAG &DAG) const {
9590 const unsigned NumHalfWords = 8;
9591 const unsigned BytesInVector = NumHalfWords * 2;
9592 // Check that the shuffle is on half-words.
9593 if (!isNByteElemShuffleMask(N, 2, 1))
9594 return SDValue();
9595
9596 bool IsLE = Subtarget.isLittleEndian();
9597 SDLoc dl(N);
9598 SDValue V1 = N->getOperand(0);
9599 SDValue V2 = N->getOperand(1);
9600 unsigned ShiftElts = 0, InsertAtByte = 0;
9601 bool Swap = false;
9602
9603 // Shifts required to get the half-word we want at element 3.
9604 unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
9605 unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
9606
9607 uint32_t Mask = 0;
9608 uint32_t OriginalOrderLow = 0x1234567;
9609 uint32_t OriginalOrderHigh = 0x89ABCDEF;
9610 // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
9611 // 32-bit value, using one 4-bit nibble per element.
9612 for (unsigned i = 0; i < NumHalfWords; ++i) {
9613 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9614 Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
9615 }
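Worked example (illustrative, not part of the source): each byte-mask element at an even position is divided by 2 to get a half-word index and packed into a nibble, most significant first. The identity shuffle (byte elements 0,2,4,...,14 at those positions) therefore packs to 0x01234567, which is exactly OriginalOrderLow.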
9616
9617 // For each mask element, find out if we're just inserting something
9618 // from V2 into V1 or vice versa. Possible permutations inserting an element
9619 // from V2 into V1:
9620 // X, 1, 2, 3, 4, 5, 6, 7
9621 // 0, X, 2, 3, 4, 5, 6, 7
9622 // 0, 1, X, 3, 4, 5, 6, 7
9623 // 0, 1, 2, X, 4, 5, 6, 7
9624 // 0, 1, 2, 3, X, 5, 6, 7
9625 // 0, 1, 2, 3, 4, X, 6, 7
9626 // 0, 1, 2, 3, 4, 5, X, 7
9627 // 0, 1, 2, 3, 4, 5, 6, X
9628 // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
9629
9630 bool FoundCandidate = false;
9631 // Go through the mask of half-words to find an element that's being moved
9632 // from one vector to the other.
9633 for (unsigned i = 0; i < NumHalfWords; ++i) {
9634 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9635 uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
9636 uint32_t MaskOtherElts = ~(0xF << MaskShift);
9637 uint32_t TargetOrder = 0x0;
9638
9639 // If both vector operands for the shuffle are the same vector, the mask
9640 // will contain only elements from the first one and the second one will be
9641 // undef.
9642 if (V2.isUndef()) {
9643 ShiftElts = 0;
9644 unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
9645 TargetOrder = OriginalOrderLow;
9646 Swap = false;
9647 // Skip if this is not the correct element, or if the mask of the other
9648 // elements doesn't match our expected order.
9649 if (MaskOneElt == VINSERTHSrcElem &&
9650 (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9651 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
9652 FoundCandidate = true;
9653 break;
9654 }
9655 } else { // If both operands are defined.
9656 // Target order is [8,15] if the current mask is between [0,7].
9657 TargetOrder =
9658 (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
9659 // Skip if the mask of the other elements doesn't match our expected order.
9660 if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9661 // We only need the last 3 bits for the number of shifts.
9662 ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
9663 : BigEndianShifts[MaskOneElt & 0x7];
9664 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
9665 Swap = MaskOneElt < NumHalfWords;
9666 FoundCandidate = true;
9667 break;
9668 }
9669 }
9670 }
9671
9672 if (!FoundCandidate)
9673 return SDValue();
9674
9675 // Candidate found, construct the proper SDAG sequence with VINSERTH,
9676 // optionally with VECSHL if shift is required.
9677 if (Swap)
9678 std::swap(V1, V2);
9679 if (V2.isUndef())
9680 V2 = V1;
9681 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
9682 if (ShiftElts) {
9683 // Double ShiftElts because we're left shifting on v16i8 type.
9684 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
9685 DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
9686 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
9687 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
9688 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9689 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9690 }
9691 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
9692 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
9693 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9694 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9695}
9696
9697/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
9698/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
9699/// return the default SDValue.
9700SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
9701 SelectionDAG &DAG) const {
9702 // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
9703 // to v16i8. Peek through the bitcasts to get the actual operands.
9704 SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
9705 SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));
9706
9707 auto ShuffleMask = SVN->getMask();
9708 SDValue VecShuffle(SVN, 0);
9709 SDLoc DL(SVN);
9710
9711 // Check that we have a four byte shuffle.
9712 if (!isNByteElemShuffleMask(SVN, 4, 1))
9713 return SDValue();
9714
9715 // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
9716 if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
9717 std::swap(LHS, RHS);
9718 VecShuffle = DAG.getCommutedVectorShuffle(*SVN);
9719 ShuffleMask = cast<ShuffleVectorSDNode>(VecShuffle)->getMask();
9720 }
9721
9722 // Ensure that the RHS is a vector of constants.
9723 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
9724 if (!BVN)
9725 return SDValue();
9726
9727 // Check if RHS is a splat of 4-bytes (or smaller).
9728 APInt APSplatValue, APSplatUndef;
9729 unsigned SplatBitSize;
9730 bool HasAnyUndefs;
9731 if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
9732 HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
9733 SplatBitSize > 32)
9734 return SDValue();
9735
9736 // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
9737 // The instruction splats a constant C into two words of the source vector
9738 // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
9739 // Thus we check that the shuffle mask is the equivalent of
9740 // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
9741 // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
9742 // within each word are consecutive, so we only need to check the first byte.
9743 SDValue Index;
9744 bool IsLE = Subtarget.isLittleEndian();
9745 if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
9746 (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
9747 ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
9748 Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
9749 else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
9750 (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
9751 ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
9752 Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
9753 else
9754 return SDValue();
9755
9756 // If the splat is narrower than 32-bits, we need to get the 32-bit value
9757 // for XXSPLTI32DX.
9758 unsigned SplatVal = APSplatValue.getZExtValue();
9759 for (; SplatBitSize < 32; SplatBitSize <<= 1)
9760 SplatVal |= (SplatVal << SplatBitSize);
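Worked example (illustrative, not part of the source): a 1-byte splat of 0xAB is widened to 0xABAB on the first iteration and to 0xABABABAB on the second, which becomes the 32-bit immediate passed to XXSPLTI32DX.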
9761
9762 SDValue SplatNode = DAG.getNode(
9763 PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
9764 Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
9765 return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
9766}
9767
9768/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
9769/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
9770/// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
9771/// i.e (or (shl x, C1), (srl x, 128-C1)).
9772SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
9773 assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
9774 assert(Op.getValueType() == MVT::v1i128 &&
9775 "Only set v1i128 as custom, other type shouldn't reach here!");
9776 SDLoc dl(Op);
9777 SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
9778 SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
9779 unsigned SHLAmt = N1.getConstantOperandVal(0);
9780 if (SHLAmt % 8 == 0) {
9781 SmallVector<int, 16> Mask(16, 0);
9782 std::iota(Mask.begin(), Mask.end(), 0);
9783 std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
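Worked example (illustrative, not part of the source): for SHLAmt == 8 the mask starts as <0,1,...,15> and the rotate turns it into <1,2,...,15,0>, so the 128-bit rotate is expressed as a single byte shuffle.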
9784 if (SDValue Shuffle =
9785 DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
9786 DAG.getUNDEF(MVT::v16i8), Mask))
9787 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
9788 }
9789 SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
9790 SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
9791 DAG.getConstant(SHLAmt, dl, MVT::i32));
9792 SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
9793 DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
9794 SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
9795 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
9796}
9797
9798/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
9799/// is a shuffle we can handle in a single instruction, return it. Otherwise,
9800/// return the code it can be lowered into. Worst case, it can always be
9801/// lowered into a vperm.
9802SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
9803 SelectionDAG &DAG) const {
9804 SDLoc dl(Op);
9805 SDValue V1 = Op.getOperand(0);
9806 SDValue V2 = Op.getOperand(1);
9807 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
9808
9809 // Any nodes that were combined in the target-independent combiner prior
9810 // to vector legalization will not be sent to the target combine. Try to
9811 // combine it here.
9812 if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
9813 if (!isa<ShuffleVectorSDNode>(NewShuffle))
9814 return NewShuffle;
9815 Op = NewShuffle;
9816 SVOp = cast<ShuffleVectorSDNode>(Op);
9817 V1 = Op.getOperand(0);
9818 V2 = Op.getOperand(1);
9819 }
9820 EVT VT = Op.getValueType();
9821 bool isLittleEndian = Subtarget.isLittleEndian();
9822
9823 unsigned ShiftElts, InsertAtByte;
9824 bool Swap = false;
9825
9826 // If this is a load-and-splat, we can do that with a single instruction
9827 // in some cases. However if the load has multiple uses, we don't want to
9828 // combine it because that will just produce multiple loads.
9829 bool IsPermutedLoad = false;
9830 const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
9831 if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
9832 (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
9833 InputLoad->hasOneUse()) {
9834 bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
9835 int SplatIdx =
9836 PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
9837
9838 // The splat index for permuted loads will be in the left half of the vector
9839 // which is strictly wider than the loaded value by 8 bytes. So we need to
9840 // adjust the splat index to point to the correct address in memory.
9841 if (IsPermutedLoad) {
9842 assert(isLittleEndian && "Unexpected permuted load on big endian target");
9843 SplatIdx += IsFourByte ? 2 : 1;
9844 assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
9845 "Splat of a value outside of the loaded memory");
9846 }
9847
9848 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9849 // For 4-byte load-and-splat, we need Power9.
9850 if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
9851 uint64_t Offset = 0;
9852 if (IsFourByte)
9853 Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
9854 else
9855 Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
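Worked example (illustrative, not part of the source): splatting 4-byte element 1 on a little-endian target gives Offset = (3 - 1) * 4 = 8, so the load-and-splat reads the word 8 bytes past the original base pointer.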
9856
9857 SDValue BasePtr = LD->getBasePtr();
9858 if (Offset != 0)
9859 BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
9860 BasePtr, DAG.getIntPtrConstant(Offset, dl));
9861 SDValue Ops[] = {
9862 LD->getChain(), // Chain
9863 BasePtr, // BasePtr
9864 DAG.getValueType(Op.getValueType()) // VT
9865 };
9866 SDVTList VTL =
9867 DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
9868 SDValue LdSplt =
9869 DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
9870 Ops, LD->getMemoryVT(), LD->getMemOperand());
9871 if (LdSplt.getValueType() != SVOp->getValueType(0))
9872 LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
9873 return LdSplt;
9874 }
9875 }
9876 if (Subtarget.hasP9Vector() &&
9877 PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
9878 isLittleEndian)) {
9879 if (Swap)
9880 std::swap(V1, V2);
9881 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9882 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
9883 if (ShiftElts) {
9884 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
9885 DAG.getConstant(ShiftElts, dl, MVT::i32));
9886 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
9887 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9888 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9889 }
9890 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
9891 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9892 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9893 }
9894
9895 if (Subtarget.hasPrefixInstrs()) {
9896 SDValue SplatInsertNode;
9897 if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
9898 return SplatInsertNode;
9899 }
9900
9901 if (Subtarget.hasP9Altivec()) {
9902 SDValue NewISDNode;
9903 if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
9904 return NewISDNode;
9905
9906 if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
9907 return NewISDNode;
9908 }
9909
9910 if (Subtarget.hasVSX() &&
9911 PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
9912 if (Swap)
9913 std::swap(V1, V2);
9914 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9915 SDValue Conv2 =
9916 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
9917
9918 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
9919 DAG.getConstant(ShiftElts, dl, MVT::i32));
9920 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
9921 }
9922
9923 if (Subtarget.hasVSX() &&
9924 PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
9925 if (Swap)
9926 std::swap(V1, V2);
9927 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
9928 SDValue Conv2 =
9929 DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
9930
9931 SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
9932 DAG.getConstant(ShiftElts, dl, MVT::i32));
9933 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
9934 }
9935
9936 if (Subtarget.hasP9Vector()) {
9937 if (PPC::isXXBRHShuffleMask(SVOp)) {
9938 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
9939 SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
9940 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
9941 } else if (PPC::isXXBRWShuffleMask(SVOp)) {
9942 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9943 SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
9944 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
9945 } else if (PPC::isXXBRDShuffleMask(SVOp)) {
9946 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
9947 SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
9948 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
9949 } else if (PPC::isXXBRQShuffleMask(SVOp)) {
9950 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
9951 SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
9952 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
9953 }
9954 }
9955
9956 if (Subtarget.hasVSX()) {
9957 if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
9958 int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
9959
9960 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9961 SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
9962 DAG.getConstant(SplatIdx, dl, MVT::i32));
9963 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
9964 }
9965
9966 // Left shifts of 8 bytes are actually swaps. Convert accordingly.
9967 if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
9968 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
9969 SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
9970 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
9971 }
9972 }
9973
9974 // Cases that are handled by instructions that take permute immediates
9975 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
9976 // selected by the instruction selector.
9977 if (V2.isUndef()) {
9978 if (PPC::isSplatShuffleMask(SVOp, 1) ||
9979 PPC::isSplatShuffleMask(SVOp, 2) ||
9980 PPC::isSplatShuffleMask(SVOp, 4) ||
9981 PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
9982 PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
9983 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
9984 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
9985 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
9986 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
9987 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
9988 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
9989 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
9990 (Subtarget.hasP8Altivec() && (
9991 PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
9992 PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
9993 PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
9994 return Op;
9995 }
9996 }
9997
9998 // Altivec has a variety of "shuffle immediates" that take two vector inputs
9999 // and produce a fixed permutation. If any of these match, do not lower to
10000 // VPERM.
10001 unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
10002 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10003 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10004 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
10005 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10006 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10007 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10008 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10009 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10010 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10011 (Subtarget.hasP8Altivec() && (
10012 PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10013 PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
10014 PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
10015 return Op;
10016
10017 // Check to see if this is a shuffle of 4-byte values. If so, we can use our
10018 // perfect shuffle table to emit an optimal matching sequence.
10019 ArrayRef<int> PermMask = SVOp->getMask();
10020
10021 unsigned PFIndexes[4];
10022 bool isFourElementShuffle = true;
10023 for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
10024 unsigned EltNo = 8; // Start out undef.
10025 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
10026 if (PermMask[i*4+j] < 0)
10027 continue; // Undef, ignore it.
10028
10029 unsigned ByteSource = PermMask[i*4+j];
10030 if ((ByteSource & 3) != j) {
10031 isFourElementShuffle = false;
10032 break;
10033 }
10034
10035 if (EltNo == 8) {
10036 EltNo = ByteSource/4;
10037 } else if (EltNo != ByteSource/4) {
10038 isFourElementShuffle = false;
10039 break;
10040 }
10041 }
10042 PFIndexes[i] = EltNo;
10043 }
10044
10045 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
10046 // perfect shuffle vector to determine if it is cost effective to do this as
10047 // discrete instructions, or whether we should use a vperm.
10048 // For now, we skip this for little endian until such time as we have a
10049 // little-endian perfect shuffle table.
10050 if (isFourElementShuffle && !isLittleEndian) {
10051 // Compute the index in the perfect shuffle table.
10052 unsigned PFTableIndex =
10053 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
10054
10055 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10056 unsigned Cost = (PFEntry >> 30);
10057
10058 // Determining when to avoid vperm is tricky. Many things affect the cost
10059 // of vperm, particularly how many times the perm mask needs to be computed.
10060 // For example, if the perm mask can be hoisted out of a loop or is already
10061 // used (perhaps because there are multiple permutes with the same shuffle
10062 // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of
10063 // the loop requires an extra register.
10064 //
10065 // As a compromise, we only emit discrete instructions if the shuffle can be
10066 // generated in 3 or fewer operations. When we have loop information
10067 // available, if this block is within a loop, we should avoid using vperm
10068 // for 3-operation perms and use a constant pool load instead.
10069 if (Cost < 3)
10070 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
10071 }
10072
10073 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
10074 // vector that will get spilled to the constant pool.
10075 if (V2.isUndef()) V2 = V1;
10076
10077 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
10078 // that it is in input element units, not in bytes. Convert now.
10079
10080 // For little endian, the order of the input vectors is reversed, and
10081 // the permutation mask is complemented with respect to 31. This is
10082 // necessary to produce proper semantics with the big-endian-biased vperm
10083 // instruction.
10084 EVT EltVT = V1.getValueType().getVectorElementType();
10085 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
10086
10087 SmallVector<SDValue, 16> ResultMask;
10088 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
10089 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
10090
10091 for (unsigned j = 0; j != BytesPerElement; ++j)
10092 if (isLittleEndian)
10093 ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j),
10094 dl, MVT::i32));
10095 else
10096 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl,
10097 MVT::i32));
10098 }
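Worked example (illustrative, not part of the source): with 4-byte elements, source element SrcElt == 5 contributes byte indices 20,21,22,23 on a big-endian target; on a little-endian target each index k is emitted as 31 - k, giving 11,10,9,8, to match the big-endian-biased vperm semantics described above.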
10099
10100 ShufflesHandledWithVPERM++;
10101 SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
10102 LLVM_DEBUG(dbgs() << "Emitting a VPERM for the following shuffle:\n");
10103 LLVM_DEBUG(SVOp->dump());
10104 LLVM_DEBUG(dbgs() << "With the following permute control vector:\n");
10105 LLVM_DEBUG(VPermMask.dump());
10106
10107 if (isLittleEndian)
10108 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
10109 V2, V1, VPermMask);
10110 else
10111 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
10112 V1, V2, VPermMask);
10113}
10114
10115/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
10116/// vector comparison. If it is, return true and fill in Opc/isDot with
10117/// information about the intrinsic.
10118static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
10119 bool &isDot, const PPCSubtarget &Subtarget) {
10120 unsigned IntrinsicID =
10121 cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
10122 CompareOpc = -1;
10123 isDot = false;
10124 switch (IntrinsicID) {
10125 default:
10126 return false;
10127 // Comparison predicates.
10128 case Intrinsic::ppc_altivec_vcmpbfp_p:
10129 CompareOpc = 966;
10130 isDot = true;
10131 break;
10132 case Intrinsic::ppc_altivec_vcmpeqfp_p:
10133 CompareOpc = 198;
10134 isDot = true;
10135 break;
10136 case Intrinsic::ppc_altivec_vcmpequb_p:
10137 CompareOpc = 6;
10138 isDot = true;
10139 break;
10140 case Intrinsic::ppc_altivec_vcmpequh_p:
10141 CompareOpc = 70;
10142 isDot = true;
10143 break;
10144 case Intrinsic::ppc_altivec_vcmpequw_p:
10145 CompareOpc = 134;
10146 isDot = true;
10147 break;
10148 case Intrinsic::ppc_altivec_vcmpequd_p:
10149 if (Subtarget.hasP8Altivec()) {
10150 CompareOpc = 199;
10151 isDot = true;
10152 } else
10153 return false;
10154 break;
10155 case Intrinsic::ppc_altivec_vcmpneb_p:
10156 case Intrinsic::ppc_altivec_vcmpneh_p:
10157 case Intrinsic::ppc_altivec_vcmpnew_p:
10158 case Intrinsic::ppc_altivec_vcmpnezb_p:
10159 case Intrinsic::ppc_altivec_vcmpnezh_p:
10160 case Intrinsic::ppc_altivec_vcmpnezw_p:
10161 if (Subtarget.hasP9Altivec()) {
10162 switch (IntrinsicID) {
10163 default:
10164 llvm_unreachable("Unknown comparison intrinsic.");
10165 case Intrinsic::ppc_altivec_vcmpneb_p:
10166 CompareOpc = 7;
10167 break;
10168 case Intrinsic::ppc_altivec_vcmpneh_p:
10169 CompareOpc = 71;
10170 break;
10171 case Intrinsic::ppc_altivec_vcmpnew_p:
10172 CompareOpc = 135;
10173 break;
10174 case Intrinsic::ppc_altivec_vcmpnezb_p:
10175 CompareOpc = 263;
10176 break;
10177 case Intrinsic::ppc_altivec_vcmpnezh_p:
10178 CompareOpc = 327;
10179 break;
10180 case Intrinsic::ppc_altivec_vcmpnezw_p:
10181 CompareOpc = 391;
10182 break;
10183 }
10184 isDot = true;
10185 } else
10186 return false;
10187 break;
10188 case Intrinsic::ppc_altivec_vcmpgefp_p:
10189 CompareOpc = 454;
10190 isDot = true;
10191 break;
10192 case Intrinsic::ppc_altivec_vcmpgtfp_p:
10193 CompareOpc = 710;
10194 isDot = true;
10195 break;
10196 case Intrinsic::ppc_altivec_vcmpgtsb_p:
10197 CompareOpc = 774;
10198 isDot = true;
10199 break;
10200 case Intrinsic::ppc_altivec_vcmpgtsh_p:
10201 CompareOpc = 838;
10202 isDot = true;
10203 break;
10204 case Intrinsic::ppc_altivec_vcmpgtsw_p:
10205 CompareOpc = 902;
10206 isDot = true;
10207 break;
10208 case Intrinsic::ppc_altivec_vcmpgtsd_p:
10209 if (Subtarget.hasP8Altivec()) {
10210 CompareOpc = 967;
10211 isDot = true;
10212 } else
10213 return false;
10214 break;
10215 case Intrinsic::ppc_altivec_vcmpgtub_p:
10216 CompareOpc = 518;
10217 isDot = true;
10218 break;
10219 case Intrinsic::ppc_altivec_vcmpgtuh_p:
10220 CompareOpc = 582;
10221 isDot = true;
10222 break;
10223 case Intrinsic::ppc_altivec_vcmpgtuw_p:
10224 CompareOpc = 646;
10225 isDot = true;
10226 break;
10227 case Intrinsic::ppc_altivec_vcmpgtud_p:
10228 if (Subtarget.hasP8Altivec()) {
10229 CompareOpc = 711;
10230 isDot = true;
10231 } else
10232 return false;
10233 break;
10234
10235 case Intrinsic::ppc_altivec_vcmpequq:
10236 case Intrinsic::ppc_altivec_vcmpgtsq:
10237 case Intrinsic::ppc_altivec_vcmpgtuq:
10238 if (!Subtarget.isISA3_1())
10239 return false;
10240 switch (IntrinsicID) {
10241 default:
10242 llvm_unreachable("Unknown comparison intrinsic.");
10243 case Intrinsic::ppc_altivec_vcmpequq:
10244 CompareOpc = 455;
10245 break;
10246 case Intrinsic::ppc_altivec_vcmpgtsq:
10247 CompareOpc = 903;
10248 break;
10249 case Intrinsic::ppc_altivec_vcmpgtuq:
10250 CompareOpc = 647;
10251 break;
10252 }
10253 break;
10254
10255 // VSX predicate comparisons use the same infrastructure
10256 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10257 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10258 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10259 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10260 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10261 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10262 if (Subtarget.hasVSX()) {
10263 switch (IntrinsicID) {
10264 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10265 CompareOpc = 99;
10266 break;
10267 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10268 CompareOpc = 115;
10269 break;
10270 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10271 CompareOpc = 107;
10272 break;
10273 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10274 CompareOpc = 67;
10275 break;
10276 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10277 CompareOpc = 83;
10278 break;
10279 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10280 CompareOpc = 75;
10281 break;
10282 }
10283 isDot = true;
10284 } else
10285 return false;
10286 break;
10287
10288 // Normal Comparisons.
10289 case Intrinsic::ppc_altivec_vcmpbfp:
10290 CompareOpc = 966;
10291 break;
10292 case Intrinsic::ppc_altivec_vcmpeqfp:
10293 CompareOpc = 198;
10294 break;
10295 case Intrinsic::ppc_altivec_vcmpequb:
10296 CompareOpc = 6;
10297 break;
10298 case Intrinsic::ppc_altivec_vcmpequh:
10299 CompareOpc = 70;
10300 break;
10301 case Intrinsic::ppc_altivec_vcmpequw:
10302 CompareOpc = 134;
10303 break;
10304 case Intrinsic::ppc_altivec_vcmpequd:
10305 if (Subtarget.hasP8Altivec())
10306 CompareOpc = 199;
10307 else
10308 return false;
10309 break;
10310 case Intrinsic::ppc_altivec_vcmpneb:
10311 case Intrinsic::ppc_altivec_vcmpneh:
10312 case Intrinsic::ppc_altivec_vcmpnew:
10313 case Intrinsic::ppc_altivec_vcmpnezb:
10314 case Intrinsic::ppc_altivec_vcmpnezh:
10315 case Intrinsic::ppc_altivec_vcmpnezw:
10316 if (Subtarget.hasP9Altivec())
10317 switch (IntrinsicID) {
10318 default:
10319 llvm_unreachable("Unknown comparison intrinsic.");
10320 case Intrinsic::ppc_altivec_vcmpneb:
10321 CompareOpc = 7;
10322 break;
10323 case Intrinsic::ppc_altivec_vcmpneh:
10324 CompareOpc = 71;
10325 break;
10326 case Intrinsic::ppc_altivec_vcmpnew:
10327 CompareOpc = 135;
10328 break;
10329 case Intrinsic::ppc_altivec_vcmpnezb:
10330 CompareOpc = 263;
10331 break;
10332 case Intrinsic::ppc_altivec_vcmpnezh:
10333 CompareOpc = 327;
10334 break;
10335 case Intrinsic::ppc_altivec_vcmpnezw:
10336 CompareOpc = 391;
10337 break;
10338 }
10339 else
10340 return false;
10341 break;
10342 case Intrinsic::ppc_altivec_vcmpgefp:
10343 CompareOpc = 454;
10344 break;
10345 case Intrinsic::ppc_altivec_vcmpgtfp:
10346 CompareOpc = 710;
10347 break;
10348 case Intrinsic::ppc_altivec_vcmpgtsb:
10349 CompareOpc = 774;
10350 break;
10351 case Intrinsic::ppc_altivec_vcmpgtsh:
10352 CompareOpc = 838;
10353 break;
10354 case Intrinsic::ppc_altivec_vcmpgtsw:
10355 CompareOpc = 902;
10356 break;
10357 case Intrinsic::ppc_altivec_vcmpgtsd:
10358 if (Subtarget.hasP8Altivec())
10359 CompareOpc = 967;
10360 else
10361 return false;
10362 break;
10363 case Intrinsic::ppc_altivec_vcmpgtub:
10364 CompareOpc = 518;
10365 break;
10366 case Intrinsic::ppc_altivec_vcmpgtuh:
10367 CompareOpc = 582;
10368 break;
10369 case Intrinsic::ppc_altivec_vcmpgtuw:
10370 CompareOpc = 646;
10371 break;
10372 case Intrinsic::ppc_altivec_vcmpgtud:
10373 if (Subtarget.hasP8Altivec())
10374 CompareOpc = 711;
10375 else
10376 return false;
10377 break;
10378 case Intrinsic::ppc_altivec_vcmpequq_p:
10379 case Intrinsic::ppc_altivec_vcmpgtsq_p:
10380 case Intrinsic::ppc_altivec_vcmpgtuq_p:
10381 if (!Subtarget.isISA3_1())
10382 return false;
10383 switch (IntrinsicID) {
10384 default:
10385 llvm_unreachable("Unknown comparison intrinsic.");
10386 case Intrinsic::ppc_altivec_vcmpequq_p:
10387 CompareOpc = 455;
10388 break;
10389 case Intrinsic::ppc_altivec_vcmpgtsq_p:
10390 CompareOpc = 903;
10391 break;
10392 case Intrinsic::ppc_altivec_vcmpgtuq_p:
10393 CompareOpc = 647;
10394 break;
10395 }
10396 isDot = true;
10397 break;
10398 }
10399 return true;
10400}
10401
10402/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
10403/// lower, do it, otherwise return null.
10404SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
10405 SelectionDAG &DAG) const {
10406 unsigned IntrinsicID =
10407 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
10408
10409 SDLoc dl(Op);
10410
10411 switch (IntrinsicID) {
10412 case Intrinsic::thread_pointer:
10413 // Reads the thread pointer register, used for __builtin_thread_pointer.
10414 if (Subtarget.isPPC64())
10415 return DAG.getRegister(PPC::X13, MVT::i64);
10416 return DAG.getRegister(PPC::R2, MVT::i32);
10417
10418 case Intrinsic::ppc_mma_disassemble_acc:
10419 case Intrinsic::ppc_mma_disassemble_pair: {
10420 int NumVecs = 2;
10421 SDValue WideVec = Op.getOperand(1);
10422 if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
10423 NumVecs = 4;
10424 WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
10425 }
10426 SmallVector<SDValue, 4> RetOps;
10427 for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
10428 SDValue Extract = DAG.getNode(
10429 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
10430 DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
10431 : VecNo,
10432 dl, MVT::i64));
10433 RetOps.push_back(Extract);
10434 }
10435 return DAG.getMergeValues(RetOps, dl);
10436 }
10437 }
10438
10439 // If this is a lowered altivec predicate compare, CompareOpc is set to the
10440 // opcode number of the comparison.
10441 int CompareOpc;
10442 bool isDot;
10443 if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
10444 return SDValue(); // Don't custom lower most intrinsics.
10445
10446 // If this is a non-dot comparison, make the VCMP node and we are done.
10447 if (!isDot) {
10448 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
10449 Op.getOperand(1), Op.getOperand(2),
10450 DAG.getConstant(CompareOpc, dl, MVT::i32));
10451 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
10452 }
10453
10454 // Create the PPCISD altivec 'dot' comparison node.
10455 SDValue Ops[] = {
10456 Op.getOperand(2), // LHS
10457 Op.getOperand(3), // RHS
10458 DAG.getConstant(CompareOpc, dl, MVT::i32)
10459 };
10460 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
10461 SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
10462
10463 // Now that we have the comparison, emit a copy from the CR to a GPR.
10464 // This is flagged to the above dot comparison.
10465 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
10466 DAG.getRegister(PPC::CR6, MVT::i32),
10467 CompNode.getValue(1));
10468
10469 // Unpack the result based on how the target uses it.
10470 unsigned BitNo; // Bit # of CR6.
10471 bool InvertBit; // Invert result?
10472 switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
10473 default: // Can't happen, don't crash on invalid number though.
10474 case 0: // Return the value of the EQ bit of CR6.
10475 BitNo = 0; InvertBit = false;
10476 break;
10477 case 1: // Return the inverted value of the EQ bit of CR6.
10478 BitNo = 0; InvertBit = true;
10479 break;
10480 case 2: // Return the value of the LT bit of CR6.
10481 BitNo = 2; InvertBit = false;
10482 break;
10483 case 3: // Return the inverted value of the LT bit of CR6.
10484 BitNo = 2; InvertBit = true;
10485 break;
10486 }
10487
10488 // Shift the bit into the low position.
10489 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
10490 DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
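Worked example (illustrative, not part of the source): the shift amount 8 - (3 - BitNo) is 5 for the EQ bit (BitNo == 0) and 7 for the LT bit (BitNo == 2); the AND with 1 below then isolates that single bit.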
10491 // Isolate the bit.
10492 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
10493 DAG.getConstant(1, dl, MVT::i32));
10494
10495 // If we are supposed to, toggle the bit.
10496 if (InvertBit)
10497 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
10498 DAG.getConstant(1, dl, MVT::i32));
10499 return Flags;
10500}
10501
10502SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
10503 SelectionDAG &DAG) const {
10504 // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
10505 // the beginning of the argument list.
10506 int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
10507 SDLoc DL(Op);
10508 switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
10509 case Intrinsic::ppc_cfence: {
10510 assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
10511 assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
10512 return SDValue(DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
10513 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
10514 Op.getOperand(ArgStart + 1)),
10515 Op.getOperand(0)),
10516 0);
10517 }
10518 default:
10519 break;
10520 }
10521 return SDValue();
10522}
10523
10524// Lower scalar BSWAP64 to xxbrd.
10525SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
10526 SDLoc dl(Op);
10527 // MTVSRDD
10528 Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
10529 Op.getOperand(0));
10530 // XXBRD
10531 Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
10532 // MFVSRD
10533 int VectorIndex = 0;
10534 if (Subtarget.isLittleEndian())
10535 VectorIndex = 1;
10536 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
10537 DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
10538 return Op;
10539}
10540
10541// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
10542// compared to a value that is atomically loaded (atomic loads zero-extend).
10543SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
10544 SelectionDAG &DAG) const {
10545 assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
10546 "Expecting an atomic compare-and-swap here.");
10547 SDLoc dl(Op);
10548 auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
10549 EVT MemVT = AtomicNode->getMemoryVT();
10550 if (MemVT.getSizeInBits() >= 32)
10551 return Op;
10552
10553 SDValue CmpOp = Op.getOperand(2);
10554 // If this is already correctly zero-extended, leave it alone.
10555 auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
10556 if (DAG.MaskedValueIsZero(CmpOp, HighBits))
10557 return Op;
10558
10559 // Clear the high bits of the compare operand.
10560 unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
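Worked example (illustrative, not part of the source): MemVT is i8 or i16 at this point, so MaskVal is 0xFF or 0xFFFF respectively; the AND below zero-extends the compare operand to match the zero-extended value produced by the atomic load.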
10561 SDValue NewCmpOp =
10562 DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
10563 DAG.getConstant(MaskVal, dl, MVT::i32));
10564
10565 // Replace the existing compare operand with the properly zero-extended one.
10566 SmallVector<SDValue, 4> Ops;
10567 for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
10568 Ops.push_back(AtomicNode->getOperand(i));
10569 Ops[2] = NewCmpOp;
10570 MachineMemOperand *MMO = AtomicNode->getMemOperand();
10571 SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
10572 auto NodeTy =
10573 (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
10574 return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
10575}
10576
10577SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
10578 SelectionDAG &DAG) const {
10579 SDLoc dl(Op);
10580 // Create a stack slot that is 16-byte aligned.
10581 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
10582 int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
10583 EVT PtrVT = getPointerTy(DAG.getDataLayout());
10584 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
10585
10586 // Store the input value into Value#0 of the stack slot.
10587 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
10588 MachinePointerInfo());
10589 // Load it out.
10590 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
10591}
10592
10593SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
10594 SelectionDAG &DAG) const {
10595 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
10596 "Should only be called for ISD::INSERT_VECTOR_ELT");
10597
10598 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
10599 // We have legal lowering for constant indices but not for variable ones.
10600 if (!C)
10601 return SDValue();
10602
10603 EVT VT = Op.getValueType();
10604 SDLoc dl(Op);
10605 SDValue V1 = Op.getOperand(0);
10606 SDValue V2 = Op.getOperand(1);
10607 // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
10608 if (VT == MVT::v8i16 || VT == MVT::v16i8) {
10609 SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
10610 unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
10611 unsigned InsertAtElement = C->getZExtValue();
10612 unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
10613 if (Subtarget.isLittleEndian()) {
10614 InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
10615 }
10616 return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
10617 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10618 }
10619 return Op;
10620}
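
A minimal sketch of the byte-index computation above for v8i16/v16i8 inserts (hypothetical helper, not code from this file):

// Element index -> byte offset for PPCISD::VECINSERT, with the little-endian
// adjustment applied as in the code above.
unsigned insertAtByte(unsigned Element, unsigned ElementBytes, bool IsLittleEndian) {
  unsigned Byte = Element * ElementBytes;
  if (IsLittleEndian)
    Byte = (16 - ElementBytes) - Byte;
  return Byte;
}
// e.g. v8i16, element 3: byte 6 big-endian, byte (16 - 2) - 6 = 8 little-endian.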
10621
10622SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
10623 SelectionDAG &DAG) const {
10624 SDLoc dl(Op);
10625 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
10626 SDValue LoadChain = LN->getChain();
10627 SDValue BasePtr = LN->getBasePtr();
10628 EVT VT = Op.getValueType();
10629
10630 if (VT != MVT::v256i1 && VT != MVT::v512i1)
10631 return Op;
10632
10633 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
10634 // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
10635 // 2 or 4 vsx registers.
10636 assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
10637 "Type unsupported without MMA");
10638 assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
10639 "Type unsupported without paired vector support");
10640 Align Alignment = LN->getAlign();
10641 SmallVector<SDValue, 4> Loads;
10642 SmallVector<SDValue, 4> LoadChains;
10643 unsigned NumVecs = VT.getSizeInBits() / 128;
10644 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
10645 SDValue Load =
10646 DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
10647 LN->getPointerInfo().getWithOffset(Idx * 16),
10648 commonAlignment(Alignment, Idx * 16),
10649 LN->getMemOperand()->getFlags(), LN->getAAInfo());
10650 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
10651 DAG.getConstant(16, dl, BasePtr.getValueType()));
10652 Loads.push_back(Load);
10653 LoadChains.push_back(Load.getValue(1));
10654 }
10655 if (Subtarget.isLittleEndian()) {
10656 std::reverse(Loads.begin(), Loads.end());
10657 std::reverse(LoadChains.begin(), LoadChains.end());
10658 }
10659 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
10660 SDValue Value =
10661 DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
10662 dl, VT, Loads);
10663 SDValue RetOps[] = {Value, TF};
10664 return DAG.getMergeValues(RetOps, dl);
10665}
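
A minimal sketch of how the wide value is split above (hypothetical helpers, not part of this file):

// A v512i1 accumulator becomes four 16-byte loads at offsets 0, 16, 32 and 48;
// a v256i1 pair becomes two. On little-endian targets the loaded registers are
// reversed before being combined with ACC_BUILD / PAIR_BUILD.
unsigned numComponentLoads(unsigned ValueBits) { return ValueBits / 128; }
unsigned componentOffset(unsigned Idx) { return Idx * 16; }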
10666
10667SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
10668 SelectionDAG &DAG) const {
10669 SDLoc dl(Op);
10670 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
10671 SDValue StoreChain = SN->getChain();
10672 SDValue BasePtr = SN->getBasePtr();
10673 SDValue Value = SN->getValue();
10674 EVT StoreVT = Value.getValueType();
10675
10676 if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
10677 return Op;
10678
10679 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
10680 // Here we create 2 or 4 v16i8 stores to store the pair's or accumulator's
10681 // underlying registers individually.
10682 assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
10683 "Type unsupported without MMA");
10684 assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
10685 "Type unsupported without paired vector support");
10686 Align Alignment = SN->getAlign();
10687 SmallVector<SDValue, 4> Stores;
10688 unsigned NumVecs = 2;
10689 if (StoreVT == MVT::v512i1) {
10690 Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
10691 NumVecs = 4;
10692 }
10693 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
10694 unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
10695 SDValue Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
10696 DAG.getConstant(VecNum, dl, MVT::i64));
10697 SDValue Store =
10698 DAG.getStore(StoreChain, dl, Elt, BasePtr,
10699 SN->getPointerInfo().getWithOffset(Idx * 16),
10700 commonAlignment(Alignment, Idx * 16),
10701 SN->getMemOperand()->getFlags(), SN->getAAInfo());
10702 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
10703 DAG.getConstant(16, dl, BasePtr.getValueType()));
10704 Stores.push_back(Store);
10705 }
10706 SDValue TF = DAG.getTokenFactor(dl, Stores);
10707 return TF;
10708}
10709
10710SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
10711 SDLoc dl(Op);
10712 if (Op.getValueType() == MVT::v4i32) {
10713 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
10714
10715 SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
10716 // +16 as shift amt.
10717 SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
10718 SDValue RHSSwap = // = vrlw RHS, 16
10719 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
10720
10721 // Shrinkify inputs to v8i16.
10722 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
10723 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
10724 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
10725
10726 // Low parts multiplied together, generating 32-bit results (we ignore the
10727 // top parts).
10728 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
10729 LHS, RHS, DAG, dl, MVT::v4i32);
10730
10731 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
10732 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
10733 // Shift the high parts up 16 bits.
10734 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
10735 Neg16, DAG, dl);
10736 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
10737 } else if (Op.getValueType() == MVT::v16i8) {
10738 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
10739 bool isLittleEndian = Subtarget.isLittleEndian();
10740
10741 // Multiply the even 8-bit parts, producing 16-bit sums.
10742 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
10743 LHS, RHS, DAG, dl, MVT::v8i16);
10744 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
10745
10746 // Multiply the odd 8-bit parts, producing 16-bit sums.
10747 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
10748 LHS, RHS, DAG, dl, MVT::v8i16);
10749 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
10750
10751 // Merge the results together. Because vmuleub and vmuloub are
10752 // instructions with a big-endian bias, we must reverse the
10753 // element numbering and reverse the meaning of "odd" and "even"
10754 // when generating little endian code.
10755 int Ops[16];
10756 for (unsigned i = 0; i != 8; ++i) {
10757 if (isLittleEndian) {
10758 Ops[i*2 ] = 2*i;
10759 Ops[i*2+1] = 2*i+16;
10760 } else {
10761 Ops[i*2 ] = 2*i+1;
10762 Ops[i*2+1] = 2*i+1+16;
10763 }
10764 }
10765 if (isLittleEndian)
10766 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
10767 else
10768 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
10769 } else {
10770 llvm_unreachable("Unknown mul to lower!");
10771 }
10772}
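
A scalar sketch of the per-lane arithmetic behind the v4i32 path above (hypothetical function, not part of this file):

#include <cstdint>

// a*b mod 2^32 from halfword products: the low product comes from vmulouh and
// the cross terms from vmsumuhm on the halfword-swapped RHS, shifted up by 16.
uint32_t mulViaHalfwords(uint32_t A, uint32_t B) {
  uint32_t ALo = A & 0xFFFF, AHi = A >> 16;
  uint32_t BLo = B & 0xFFFF, BHi = B >> 16;
  uint32_t LoProd = ALo * BLo;             // vmulouh
  uint32_t Cross  = AHi * BLo + ALo * BHi; // vmsumuhm(LHS, RHSSwap, 0)
  return LoProd + (Cross << 16);           // vslw + add
}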
10773
10774SDValue PPCTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
10775
10776 assert(Op.getOpcode() == ISD::ABS && "Should only be called for ISD::ABS");
10777
10778 EVT VT = Op.getValueType();
10779 assert(VT.isVector() &&
10780 "Only set vector abs as custom, scalar abs shouldn't reach here!");
10781 assert((VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
10782 VT == MVT::v16i8) &&
10783 "Unexpected vector element type!");
10784 assert((VT != MVT::v2i64 || Subtarget.hasP8Altivec()) &&
10785 "Current subtarget doesn't support smax v2i64!");
10786
10787 // For vector abs, it can be lowered to:
10788 // abs x
10789 // ==>
10790 // y = -x
10791 // smax(x, y)
10792
10793 SDLoc dl(Op);
10794 SDValue X = Op.getOperand(0);
10795 SDValue Zero = DAG.getConstant(0, dl, VT);
10796 SDValue Y = DAG.getNode(ISD::SUB, dl, VT, Zero, X);
10797
10798 // SMAX patch https://reviews.llvm.org/D47332
10799 // hasn't landed yet, so use intrinsic first here.
10800 // TODO: Should use SMAX directly once SMAX patch landed
10801 Intrinsic::ID BifID = Intrinsic::ppc_altivec_vmaxsw;
10802 if (VT == MVT::v2i64)
10803 BifID = Intrinsic::ppc_altivec_vmaxsd;
10804 else if (VT == MVT::v8i16)
10805 BifID = Intrinsic::ppc_altivec_vmaxsh;
10806 else if (VT == MVT::v16i8)
10807 BifID = Intrinsic::ppc_altivec_vmaxsb;
10808
10809 return BuildIntrinsicOp(BifID, X, Y, DAG, dl, VT);
10810}
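
A scalar sketch of the smax-based lowering above (hypothetical helper; like the vector form, it wraps on the minimum value):

#include <algorithm>
#include <cstdint>

int32_t absViaSMax(int32_t X) {
  // Negate with wrapping semantics, as the vector ISD::SUB from zero does.
  int32_t Y = static_cast<int32_t>(0u - static_cast<uint32_t>(X));
  return std::max(X, Y); // smax(x, y); vmaxs[bhwd] in the vector lowering
}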
10811
10812// Custom lowering for fpext v2f32 to v2f64
10813SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
10814
10815 assert(Op.getOpcode() == ISD::FP_EXTEND &&
10816 "Should only be called for ISD::FP_EXTEND");
10817
10818 // FIXME: handle extends from half precision float vectors on P9.
10819 // We only want to custom lower an extend from v2f32 to v2f64.
10820 if (Op.getValueType() != MVT::v2f64 ||
10821 Op.getOperand(0).getValueType() != MVT::v2f32)
10822 return SDValue();
10823
10824 SDLoc dl(Op);
10825 SDValue Op0 = Op.getOperand(0);
10826
10827 switch (Op0.getOpcode()) {
10828 default:
10829 return SDValue();
10830 case ISD::EXTRACT_SUBVECTOR: {
10831 assert(Op0.getNumOperands() == 2 &&
10832 isa<ConstantSDNode>(Op0->getOperand(1)) &&
10833 "Node should have 2 operands with second one being a constant!");
10834
10835 if (Op0.getOperand(0).getValueType() != MVT::v4f32)
10836 return SDValue();
10837
10838 // Custom lower is only done for high or low doubleword.
10839 int Idx = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
10840 if (Idx % 2 != 0)
10841 return SDValue();
10842
10843 // Since input is v4f32, at this point Idx is either 0 or 2.
10844 // Shift to get the doubleword position we want.
10845 int DWord = Idx >> 1;
10846
10847 // High and low word positions are different on little endian.
10848 if (Subtarget.isLittleEndian())
10849 DWord ^= 0x1;
10850
10851 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
10852 Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
10853 }
10854 case ISD::FADD:
10855 case ISD::FMUL:
10856 case ISD::FSUB: {
10857 SDValue NewLoad[2];
10858 for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
10859 // Ensure both inputs are loads.
10860 SDValue LdOp = Op0.getOperand(i);
10861 if (LdOp.getOpcode() != ISD::LOAD)
10862 return SDValue();
10863 // Generate new load node.
10864 LoadSDNode *LD = cast<LoadSDNode>(LdOp);
10865 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
10866 NewLoad[i] = DAG.getMemIntrinsicNode(
10867 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
10868 LD->getMemoryVT(), LD->getMemOperand());
10869 }
10870 SDValue NewOp =
10871 DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
10872 NewLoad[1], Op0.getNode()->getFlags());
10873 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
10874 DAG.getConstant(0, dl, MVT::i32));
10875 }
10876 case ISD::LOAD: {
10877 LoadSDNode *LD = cast<LoadSDNode>(Op0);
10878 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
10879 SDValue NewLd = DAG.getMemIntrinsicNode(
10880 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
10881 LD->getMemoryVT(), LD->getMemOperand());
10882 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
10883 DAG.getConstant(0, dl, MVT::i32));
10884 }
10885 }
10886 llvm_unreachable("ERROR: Should return for all cases within switch.");
10887}
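
A minimal sketch of the doubleword selection above for the EXTRACT_SUBVECTOR case (hypothetical helper, not part of this file):

// Subvector index 0 or 2 of a v4f32 selects doubleword 0 or 1; the position is
// flipped on little-endian targets, matching the XOR above.
int fpExtendDWord(int SubvectorIdx, bool IsLittleEndian) {
  int DWord = SubvectorIdx >> 1; // 0 or 2 -> 0 or 1
  if (IsLittleEndian)
    DWord ^= 0x1;
  return DWord;
}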
10888
10889/// LowerOperation - Provide custom lowering hooks for some operations.
10890///
10891SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
10892 switch (Op.getOpcode()) {
10893 default: llvm_unreachable("Wasn't expecting to be able to lower this!");
10894 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
10895 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
10896 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
10897 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
10898 case ISD::JumpTable: return LowerJumpTable(Op, DAG);
10899 case ISD::SETCC: return LowerSETCC(Op, DAG);
10900 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
10901 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
10902
10903 // Variable argument lowering.
10904 case ISD::VASTART: return LowerVASTART(Op, DAG);
10905 case ISD::VAARG: return LowerVAARG(Op, DAG);
10906 case ISD::VACOPY: return LowerVACOPY(Op, DAG);
10907
10908 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
10909 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
10910 case ISD::GET_DYNAMIC_AREA_OFFSET:
10911 return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
10912
10913 // Exception handling lowering.
10914 case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
10915 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
10916 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
10917
10918 case ISD::LOAD: return LowerLOAD(Op, DAG);
10919 case ISD::STORE: return LowerSTORE(Op, DAG);
10920 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
10921 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
10922 case ISD::STRICT_FP_TO_UINT:
10923 case ISD::STRICT_FP_TO_SINT:
10924 case ISD::FP_TO_UINT:
10925 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
10926 case ISD::STRICT_UINT_TO_FP:
10927 case ISD::STRICT_SINT_TO_FP:
10928 case ISD::UINT_TO_FP:
10929 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
10930 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
10931
10932 // Lower 64-bit shifts.
10933 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
10934 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
10935 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
10936
10937 case ISD::FSHL: return LowerFunnelShift(Op, DAG);
10938 case ISD::FSHR: return LowerFunnelShift(Op, DAG);
10939
10940 // Vector-related lowering.
10941 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
10942 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
10943 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
10944 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
10945 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
10946 case ISD::MUL: return LowerMUL(Op, DAG);
10947 case ISD::ABS: return LowerABS(Op, DAG);
10948 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
10949 case ISD::ROTL: return LowerROTL(Op, DAG);
10950
10951 // For counter-based loop handling.
10952 case ISD::INTRINSIC_W_CHAIN: return SDValue();
10953
10954 case ISD::BITCAST: return LowerBITCAST(Op, DAG);
10955
10956 // Frame & Return address.
10957 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
10958 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
10959
10960 case ISD::INTRINSIC_VOID:
10961 return LowerINTRINSIC_VOID(Op, DAG);
10962 case ISD::BSWAP:
10963 return LowerBSWAP(Op, DAG);
10964 case ISD::ATOMIC_CMP_SWAP:
10965 return LowerATOMIC_CMP_SWAP(Op, DAG);
10966 }
10967}
10968
10969void PPCTargetLowering::LowerOperationWrapper(SDNode *N,
10970 SmallVectorImpl<SDValue> &Results,
10971 SelectionDAG &DAG) const {
10972 SDValue Res = LowerOperation(SDValue(N, 0), DAG);
10973
10974 if (!Res.getNode())
10975 return;
10976
10977 // Take the return value as-is if original node has only one result.
10978 if (N->getNumValues() == 1) {
10979 Results.push_back(Res);
10980 return;
10981 }
10982
10983 // New node should have the same number of results.
10984 assert((N->getNumValues() == Res->getNumValues()) &&
10985 "Lowering returned the wrong number of results!");
10986
10987 for (unsigned i = 0; i < N->getNumValues(); ++i)
10988 Results.push_back(Res.getValue(i));
10989}
10990
10991void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
10992 SmallVectorImpl<SDValue>&Results,
10993 SelectionDAG &DAG) const {
10994 SDLoc dl(N);
10995 switch (N->getOpcode()) {
10996 default:
10997 llvm_unreachable("Do not know how to custom type legalize this operation!");
10998 case ISD::READCYCLECOUNTER: {
10999 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
11000 SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
11001
11002 Results.push_back(
11003 DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
11004 Results.push_back(RTB.getValue(2));
11005 break;
11006 }
11007 case ISD::INTRINSIC_W_CHAIN: {
11008 if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
11009 Intrinsic::loop_decrement)
11010 break;
11011
11012 assert(N->getValueType(0) == MVT::i1 &&
11013 "Unexpected result type for CTR decrement intrinsic");
11014 EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
11015 N->getValueType(0));
11016 SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
11017 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
11018 N->getOperand(1));
11019
11020 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
11021 Results.push_back(NewInt.getValue(1));
11022 break;
11023 }
11024 case ISD::VAARG: {
11025 if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
11026 return;
11027
11028 EVT VT = N->getValueType(0);
11029
11030 if (VT == MVT::i64) {
11031 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
11032
11033 Results.push_back(NewNode);
11034 Results.push_back(NewNode.getValue(1));
11035 }
11036 return;
11037 }
11038 case ISD::STRICT_FP_TO_SINT:
11039 case ISD::STRICT_FP_TO_UINT:
11040 case ISD::FP_TO_SINT:
11041 case ISD::FP_TO_UINT:
11042 // LowerFP_TO_INT() can only handle f32 and f64.
11043 if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
11044 MVT::ppcf128)
11045 return;
11046 Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
11047 return;
11048 case ISD::TRUNCATE: {
11049 if (!N->getValueType(0).isVector())
11050 return;
11051 SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
11052 if (Lowered)
11053 Results.push_back(Lowered);
11054 return;
11055 }
11056 case ISD::FSHL:
11057 case ISD::FSHR:
11058 // Don't handle funnel shifts here.
11059 return;
11060 case ISD::BITCAST:
11061 // Don't handle bitcast here.
11062 return;
11063 case ISD::FP_EXTEND:
11064 SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
11065 if (Lowered)
11066 Results.push_back(Lowered);
11067 return;
11068 }
11069}
11070
11071//===----------------------------------------------------------------------===//
11072// Other Lowering Code
11073//===----------------------------------------------------------------------===//
11074
11075static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
11076 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
11077 Function *Func = Intrinsic::getDeclaration(M, Id);
11078 return Builder.CreateCall(Func, {});
11079}
11080
11081// The mappings for emitLeadingFence/emitTrailingFence are taken from
11082// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
11083Instruction *PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
11084 Instruction *Inst,
11085 AtomicOrdering Ord) const {
11086 if (Ord == AtomicOrdering::SequentiallyConsistent)
11087 return callIntrinsic(Builder, Intrinsic::ppc_sync);
11088 if (isReleaseOrStronger(Ord))
11089 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
11090 return nullptr;
11091}
11092
11093Instruction *PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
11094 Instruction *Inst,
11095 AtomicOrdering Ord) const {
11096 if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
11097 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
11098 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
11099 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
11100 if (isa<LoadInst>(Inst) && Subtarget.isPPC64())
11101 return Builder.CreateCall(
11102 Intrinsic::getDeclaration(
11103 Builder.GetInsertBlock()->getParent()->getParent(),
11104 Intrinsic::ppc_cfence, {Inst->getType()}),
11105 {Inst});
11106 // FIXME: Can use isync for rmw operation.
11107 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
11108 }
11109 return nullptr;
11110}
11111
11112MachineBasicBlock *
11113PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
11114 unsigned AtomicSize,
11115 unsigned BinOpcode,
11116 unsigned CmpOpcode,
11117 unsigned CmpPred) const {
11118 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
11119 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11120
11121 auto LoadMnemonic = PPC::LDARX;
11122 auto StoreMnemonic = PPC::STDCX;
11123 switch (AtomicSize) {
11124 default:
11125 llvm_unreachable("Unexpected size of atomic entity");
11126 case 1:
11127 LoadMnemonic = PPC::LBARX;
11128 StoreMnemonic = PPC::STBCX;
11129 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
11130 break;
11131 case 2:
11132 LoadMnemonic = PPC::LHARX;
11133 StoreMnemonic = PPC::STHCX;
11134 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
11135 break;
11136 case 4:
11137 LoadMnemonic = PPC::LWARX;
11138 StoreMnemonic = PPC::STWCX;
11139 break;
11140 case 8:
11141 LoadMnemonic = PPC::LDARX;
11142 StoreMnemonic = PPC::STDCX;
11143 break;
11144 }
11145
11146 const BasicBlock *LLVM_BB = BB->getBasicBlock();
11147 MachineFunction *F = BB->getParent();
11148 MachineFunction::iterator It = ++BB->getIterator();
11149
11150 Register dest = MI.getOperand(0).getReg();
11151 Register ptrA = MI.getOperand(1).getReg();
11152 Register ptrB = MI.getOperand(2).getReg();
11153 Register incr = MI.getOperand(3).getReg();
11154 DebugLoc dl = MI.getDebugLoc();
11155
11156 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
11157 MachineBasicBlock *loop2MBB =
11158 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
11159 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11160 F->insert(It, loopMBB);
11161 if (CmpOpcode)
11162 F->insert(It, loop2MBB);
11163 F->insert(It, exitMBB);
11164 exitMBB->splice(exitMBB->begin(), BB,
11165 std::next(MachineBasicBlock::iterator(MI)), BB->end());
11166 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11167
11168 MachineRegisterInfo &RegInfo = F->getRegInfo();
11169 Register TmpReg = (!BinOpcode) ? incr :
11170 RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
11171 : &PPC::GPRCRegClass);
11172
11173 // thisMBB:
11174 // ...
11175 // fallthrough --> loopMBB
11176 BB->addSuccessor(loopMBB);
11177
11178 // loopMBB:
11179 // l[wd]arx dest, ptr
11180 // add r0, dest, incr
11181 // st[wd]cx. r0, ptr
11182 // bne- loopMBB
11183 // fallthrough --> exitMBB
11184
11185 // For max/min...
11186 // loopMBB:
11187 // l[wd]arx dest, ptr
11188 // cmpl?[wd] incr, dest
11189 // bgt exitMBB
11190 // loop2MBB:
11191 // st[wd]cx. dest, ptr
11192 // bne- loopMBB
11193 // fallthrough --> exitMBB
11194
11195 BB = loopMBB;
11196 BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
11197 .addReg(ptrA).addReg(ptrB);
11198 if (BinOpcode)
11199 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
11200 if (CmpOpcode) {
11201 // Signed comparisons of byte or halfword values must be sign-extended.
11202 if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
11203 Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
11204 BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
11205 ExtReg).addReg(dest);
11206 BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
11207 .addReg(incr).addReg(ExtReg);
11208 } else
11209 BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
11210 .addReg(incr).addReg(dest);
11211
11212 BuildMI(BB, dl, TII->get(PPC::BCC))
11213 .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
11214 BB->addSuccessor(loop2MBB);
11215 BB->addSuccessor(exitMBB);
11216 BB = loop2MBB;
11217 }
11218 BuildMI(BB, dl, TII->get(StoreMnemonic))
11219 .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
11220 BuildMI(BB, dl, TII->get(PPC::BCC))
11221 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
11222 BB->addSuccessor(loopMBB);
11223 BB->addSuccessor(exitMBB);
11224
11225 // exitMBB:
11226 // ...
11227 BB = exitMBB;
11228 return BB;
11229}
11230
11231MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
11232 MachineInstr &MI, MachineBasicBlock *BB,
11233 bool is8bit, // operation
11234 unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
11235 // If we support part-word atomic mnemonics, just use them
11236 if (Subtarget.hasPartwordAtomics())
11237 return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
11238 CmpPred);
11239
11240 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
11241 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11242 // In 64 bit mode we have to use 64 bits for addresses, even though the
11243 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
11244 // registers without caring whether they're 32 or 64, but here we're
11245 // doing actual arithmetic on the addresses.
11246 bool is64bit = Subtarget.isPPC64();
11247 bool isLittleEndian = Subtarget.isLittleEndian();
11248 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
11249
11250 const BasicBlock *LLVM_BB = BB->getBasicBlock();
11251 MachineFunction *F = BB->getParent();
11252 MachineFunction::iterator It = ++BB->getIterator();
11253
11254 Register dest = MI.getOperand(0).getReg();
11255 Register ptrA = MI.getOperand(1).getReg();
11256 Register ptrB = MI.getOperand(2).getReg();
11257 Register incr = MI.getOperand(3).getReg();
11258 DebugLoc dl = MI.getDebugLoc();
11259
11260 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
11261 MachineBasicBlock *loop2MBB =
11262 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
11263 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11264 F->insert(It, loopMBB);
11265 if (CmpOpcode)
11266 F->insert(It, loop2MBB);
11267 F->insert(It, exitMBB);
11268 exitMBB->splice(exitMBB->begin(), BB,
11269 std::next(MachineBasicBlock::iterator(MI)), BB->end());
11270 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11271
11272 MachineRegisterInfo &RegInfo = F->getRegInfo();
11273 const TargetRegisterClass *RC =
11274 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
11275 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
11276
11277 Register PtrReg = RegInfo.createVirtualRegister(RC);
11278 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
11279 Register ShiftReg =
11280 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
11281 Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
11282 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
11283 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
11284 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
11285 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
11286 Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
11287 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
11288 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
11289 Register Ptr1Reg;
11290 Register TmpReg =
11291 (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
11292
11293 // thisMBB:
11294 // ...
11295 // fallthrough --> loopMBB
11296 BB->addSuccessor(loopMBB);
11297
11298 // The 4-byte load must be aligned, while a char or short may be
11299 // anywhere in the word. Hence all this nasty bookkeeping code.
11300 // add ptr1, ptrA, ptrB [copy if ptrA==0]
11301 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
11302 // xori shift, shift1, 24 [16]
11303 // rlwinm ptr, ptr1, 0, 0, 29
11304 // slw incr2, incr, shift
11305 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
11306 // slw mask, mask2, shift
11307 // loopMBB:
11308 // lwarx tmpDest, ptr
11309 // add tmp, tmpDest, incr2
11310 // andc tmp2, tmpDest, mask
11311 // and tmp3, tmp, mask
11312 // or tmp4, tmp3, tmp2
11313 // stwcx. tmp4, ptr
11314 // bne- loopMBB
11315 // fallthrough --> exitMBB
11316 // srw dest, tmpDest, shift
11317 if (ptrA != ZeroReg) {
11318 Ptr1Reg = RegInfo.createVirtualRegister(RC);
11319 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
11320 .addReg(ptrA)
11321 .addReg(ptrB);
11322 } else {
11323 Ptr1Reg = ptrB;
11324 }
11325 // We need to use a 32-bit subregister here to avoid a register class
11326 // mismatch in 64-bit mode.
11327 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
11328 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
11329 .addImm(3)
11330 .addImm(27)
11331 .addImm(is8bit ? 28 : 27);
11332 if (!isLittleEndian)
11333 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
11334 .addReg(Shift1Reg)
11335 .addImm(is8bit ? 24 : 16);
11336 if (is64bit)
11337 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
11338 .addReg(Ptr1Reg)
11339 .addImm(0)
11340 .addImm(61);
11341 else
11342 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
11343 .addReg(Ptr1Reg)
11344 .addImm(0)
11345 .addImm(0)
11346 .addImm(29);
11347 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
11348 if (is8bit)
11349 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
11350 else {
11351 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
11352 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
11353 .addReg(Mask3Reg)
11354 .addImm(65535);
11355 }
11356 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
11357 .addReg(Mask2Reg)
11358 .addReg(ShiftReg);
11359
11360 BB = loopMBB;
11361 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
11362 .addReg(ZeroReg)
11363 .addReg(PtrReg);
11364 if (BinOpcode)
11365 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
11366 .addReg(Incr2Reg)
11367 .addReg(TmpDestReg);
11368 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
11369 .addReg(TmpDestReg)
11370 .addReg(MaskReg);
11371 BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
11372 if (CmpOpcode) {
11373 // For unsigned comparisons, we can directly compare the shifted values.
11374 // For signed comparisons we shift and sign extend.
11375 Register SReg = RegInfo.createVirtualRegister(GPRC);
11376 BuildMI(BB, dl, TII->get(PPC::AND), SReg)
11377 .addReg(TmpDestReg)
11378 .addReg(MaskReg);
11379 unsigned ValueReg = SReg;
11380 unsigned CmpReg = Incr2Reg;
11381 if (CmpOpcode == PPC::CMPW) {
11382 ValueReg = RegInfo.createVirtualRegister(GPRC);
11383 BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
11384 .addReg(SReg)
11385 .addReg(ShiftReg);
11386 Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
11387 BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
11388 .addReg(ValueReg);
11389 ValueReg = ValueSReg;
11390 CmpReg = incr;
11391 }
11392 BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
11393 .addReg(CmpReg)
11394 .addReg(ValueReg);
11395 BuildMI(BB, dl, TII->get(PPC::BCC))
11396 .addImm(CmpPred)
11397 .addReg(PPC::CR0)
11398 .addMBB(exitMBB);
11399 BB->addSuccessor(loop2MBB);
11400 BB->addSuccessor(exitMBB);
11401 BB = loop2MBB;
11402 }
11403 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
11404 BuildMI(BB, dl, TII->get(PPC::STWCX))
11405 .addReg(Tmp4Reg)
11406 .addReg(ZeroReg)
11407 .addReg(PtrReg);
11408 BuildMI(BB, dl, TII->get(PPC::BCC))
11409 .addImm(PPC::PRED_NE)
11410 .addReg(PPC::CR0)
11411 .addMBB(loopMBB);
11412 BB->addSuccessor(loopMBB);
11413 BB->addSuccessor(exitMBB);
11414
11415 // exitMBB:
11416 // ...
11417 BB = exitMBB;
11418 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
11419 .addReg(TmpDestReg)
11420 .addReg(ShiftReg);
11421 return BB;
11422}
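
A minimal sketch of the shift/mask bookkeeping generated above for the 8-bit case (hypothetical helpers, not part of this file):

#include <cstdint>

// Bit position of the addressed byte inside its aligned word; big-endian
// flips the lane with the XORI emitted above.
unsigned partwordShift(uint64_t Addr, bool IsLittleEndian) {
  unsigned Shift1 = (Addr & 0x3) * 8;           // rlwinm shift1, ptr1, 3, 27, 28
  return IsLittleEndian ? Shift1 : Shift1 ^ 24; // xori shift, shift1, 24
}

uint32_t partwordMask(unsigned Shift) {
  return 0xFFu << Shift;                        // li mask2, 255; slw mask, mask2, shift
}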
11423
11424llvm::MachineBasicBlock *
11425PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
11426 MachineBasicBlock *MBB) const {
11427 DebugLoc DL = MI.getDebugLoc();
11428 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11429 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
11430
11431 MachineFunction *MF = MBB->getParent();
11432 MachineRegisterInfo &MRI = MF->getRegInfo();
11433
11434 const BasicBlock *BB = MBB->getBasicBlock();
11435 MachineFunction::iterator I = ++MBB->getIterator();
11436
11437 Register DstReg = MI.getOperand(0).getReg();
11438 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
11439 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
11440 Register mainDstReg = MRI.createVirtualRegister(RC);
11441 Register restoreDstReg = MRI.createVirtualRegister(RC);
11442
11443 MVT PVT = getPointerTy(MF->getDataLayout());
11444 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
11445 "Invalid Pointer Size!");
11446 // For v = setjmp(buf), we generate
11447 //
11448 // thisMBB:
11449 // SjLjSetup mainMBB
11450 // bl mainMBB
11451 // v_restore = 1
11452 // b sinkMBB
11453 //
11454 // mainMBB:
11455 // buf[LabelOffset] = LR
11456 // v_main = 0
11457 //
11458 // sinkMBB:
11459 // v = phi(main, restore)
11460 //
11461
11462 MachineBasicBlock *thisMBB = MBB;
11463 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
11464 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
11465 MF->insert(I, mainMBB);
11466 MF->insert(I, sinkMBB);
11467
11468 MachineInstrBuilder MIB;
11469
11470 // Transfer the remainder of BB and its successor edges to sinkMBB.
11471 sinkMBB->splice(sinkMBB->begin(), MBB,
11472 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
11473 sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
11474
11475 // Note that the structure of the jmp_buf used here is not compatible
11476 // with that used by libc, and is not designed to be. Specifically, it
11477 // stores only those 'reserved' registers that LLVM does not otherwise
11478 // understand how to spill. Also, by convention, by the time this
11479 // intrinsic is called, Clang has already stored the frame address in the
11480 // first slot of the buffer and stack address in the third. Following the
11481 // X86 target code, we'll store the jump address in the second slot. We also
11482 // need to save the TOC pointer (R2) to handle jumps between shared
11483 // libraries, and that will be stored in the fourth slot. The thread
11484 // identifier (R13) is not affected.
11485
11486 // thisMBB:
11487 const int64_t LabelOffset = 1 * PVT.getStoreSize();
11488 const int64_t TOCOffset = 3 * PVT.getStoreSize();
11489 const int64_t BPOffset = 4 * PVT.getStoreSize();
11490
11491 // Prepare the IP in a register.
11492 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
11493 Register LabelReg = MRI.createVirtualRegister(PtrRC);
11494 Register BufReg = MI.getOperand(1).getReg();
11495
11496 if (Subtarget.is64BitELFABI()) {
11497 setUsesTOCBasePtr(*MBB->getParent());
11498 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
11499 .addReg(PPC::X2)
11500 .addImm(TOCOffset)
11501 .addReg(BufReg)
11502 .cloneMemRefs(MI);
11503 }
11504
11505 // Naked functions never have a base pointer, and so we use r1. For all
11506 // other functions, this decision must be delayed until PEI.
11507 unsigned BaseReg;
11508 if (MF->getFunction().hasFnAttribute(Attribute::Naked))
11509 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
11510 else
11511 BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
11512
11513 MIB = BuildMI(*thisMBB, MI, DL,
11514 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
11515 .addReg(BaseReg)
11516 .addImm(BPOffset)
11517 .addReg(BufReg)
11518 .cloneMemRefs(MI);
11519
11520 // Setup
11521 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
11522 MIB.addRegMask(TRI->getNoPreservedMask());
11523
11524 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
11525
11526 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
11527 .addMBB(mainMBB);
11528 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
11529
11530 thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
11531 thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
11532
11533 // mainMBB:
11534 // mainDstReg = 0
11535 MIB =
11536 BuildMI(mainMBB, DL,
11537 TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
11538
11539 // Store IP
11540 if (Subtarget.isPPC64()) {
11541 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
11542 .addReg(LabelReg)
11543 .addImm(LabelOffset)
11544 .addReg(BufReg);
11545 } else {
11546 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
11547 .addReg(LabelReg)
11548 .addImm(LabelOffset)
11549 .addReg(BufReg);
11550 }
11551 MIB.cloneMemRefs(MI);
11552
11553 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
11554 mainMBB->addSuccessor(sinkMBB);
11555
11556 // sinkMBB:
11557 BuildMI(*sinkMBB, sinkMBB->begin(), DL,
11558 TII->get(PPC::PHI), DstReg)
11559 .addReg(mainDstReg).addMBB(mainMBB)
11560 .addReg(restoreDstReg).addMBB(thisMBB);
11561
11562 MI.eraseFromParent();
11563 return sinkMBB;
11564}
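
The buffer layout assumed by the offsets above can be summarized as follows (an editorial sketch of the code above, with PVT the pointer store size; this is not libc's jmp_buf):

// slot 0 (offset 0 * PVT)        : frame address, stored earlier by Clang
// slot 1 (LabelOffset = 1 * PVT) : jump address (LR), stored in mainMBB
// slot 2 (offset 2 * PVT)        : stack address, stored earlier by Clang
// slot 3 (TOCOffset   = 3 * PVT) : TOC pointer (R2), 64-bit ELF ABI only
// slot 4 (BPOffset    = 4 * PVT) : base pointer (or r1 for naked functions)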
11565
11566MachineBasicBlock *
11567PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
11568 MachineBasicBlock *MBB) const {
11569 DebugLoc DL = MI.getDebugLoc();
11570 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11571
11572 MachineFunction *MF = MBB->getParent();
11573 MachineRegisterInfo &MRI = MF->getRegInfo();
11574
11575 MVT PVT = getPointerTy(MF->getDataLayout());
11576 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
11577 "Invalid Pointer Size!");
11578
11579 const TargetRegisterClass *RC =
11580 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
11581 Register Tmp = MRI.createVirtualRegister(RC);
11582 // Since FP is only updated here but NOT referenced, it's treated as GPR.
11583 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
11584 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
11585 unsigned BP =
11586 (PVT == MVT::i64)
11587 ? PPC::X30
11588 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
11589 : PPC::R30);
11590
11591 MachineInstrBuilder MIB;
11592
11593 const int64_t LabelOffset = 1 * PVT.getStoreSize();
11594 const int64_t SPOffset = 2 * PVT.getStoreSize();
11595 const int64_t TOCOffset = 3 * PVT.getStoreSize();
11596 const int64_t BPOffset = 4 * PVT.getStoreSize();
11597
11598 Register BufReg = MI.getOperand(0).getReg();
11599
11600 // Reload FP (the jumped-to function may not have had a
11601 // frame pointer, and if so, then its r31 will be restored
11602 // as necessary).
11603 if (PVT == MVT::i64) {
11604 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
11605 .addImm(0)
11606 .addReg(BufReg);
11607 } else {
11608 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
11609 .addImm(0)
11610 .addReg(BufReg);
11611 }
11612 MIB.cloneMemRefs(MI);
11613
11614 // Reload IP
11615 if (PVT == MVT::i64) {
11616 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
11617 .addImm(LabelOffset)
11618 .addReg(BufReg);
11619 } else {
11620 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
11621 .addImm(LabelOffset)
11622 .addReg(BufReg);
11623 }
11624 MIB.cloneMemRefs(MI);
11625
11626 // Reload SP
11627 if (PVT == MVT::i64) {
11628 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
11629 .addImm(SPOffset)
11630 .addReg(BufReg);
11631 } else {
11632 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
11633 .addImm(SPOffset)
11634 .addReg(BufReg);
11635 }
11636 MIB.cloneMemRefs(MI);
11637
11638 // Reload BP
11639 if (PVT == MVT::i64) {
11640 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
11641 .addImm(BPOffset)
11642 .addReg(BufReg);
11643 } else {
11644 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
11645 .addImm(BPOffset)
11646 .addReg(BufReg);
11647 }
11648 MIB.cloneMemRefs(MI);
11649
11650 // Reload TOC
11651 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
11652 setUsesTOCBasePtr(*MBB->getParent());
11653 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
11654 .addImm(TOCOffset)
11655 .addReg(BufReg)
11656 .cloneMemRefs(MI);
11657 }
11658
11659 // Jump
11660 BuildMI(*MBB, MI, DL,
11661 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
11662 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
11663
11664 MI.eraseFromParent();
11665 return MBB;
11666}
11667
11668bool PPCTargetLowering::hasInlineStackProbe(MachineFunction &MF) const {
11669 // If the function specifically requests inline stack probes, emit them.
11670 if (MF.getFunction().hasFnAttribute("probe-stack"))
11671 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
11672 "inline-asm";
11673 return false;
11674}
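
A minimal sketch of how a function would opt in to the inline probing checked above (illustrative use of the LLVM C++ API, not code from this file):

#include "llvm/IR/Function.h"

// Mark F so hasInlineStackProbe() above returns true for it.
void requestInlineStackProbes(llvm::Function &F) {
  F.addFnAttr("probe-stack", "inline-asm");
}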
11675
11676unsigned PPCTargetLowering::getStackProbeSize(MachineFunction &MF) const {
11677 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
11678 unsigned StackAlign = TFI->getStackAlignment();
11679 assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
11680 "Unexpected stack alignment");
11681 // The default stack probe size is 4096 if the function has no
11682 // stack-probe-size attribute.
11683 unsigned StackProbeSize = 4096;
11684 const Function &Fn = MF.getFunction();
11685 if (Fn.hasFnAttribute("stack-probe-size"))
11686 Fn.getFnAttribute("stack-probe-size")
11687 .getValueAsString()
11688 .getAsInteger(0, StackProbeSize);
11689 // Round down to the stack alignment.
11690 StackProbeSize &= ~(StackAlign - 1);
11691 return StackProbeSize ? StackProbeSize : StackAlign;
11692}
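
A worked example of the rounding above (editorial, assuming a 16-byte stack alignment):

// With StackAlign = 16 and "stack-probe-size"="4100":
//   4100 & ~(16 - 1) = 4100 & ~15 = 4096
// With "stack-probe-size"="8", the masked value is 0, so StackAlign (16) is used.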
11693
11694// Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
11695// into three phases. In the first phase, it uses the pseudo instruction
11696// PREPARE_PROBED_ALLOCA to get the future result of the actual FramePointer and
11697// FinalStackPtr. In the second phase, it generates a loop that probes the blocks.
11698// Finally, it uses the pseudo instruction DYNAREAOFFSET to get the future result
11699// of MaxCallFrameSize so that it can calculate the correct data area pointer.
11700MachineBasicBlock *
11701PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
11702 MachineBasicBlock *MBB) const {
11703 const bool isPPC64 = Subtarget.isPPC64();
11704 MachineFunction *MF = MBB->getParent();
11705 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11706 DebugLoc DL = MI.getDebugLoc();
11707 const unsigned ProbeSize = getStackProbeSize(*MF);
11708 const BasicBlock *ProbedBB = MBB->getBasicBlock();
11709 MachineRegisterInfo &MRI = MF->getRegInfo();
11710 // The CFG of probing stack looks as
11711 // +-----+
11712 // | MBB |
11713 // +--+--+
11714 // |
11715 // +----v----+
11716 // +--->+ TestMBB +---+
11717 // | +----+----+ |
11718 // | | |
11719 // | +-----v----+ |
11720 // +---+ BlockMBB | |
11721 // +----------+ |
11722 // |
11723 // +---------+ |
11724 // | TailMBB +<--+
11725 // +---------+
11726 // In MBB, calculate previous frame pointer and final stack pointer.
11727 // In TestMBB, test if sp is equal to final stack pointer, if so, jump to
11728 // TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
11729 // TailMBB is spliced via \p MI.
11730 MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
11731 MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
11732 MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
11733
11734 MachineFunction::iterator MBBIter = ++MBB->getIterator();
11735 MF->insert(MBBIter, TestMBB);
11736 MF->insert(MBBIter, BlockMBB);
11737 MF->insert(MBBIter, TailMBB);
11738
11739 const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
11740 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
11741
11742 Register DstReg = MI.getOperand(0).getReg();
11743 Register NegSizeReg = MI.getOperand(1).getReg();
11744 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
11745 Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11746 Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11747 Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11748
11749 // Since the value of NegSizeReg might be realigned during prologue/epilogue
11750 // insertion, insert a PREPARE_PROBED_ALLOCA pseudo instruction to get the
11751 // actual FramePointer and NegSize.
11752 unsigned ProbeOpc;
11753 if (!MRI.hasOneNonDBGUse(NegSizeReg))
11754 ProbeOpc =
11755 isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
11756 else
11757 // By introducing the PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG variants,
11758 // ActualNegSizeReg and NegSizeReg will be allocated to the same physical
11759 // register, avoiding a redundant copy when NegSizeReg has only one use,
11760 // namely the current MI, which is then replaced by PREPARE_PROBED_ALLOCA.
11761 ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
11762 : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
11763 BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
11764 .addDef(ActualNegSizeReg)
11765 .addReg(NegSizeReg)
11766 .add(MI.getOperand(2))
11767 .add(MI.getOperand(3));
11768
11769 // Calculate the final stack pointer, which equals SP + ActualNegSize.
11770 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
11771 FinalStackPtr)
11772 .addReg(SPReg)
11773 .addReg(ActualNegSizeReg);
11774
11775 // Materialize a scratch register for update.
11776 int64_t NegProbeSize = -(int64_t)ProbeSize;
11777 assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
11778 Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11779 if (!isInt<16>(NegProbeSize)) {
11780 Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11781 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
11782 .addImm(NegProbeSize >> 16);
11783 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
11784 ScratchReg)
11785 .addReg(TempReg)
11786 .addImm(NegProbeSize & 0xFFFF);
11787 } else
11788 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
11789 .addImm(NegProbeSize);
11790
11791 {
11792 // Probing leading residual part.
11793 Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11794 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
11795 .addReg(ActualNegSizeReg)
11796 .addReg(ScratchReg);
11797 Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11798 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
11799 .addReg(Div)
11800 .addReg(ScratchReg);
11801 Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11802 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
11803 .addReg(Mul)
11804 .addReg(ActualNegSizeReg);
11805 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
11806 .addReg(FramePointer)
11807 .addReg(SPReg)
11808 .addReg(NegMod);
11809 }
11810
11811 {
11812 // The remaining part should be a multiple of ProbeSize.
11813 Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
11814 BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
11815 .addReg(SPReg)
11816 .addReg(FinalStackPtr);
11817 BuildMI(TestMBB, DL, TII->get(PPC::BCC))
11818 .addImm(PPC::PRED_EQ)
11819 .addReg(CmpResult)
11820 .addMBB(TailMBB);
11821 TestMBB->addSuccessor(BlockMBB);
11822 TestMBB->addSuccessor(TailMBB);
11823 }
11824
11825 {
11826 // Touch the block.
11827 // |P...|P...|P...
11828 BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
11829 .addReg(FramePointer)
11830 .addReg(SPReg)
11831 .addReg(ScratchReg);
11832 BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
11833 BlockMBB->addSuccessor(TestMBB);
11834 }
11835
11836 // Calculation of MaxCallFrameSize is deferred to prolog/epilog insertion, so
11837 // use the DYNAREAOFFSET pseudo instruction to get the future result.
11838 Register MaxCallFrameSizeReg =
11839 MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11840 BuildMI(TailMBB, DL,
11841 TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
11842 MaxCallFrameSizeReg)
11843 .add(MI.getOperand(2))
11844 .add(MI.getOperand(3));
11845 BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
11846 .addReg(SPReg)
11847 .addReg(MaxCallFrameSizeReg);
11848
11849 // Splice instructions after MI to TailMBB.
11850 TailMBB->splice(TailMBB->end(), MBB,
11851 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
11852 TailMBB->transferSuccessorsAndUpdatePHIs(MBB);
11853 MBB->addSuccessor(TestMBB);
11854
11855 // Delete the pseudo instruction.
11856 MI.eraseFromParent();
11857
11858 ++NumDynamicAllocaProbed;
11859 return TailMBB;
11860}
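// A minimal standalone sketch (plain C++, independent of the machine-IR code
// above) of the probing arithmetic in emitProbedAlloca: the signed
// divide/multiply/subtract sequence yields the leading residual
// (NegSize mod NegProbeSize), which is probed first, after which the probing
// loop advances in ProbeSize-sized steps until the final stack pointer is
// reached. Only the pointer arithmetic is modeled here, not the stores.
#include <cstdint>
#include <cstdio>

int main() {
  const int64_t ProbeSize = 4096;
  const int64_t NegProbe = -ProbeSize;
  const int64_t NegSize = -10000;    // e.g. a dynamic alloca of 10000 bytes
  int64_t Residual = NegSize - (NegSize / NegProbe) * NegProbe; // -1808
  int64_t SP = 0;                    // offsets relative to the incoming SP
  SP += Residual;                    // probe the leading residual part
  while (SP != NegSize)              // remaining part is a multiple of ProbeSize
    SP += NegProbe;                  // one probe per ProbeSize block
  std::printf("probed down to %lld\n", (long long)SP); // probed down to -10000
  return 0;
}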
11861
11862MachineBasicBlock *
11863PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
11864 MachineBasicBlock *BB) const {
11865 if (MI.getOpcode() == TargetOpcode::STACKMAP ||
11866 MI.getOpcode() == TargetOpcode::PATCHPOINT) {
11867 if (Subtarget.is64BitELFABI() &&
11868 MI.getOpcode() == TargetOpcode::PATCHPOINT &&
11869 !Subtarget.isUsingPCRelativeCalls()) {
11870 // Call lowering should have added an r2 operand to indicate a dependence
11871 // on the TOC base pointer value. It can't, however, because there is no
11872 // way to mark the dependence as implicit there, and so the stackmap code
11873 // will confuse it with a regular operand. Instead, add the dependence
11874 // here.
11875 MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
11876 }
11877
11878 return emitPatchPoint(MI, BB);
11879 }
11880
11881 if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
11882 MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
11883 return emitEHSjLjSetJmp(MI, BB);
11884 } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
11885 MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
11886 return emitEHSjLjLongJmp(MI, BB);
11887 }
11888
11889 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11890
11891 // To "insert" these instructions we actually have to insert their
11892 // control-flow patterns.
11893 const BasicBlock *LLVM_BB = BB->getBasicBlock();
11894 MachineFunction::iterator It = ++BB->getIterator();
11895
11896 MachineFunction *F = BB->getParent();
11897
11898 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
11899 MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 ||
11900 MI.getOpcode() == PPC::SELECT_I8) {
11901 SmallVector<MachineOperand, 2> Cond;
11902 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
11903 MI.getOpcode() == PPC::SELECT_CC_I8)
11904 Cond.push_back(MI.getOperand(4));
11905 else
11906 Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
11907 Cond.push_back(MI.getOperand(1));
11908
11909 DebugLoc dl = MI.getDebugLoc();
11910 TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
11911 MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
11912 } else if (MI.getOpcode() == PPC::SELECT_CC_F4 ||
11913 MI.getOpcode() == PPC::SELECT_CC_F8 ||
11914 MI.getOpcode() == PPC::SELECT_CC_F16 ||
11915 MI.getOpcode() == PPC::SELECT_CC_VRRC ||
11916 MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
11917 MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
11918 MI.getOpcode() == PPC::SELECT_CC_VSRC ||
11919 MI.getOpcode() == PPC::SELECT_CC_SPE4 ||
11920 MI.getOpcode() == PPC::SELECT_CC_SPE ||
11921 MI.getOpcode() == PPC::SELECT_F4 ||
11922 MI.getOpcode() == PPC::SELECT_F8 ||
11923 MI.getOpcode() == PPC::SELECT_F16 ||
11924 MI.getOpcode() == PPC::SELECT_SPE ||
11925 MI.getOpcode() == PPC::SELECT_SPE4 ||
11926 MI.getOpcode() == PPC::SELECT_VRRC ||
11927 MI.getOpcode() == PPC::SELECT_VSFRC ||
11928 MI.getOpcode() == PPC::SELECT_VSSRC ||
11929 MI.getOpcode() == PPC::SELECT_VSRC) {
11930 // The incoming instruction knows the destination vreg to set, the
11931 // condition code register to branch on, the true/false values to
11932 // select between, and a branch opcode to use.
11933
11934 // thisMBB:
11935 // ...
11936 // TrueVal = ...
11937 // cmpTY ccX, r1, r2
11938 // bCC copy1MBB
11939 // fallthrough --> copy0MBB
11940 MachineBasicBlock *thisMBB = BB;
11941 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
11942 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
11943 DebugLoc dl = MI.getDebugLoc();
11944 F->insert(It, copy0MBB);
11945 F->insert(It, sinkMBB);
11946
11947 // Transfer the remainder of BB and its successor edges to sinkMBB.
11948 sinkMBB->splice(sinkMBB->begin(), BB,
11949 std::next(MachineBasicBlock::iterator(MI)), BB->end());
11950 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
11951
11952 // Next, add the true and fallthrough blocks as its successors.
11953 BB->addSuccessor(copy0MBB);
11954 BB->addSuccessor(sinkMBB);
11955
11956 if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||
11957 MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||
11958 MI.getOpcode() == PPC::SELECT_F16 ||
11959 MI.getOpcode() == PPC::SELECT_SPE4 ||
11960 MI.getOpcode() == PPC::SELECT_SPE ||
11961 MI.getOpcode() == PPC::SELECT_VRRC ||
11962 MI.getOpcode() == PPC::SELECT_VSFRC ||
11963 MI.getOpcode() == PPC::SELECT_VSSRC ||
11964 MI.getOpcode() == PPC::SELECT_VSRC) {
11965 BuildMI(BB, dl, TII->get(PPC::BC))
11966 .addReg(MI.getOperand(1).getReg())
11967 .addMBB(sinkMBB);
11968 } else {
11969 unsigned SelectPred = MI.getOperand(4).getImm();
11970 BuildMI(BB, dl, TII->get(PPC::BCC))
11971 .addImm(SelectPred)
11972 .addReg(MI.getOperand(1).getReg())
11973 .addMBB(sinkMBB);
11974 }
11975
11976 // copy0MBB:
11977 // %FalseValue = ...
11978 // # fallthrough to sinkMBB
11979 BB = copy0MBB;
11980
11981 // Update machine-CFG edges
11982 BB->addSuccessor(sinkMBB);
11983
11984 // sinkMBB:
11985 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
11986 // ...
11987 BB = sinkMBB;
11988 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
11989 .addReg(MI.getOperand(3).getReg())
11990 .addMBB(copy0MBB)
11991 .addReg(MI.getOperand(2).getReg())
11992 .addMBB(thisMBB);
11993 } else if (MI.getOpcode() == PPC::ReadTB) {
11994 // To read the 64-bit time-base register on a 32-bit target, we read the
11995 // two halves. Should the counter have wrapped while it was being read, we
11996 // need to try again.
11997 // ...
11998 // readLoop:
11999 // mfspr Rx,TBU # load from TBU
12000 // mfspr Ry,TB # load from TB
12001 // mfspr Rz,TBU # load from TBU
12002 // cmpw crX,Rx,Rz # check if 'old'='new'
12003 // bne readLoop # branch if they're not equal
12004 // ...
12005
12006 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
12007 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12008 DebugLoc dl = MI.getDebugLoc();
12009 F->insert(It, readMBB);
12010 F->insert(It, sinkMBB);
12011
12012 // Transfer the remainder of BB and its successor edges to sinkMBB.
12013 sinkMBB->splice(sinkMBB->begin(), BB,
12014 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12015 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
12016
12017 BB->addSuccessor(readMBB);
12018 BB = readMBB;
12019
12020 MachineRegisterInfo &RegInfo = F->getRegInfo();
12021 Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
12022 Register LoReg = MI.getOperand(0).getReg();
12023 Register HiReg = MI.getOperand(1).getReg();
12024
12025 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
12026 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
12027 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
12028
12029 Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12030
12031 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
12032 .addReg(HiReg)
12033 .addReg(ReadAgainReg);
12034 BuildMI(BB, dl, TII->get(PPC::BCC))
12035 .addImm(PPC::PRED_NE)
12036 .addReg(CmpReg)
12037 .addMBB(readMBB);
12038
12039 BB->addSuccessor(readMBB);
12040 BB->addSuccessor(sinkMBB);
12041 } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
12042 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
12043 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
12044 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
12045 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
12046 BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
12047 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
12048 BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
12049
12050 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
12051 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
12052 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
12053 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
12054 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
12055 BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
12056 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
12057 BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
12058
12059 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
12060 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
12061 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
12062 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
12063 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
12064 BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
12065 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
12066 BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
12067
12068 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
12069 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
12070 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
12071 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
12072 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
12073 BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
12074 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
12075 BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
12076
12077 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
12078 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
12079 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
12080 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
12081 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
12082 BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
12083 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
12084 BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
12085
12086 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
12087 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
12088 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
12089 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
12090 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
12091 BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
12092 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
12093 BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
12094
12095 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
12096 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE);
12097 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
12098 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE);
12099 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
12100 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE);
12101 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
12102 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE);
12103
12104 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
12105 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE);
12106 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
12107 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE);
12108 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
12109 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE);
12110 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
12111 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE);
12112
12113 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
12114 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE);
12115 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
12116 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE);
12117 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
12118 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE);
12119 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
12120 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE);
12121
12122 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
12123 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE);
12124 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
12125 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE);
12126 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
12127 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE);
12128 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
12129 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE);
12130
12131 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
12132 BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
12133 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
12134 BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
12135 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
12136 BB = EmitAtomicBinary(MI, BB, 4, 0);
12137 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
12138 BB = EmitAtomicBinary(MI, BB, 8, 0);
12139 else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
12140 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
12141 (Subtarget.hasPartwordAtomics() &&
12142 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
12143 (Subtarget.hasPartwordAtomics() &&
12144 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
12145 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
12146
12147 auto LoadMnemonic = PPC::LDARX;
12148 auto StoreMnemonic = PPC::STDCX;
12149 switch (MI.getOpcode()) {
12150 default:
12151 llvm_unreachable("Compare and swap of unknown size")::llvm::llvm_unreachable_internal("Compare and swap of unknown size"
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 12151)
;
12152 case PPC::ATOMIC_CMP_SWAP_I8:
12153 LoadMnemonic = PPC::LBARX;
12154 StoreMnemonic = PPC::STBCX;
12155 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
12156 break;
12157 case PPC::ATOMIC_CMP_SWAP_I16:
12158 LoadMnemonic = PPC::LHARX;
12159 StoreMnemonic = PPC::STHCX;
12160 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
12161 break;
12162 case PPC::ATOMIC_CMP_SWAP_I32:
12163 LoadMnemonic = PPC::LWARX;
12164 StoreMnemonic = PPC::STWCX;
12165 break;
12166 case PPC::ATOMIC_CMP_SWAP_I64:
12167 LoadMnemonic = PPC::LDARX;
12168 StoreMnemonic = PPC::STDCX;
12169 break;
12170 }
12171 Register dest = MI.getOperand(0).getReg();
12172 Register ptrA = MI.getOperand(1).getReg();
12173 Register ptrB = MI.getOperand(2).getReg();
12174 Register oldval = MI.getOperand(3).getReg();
12175 Register newval = MI.getOperand(4).getReg();
12176 DebugLoc dl = MI.getDebugLoc();
12177
12178 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
12179 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
12180 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
12181 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12182 F->insert(It, loop1MBB);
12183 F->insert(It, loop2MBB);
12184 F->insert(It, midMBB);
12185 F->insert(It, exitMBB);
12186 exitMBB->splice(exitMBB->begin(), BB,
12187 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12188 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12189
12190 // thisMBB:
12191 // ...
12192 // fallthrough --> loopMBB
12193 BB->addSuccessor(loop1MBB);
12194
12195 // loop1MBB:
12196 // l[bhwd]arx dest, ptr
12197 // cmp[wd] dest, oldval
12198 // bne- midMBB
12199 // loop2MBB:
12200 // st[bhwd]cx. newval, ptr
12201 // bne- loopMBB
12202 // b exitBB
12203 // midMBB:
12204 // st[bhwd]cx. dest, ptr
12205 // exitBB:
12206 BB = loop1MBB;
12207 BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
12208 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
12209 .addReg(oldval)
12210 .addReg(dest);
12211 BuildMI(BB, dl, TII->get(PPC::BCC))
12212 .addImm(PPC::PRED_NE)
12213 .addReg(PPC::CR0)
12214 .addMBB(midMBB);
12215 BB->addSuccessor(loop2MBB);
12216 BB->addSuccessor(midMBB);
12217
12218 BB = loop2MBB;
12219 BuildMI(BB, dl, TII->get(StoreMnemonic))
12220 .addReg(newval)
12221 .addReg(ptrA)
12222 .addReg(ptrB);
12223 BuildMI(BB, dl, TII->get(PPC::BCC))
12224 .addImm(PPC::PRED_NE)
12225 .addReg(PPC::CR0)
12226 .addMBB(loop1MBB);
12227 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
12228 BB->addSuccessor(loop1MBB);
12229 BB->addSuccessor(exitMBB);
12230
12231 BB = midMBB;
12232 BuildMI(BB, dl, TII->get(StoreMnemonic))
12233 .addReg(dest)
12234 .addReg(ptrA)
12235 .addReg(ptrB);
12236 BB->addSuccessor(exitMBB);
12237
12238 // exitMBB:
12239 // ...
12240 BB = exitMBB;
12241 } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
12242 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
12243 // We must use 64-bit registers for addresses when targeting 64-bit,
12244 // since we're actually doing arithmetic on them. Other registers
12245 // can be 32-bit.
12246 bool is64bit = Subtarget.isPPC64();
12247 bool isLittleEndian = Subtarget.isLittleEndian();
12248 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
12249
12250 Register dest = MI.getOperand(0).getReg();
12251 Register ptrA = MI.getOperand(1).getReg();
12252 Register ptrB = MI.getOperand(2).getReg();
12253 Register oldval = MI.getOperand(3).getReg();
12254 Register newval = MI.getOperand(4).getReg();
12255 DebugLoc dl = MI.getDebugLoc();
12256
12257 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
12258 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
12259 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
12260 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12261 F->insert(It, loop1MBB);
12262 F->insert(It, loop2MBB);
12263 F->insert(It, midMBB);
12264 F->insert(It, exitMBB);
12265 exitMBB->splice(exitMBB->begin(), BB,
12266 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12267 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12268
12269 MachineRegisterInfo &RegInfo = F->getRegInfo();
12270 const TargetRegisterClass *RC =
12271 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12272 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
12273
12274 Register PtrReg = RegInfo.createVirtualRegister(RC);
12275 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
12276 Register ShiftReg =
12277 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
12278 Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
12279 Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
12280 Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
12281 Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
12282 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
12283 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
12284 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
12285 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
12286 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
12287 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
12288 Register Ptr1Reg;
12289 Register TmpReg = RegInfo.createVirtualRegister(GPRC);
12290 Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
12291 // thisMBB:
12292 // ...
12293 // fallthrough --> loopMBB
12294 BB->addSuccessor(loop1MBB);
12295
12296 // The 4-byte load must be aligned, while a char or short may be
12297 // anywhere in the word. Hence all this nasty bookkeeping code.
12298 // add ptr1, ptrA, ptrB [copy if ptrA==0]
12299 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
12300 // xori shift, shift1, 24 [16]
12301 // rlwinm ptr, ptr1, 0, 0, 29
12302 // slw newval2, newval, shift
12303 // slw oldval2, oldval,shift
12304 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
12305 // slw mask, mask2, shift
12306 // and newval3, newval2, mask
12307 // and oldval3, oldval2, mask
12308 // loop1MBB:
12309 // lwarx tmpDest, ptr
12310 // and tmp, tmpDest, mask
12311 // cmpw tmp, oldval3
12312 // bne- midMBB
12313 // loop2MBB:
12314 // andc tmp2, tmpDest, mask
12315 // or tmp4, tmp2, newval3
12316 // stwcx. tmp4, ptr
12317 // bne- loop1MBB
12318 // b exitBB
12319 // midMBB:
12320 // stwcx. tmpDest, ptr
12321 // exitBB:
12322 // srw dest, tmpDest, shift
12323 if (ptrA != ZeroReg) {
12324 Ptr1Reg = RegInfo.createVirtualRegister(RC);
12325 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
12326 .addReg(ptrA)
12327 .addReg(ptrB);
12328 } else {
12329 Ptr1Reg = ptrB;
12330 }
12331
12332 // We need to use a 32-bit subregister to avoid a register-class mismatch in
12333 // 64-bit mode.
12334 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
12335 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
12336 .addImm(3)
12337 .addImm(27)
12338 .addImm(is8bit ? 28 : 27);
12339 if (!isLittleEndian)
12340 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
12341 .addReg(Shift1Reg)
12342 .addImm(is8bit ? 24 : 16);
12343 if (is64bit)
12344 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
12345 .addReg(Ptr1Reg)
12346 .addImm(0)
12347 .addImm(61);
12348 else
12349 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
12350 .addReg(Ptr1Reg)
12351 .addImm(0)
12352 .addImm(0)
12353 .addImm(29);
12354 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
12355 .addReg(newval)
12356 .addReg(ShiftReg);
12357 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
12358 .addReg(oldval)
12359 .addReg(ShiftReg);
12360 if (is8bit)
12361 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
12362 else {
12363 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
12364 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
12365 .addReg(Mask3Reg)
12366 .addImm(65535);
12367 }
12368 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
12369 .addReg(Mask2Reg)
12370 .addReg(ShiftReg);
12371 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
12372 .addReg(NewVal2Reg)
12373 .addReg(MaskReg);
12374 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
12375 .addReg(OldVal2Reg)
12376 .addReg(MaskReg);
12377
12378 BB = loop1MBB;
12379 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
12380 .addReg(ZeroReg)
12381 .addReg(PtrReg);
12382 BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
12383 .addReg(TmpDestReg)
12384 .addReg(MaskReg);
12385 BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
12386 .addReg(TmpReg)
12387 .addReg(OldVal3Reg);
12388 BuildMI(BB, dl, TII->get(PPC::BCC))
12389 .addImm(PPC::PRED_NE)
12390 .addReg(PPC::CR0)
12391 .addMBB(midMBB);
12392 BB->addSuccessor(loop2MBB);
12393 BB->addSuccessor(midMBB);
12394
12395 BB = loop2MBB;
12396 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
12397 .addReg(TmpDestReg)
12398 .addReg(MaskReg);
12399 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
12400 .addReg(Tmp2Reg)
12401 .addReg(NewVal3Reg);
12402 BuildMI(BB, dl, TII->get(PPC::STWCX))
12403 .addReg(Tmp4Reg)
12404 .addReg(ZeroReg)
12405 .addReg(PtrReg);
12406 BuildMI(BB, dl, TII->get(PPC::BCC))
12407 .addImm(PPC::PRED_NE)
12408 .addReg(PPC::CR0)
12409 .addMBB(loop1MBB);
12410 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
12411 BB->addSuccessor(loop1MBB);
12412 BB->addSuccessor(exitMBB);
12413
12414 BB = midMBB;
12415 BuildMI(BB, dl, TII->get(PPC::STWCX))
12416 .addReg(TmpDestReg)
12417 .addReg(ZeroReg)
12418 .addReg(PtrReg);
12419 BB->addSuccessor(exitMBB);
12420
12421 // exitMBB:
12422 // ...
12423 BB = exitMBB;
12424 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
12425 .addReg(TmpReg)
12426 .addReg(ShiftReg);
12427 } else if (MI.getOpcode() == PPC::FADDrtz) {
12428 // This pseudo performs an FADD with rounding mode temporarily forced
12429 // to round-to-zero. We emit this via custom inserter since the FPSCR
12430 // is not modeled at the SelectionDAG level.
12431 Register Dest = MI.getOperand(0).getReg();
12432 Register Src1 = MI.getOperand(1).getReg();
12433 Register Src2 = MI.getOperand(2).getReg();
12434 DebugLoc dl = MI.getDebugLoc();
12435
12436 MachineRegisterInfo &RegInfo = F->getRegInfo();
12437 Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
12438
12439 // Save FPSCR value.
12440 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
12441
12442 // Set rounding mode to round-to-zero.
12443 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
12444 .addImm(31)
12445 .addReg(PPC::RM, RegState::ImplicitDefine);
12446
12447 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
12448 .addImm(30)
12449 .addReg(PPC::RM, RegState::ImplicitDefine);
12450
12451 // Perform addition.
12452 auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
12453 .addReg(Src1)
12454 .addReg(Src2);
12455 if (MI.getFlag(MachineInstr::NoFPExcept))
12456 MIB.setMIFlag(MachineInstr::NoFPExcept);
12457
12458 // Restore FPSCR value.
12459 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
12460 } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
12461 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
12462 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
12463 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
12464 unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
12465 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
12466 ? PPC::ANDI8_rec
12467 : PPC::ANDI_rec;
12468 bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
12469 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
12470
12471 MachineRegisterInfo &RegInfo = F->getRegInfo();
12472 Register Dest = RegInfo.createVirtualRegister(
12473 Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
12474
12475 DebugLoc Dl = MI.getDebugLoc();
12476 BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
12477 .addReg(MI.getOperand(1).getReg())
12478 .addImm(1);
12479 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12480 MI.getOperand(0).getReg())
12481 .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
12482 } else if (MI.getOpcode() == PPC::TCHECK_RET) {
12483 DebugLoc Dl = MI.getDebugLoc();
12484 MachineRegisterInfo &RegInfo = F->getRegInfo();
12485 Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12486 BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
12487 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12488 MI.getOperand(0).getReg())
12489 .addReg(CRReg);
12490 } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
12491 DebugLoc Dl = MI.getDebugLoc();
12492 unsigned Imm = MI.getOperand(1).getImm();
12493 BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
12494 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12495 MI.getOperand(0).getReg())
12496 .addReg(PPC::CR0EQ);
12497 } else if (MI.getOpcode() == PPC::SETRNDi) {
12498 DebugLoc dl = MI.getDebugLoc();
12499 Register OldFPSCRReg = MI.getOperand(0).getReg();
12500
12501 // Save FPSCR value.
12502 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
12503
12504 // The floating-point rounding mode is in bits 62:63 of the FPSCR, and has
12505 // the following settings:
12506 // 00 Round to nearest
12507 // 01 Round to 0
12508 // 10 Round to +inf
12509 // 11 Round to -inf
12510
12511 // When the operand is an immediate, use its two least significant bits to
12512 // set bits 62:63 of the FPSCR.
12513 unsigned Mode = MI.getOperand(1).getImm();
12514 BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
12515 .addImm(31)
12516 .addReg(PPC::RM, RegState::ImplicitDefine);
12517
12518 BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
12519 .addImm(30)
12520 .addReg(PPC::RM, RegState::ImplicitDefine);
12521 } else if (MI.getOpcode() == PPC::SETRND) {
12522 DebugLoc dl = MI.getDebugLoc();
12523
12524 // Copy a register from F8RCRegClass (SrcReg) to G8RCRegClass (DestReg),
12525 // or from G8RCRegClass (SrcReg) to F8RCRegClass (DestReg).
12526 // If the target doesn't have DirectMove, we go through the stack to do the
12527 // conversion, because the target lacks instructions like mtvsrd or mfvsrd
12528 // to do this conversion directly.
12529 auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
12530 if (Subtarget.hasDirectMove()) {
12531 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
12532 .addReg(SrcReg);
12533 } else {
12534 // Use stack to do the register copy.
12535 unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
12536 MachineRegisterInfo &RegInfo = F->getRegInfo();
12537 const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
12538 if (RC == &PPC::F8RCRegClass) {
12539 // Copy the register from F8RCRegClass to G8RCRegClass.
12540 assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
12541 "Unsupported RegClass.");
12542
12543 StoreOp = PPC::STFD;
12544 LoadOp = PPC::LD;
12545 } else {
12546 // Copy the register from G8RCRegClass to F8RCRegClass.
12547 assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
12548 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
12549 "Unsupported RegClass.");
12550 }
12551
12552 MachineFrameInfo &MFI = F->getFrameInfo();
12553 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
12554
12555 MachineMemOperand *MMOStore = F->getMachineMemOperand(
12556 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
12557 MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
12558 MFI.getObjectAlign(FrameIdx));
12559
12560 // Store the SrcReg into the stack.
12561 BuildMI(*BB, MI, dl, TII->get(StoreOp))
12562 .addReg(SrcReg)
12563 .addImm(0)
12564 .addFrameIndex(FrameIdx)
12565 .addMemOperand(MMOStore);
12566
12567 MachineMemOperand *MMOLoad = F->getMachineMemOperand(
12568 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
12569 MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
12570 MFI.getObjectAlign(FrameIdx));
12571
12572 // Load from the stack slot where SrcReg was stored and write to DestReg,
12573 // completing the register-class conversion from SrcReg's class to
12574 // DestReg's class.
12575 BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
12576 .addImm(0)
12577 .addFrameIndex(FrameIdx)
12578 .addMemOperand(MMOLoad);
12579 }
12580 };
12581
12582 Register OldFPSCRReg = MI.getOperand(0).getReg();
12583
12584 // Save FPSCR value.
12585 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
12586
12587 // When the operand is a GPRC register, use its two least significant bits
12588 // and the mtfsf instruction to set bits 62:63 of the FPSCR.
12589 //
12590 // copy OldFPSCRTmpReg, OldFPSCRReg
12591 // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
12592 // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
12593 // copy NewFPSCRReg, NewFPSCRTmpReg
12594 // mtfsf 255, NewFPSCRReg
12595 MachineOperand SrcOp = MI.getOperand(1);
12596 MachineRegisterInfo &RegInfo = F->getRegInfo();
12597 Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12598
12599 copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
12600
12601 Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12602 Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12603
12604 // The first operand of INSERT_SUBREG should be a register that has
12605 // subregisters; since we only care about its RegClass, an IMPLICIT_DEF
12606 // register suffices.
12607 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
12608 BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
12609 .addReg(ImDefReg)
12610 .add(SrcOp)
12611 .addImm(1);
12612
12613 Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12614 BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
12615 .addReg(OldFPSCRTmpReg)
12616 .addReg(ExtSrcReg)
12617 .addImm(0)
12618 .addImm(62);
12619
12620 Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
12621 copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
12622
12623 // The mask 255 means that bits 32:63 of NewFPSCRReg are placed into bits
12624 // 32:63 of the FPSCR.
12625 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
12626 .addImm(255)
12627 .addReg(NewFPSCRReg)
12628 .addImm(0)
12629 .addImm(0);
12630 } else if (MI.getOpcode() == PPC::SETFLM) {
12631 DebugLoc Dl = MI.getDebugLoc();
12632
12633 // The result of setflm is the previous FPSCR content, so save it first.
12634 Register OldFPSCRReg = MI.getOperand(0).getReg();
12635 BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
12636
12637 // Put bits in 32:63 to FPSCR.
12638 Register NewFPSCRReg = MI.getOperand(1).getReg();
12639 BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
12640 .addImm(255)
12641 .addReg(NewFPSCRReg)
12642 .addImm(0)
12643 .addImm(0);
12644 } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
12645 MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
12646 return emitProbedAlloca(MI, BB);
12647 } else {
12648 llvm_unreachable("Unexpected instr type to insert")::llvm::llvm_unreachable_internal("Unexpected instr type to insert"
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 12648)
;
12649 }
12650
12651 MI.eraseFromParent(); // The pseudo instruction is gone now.
12652 return BB;
12653}
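// A minimal standalone sketch (using std::atomic rather than lwarx/stwcx.; the
// helper name cmpswap_i8 is illustrative only) of the shift/mask technique used
// by the ATOMIC_CMP_SWAP_I8/I16 lowering above: a 1-byte compare-and-swap is
// emulated on top of an aligned 4-byte one by shifting the operands and mask
// into place, with the shift amount depending on the byte's position (as
// ShiftReg does, including the endianness adjustment).
#include <atomic>
#include <cstdint>
#include <cstdio>

static uint8_t cmpswap_i8(std::atomic<uint32_t> *Word, unsigned ByteIdx,
                          uint8_t OldVal, uint8_t NewVal) {
  const unsigned Shift = ByteIdx * 8;          // little-endian byte placement
  const uint32_t Mask = 0xFFu << Shift;
  uint32_t Cur = Word->load();
  for (;;) {
    uint8_t Loaded = uint8_t((Cur & Mask) >> Shift);
    if (Loaded != OldVal)
      return Loaded;                           // mismatch: the midMBB path
    uint32_t Desired = (Cur & ~Mask) | (uint32_t(NewVal) << Shift);
    if (Word->compare_exchange_weak(Cur, Desired))
      return Loaded;                           // store succeeded: loop2MBB path
    // Otherwise Cur was refreshed; retry, as the branch back to loop1MBB does.
  }
}

int main() {
  std::atomic<uint32_t> W{0x11223344u};
  uint8_t Prev = cmpswap_i8(&W, 0, 0x44, 0x99);
  std::printf("prev=%#x word=%#x\n", Prev, W.load()); // prev=0x44 word=0x11223399
  return 0;
}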
12654
12655//===----------------------------------------------------------------------===//
12656// Target Optimization Hooks
12657//===----------------------------------------------------------------------===//
12658
12659static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
12660 // For the estimates, convergence is quadratic, so we essentially double the
12661 // number of correct digits after every iteration. For both FRE and FRSQRTE,
12662 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
12663 // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
12664 int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
12665 if (VT.getScalarType() == MVT::f64)
12666 RefinementSteps++;
12667 return RefinementSteps;
12668}
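// A small standalone sketch (the helper name stepsNeeded is illustrative only)
// of the reasoning behind the refinement-step counts above: Newton-Raphson
// roughly doubles the number of correct bits per iteration, starting from the
// architected estimate accuracy of 2^-5 (or 2^-14 with hasRecipPrec()).
#include <cstdio>

static int stepsNeeded(int StartBits, int WantedBits) {
  int Steps = 0;
  for (int Bits = StartBits; Bits < WantedBits; Bits *= 2)
    ++Steps;
  return Steps;
}

int main() {
  std::printf("f32 from 2^-5 : %d steps\n", stepsNeeded(5, 23));  // 3
  std::printf("f32 from 2^-14: %d steps\n", stepsNeeded(14, 23)); // 1
  std::printf("f64 from 2^-5 : %d steps\n", stepsNeeded(5, 52));  // 4
  std::printf("f64 from 2^-14: %d steps\n", stepsNeeded(14, 52)); // 2
  return 0;
}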
12669
12670SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
12671 int Enabled, int &RefinementSteps,
12672 bool &UseOneConstNR,
12673 bool Reciprocal) const {
12674 EVT VT = Operand.getValueType();
12675 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
12676 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
12677 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
12678 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
12679 if (RefinementSteps == ReciprocalEstimate::Unspecified)
12680 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
12681
12682 // The Newton-Raphson computation with a single constant does not provide
12683 // enough accuracy on some CPUs.
12684 UseOneConstNR = !Subtarget.needsTwoConstNR();
12685 return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
12686 }
12687 return SDValue();
12688}
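// A small standalone sketch (plain C++) of the Newton-Raphson refinement that
// the FRSQRTE estimate returned above feeds into: each iteration of
// x = x * (1.5 - 0.5 * a * x * x) roughly doubles the number of correct bits
// of 1/sqrt(a), which is why only a few RefinementSteps are needed.
#include <cmath>
#include <cstdio>

int main() {
  const double A = 2.0;
  double X = 0.70;                         // crude initial estimate of 1/sqrt(2)
  for (int Step = 0; Step < 3; ++Step)
    X = X * (1.5 - 0.5 * A * X * X);       // one Newton-Raphson refinement step
  std::printf("refined: %.12f  exact: %.12f\n", X, 1.0 / std::sqrt(A));
  return 0;
}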
12689
12690SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
12691 int Enabled,
12692 int &RefinementSteps) const {
12693 EVT VT = Operand.getValueType();
12694 if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
12695 (VT == MVT::f64 && Subtarget.hasFRE()) ||
12696 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
12697 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
12698 if (RefinementSteps == ReciprocalEstimate::Unspecified)
12699 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
12700 return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
12701 }
12702 return SDValue();
12703}
12704
12705unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
12706 // Note: This functionality is used only when unsafe-fp-math is enabled, and
12707 // on cores with reciprocal estimates (which are used when unsafe-fp-math is
12708 // enabled for division), this functionality is redundant with the default
12709 // combiner logic (once the division -> reciprocal/multiply transformation
12710 // has taken place). As a result, this matters more for older cores than for
12711 // newer ones.
12712
12713 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
12714 // reciprocal if there are two or more FDIVs (for embedded cores with only
12715 // one FP pipeline) or three or more FDIVs (for generic OOO cores).
12716 switch (Subtarget.getCPUDirective()) {
12717 default:
12718 return 3;
12719 case PPC::DIR_440:
12720 case PPC::DIR_A2:
12721 case PPC::DIR_E500:
12722 case PPC::DIR_E500mc:
12723 case PPC::DIR_E5500:
12724 return 2;
12725 }
12726}
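// A tiny standalone sketch (plain C++) of the transformation that the
// threshold returned above gates: once there are enough divisions by the same
// value, one reciprocal plus N multiplies replaces N divisions. The combiner
// only does this under unsafe-fp-math, since the results may differ in the
// last bits.
#include <cstdio>

int main() {
  const double C = 3.0;
  const double A[4] = {1.0, 2.0, 3.0, 4.0};
  double Out[4];
  const unsigned MinDivisors = 3;          // threshold for a generic OOO core
  const unsigned NumDivisors = 4;
  if (NumDivisors >= MinDivisors) {
    const double Recip = 1.0 / C;          // one division...
    for (int I = 0; I < 4; ++I)
      Out[I] = A[I] * Recip;               // ...and N multiplies
  } else {
    for (int I = 0; I < 4; ++I)
      Out[I] = A[I] / C;                   // otherwise keep the N divisions
  }
  std::printf("%f %f %f %f\n", Out[0], Out[1], Out[2], Out[3]);
  return 0;
}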
12727
12728// isConsecutiveLSLoc needs to work even if all adds have not yet been
12729// collapsed, and so we need to look through chains of them.
12730static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
12731 int64_t& Offset, SelectionDAG &DAG) {
12732 if (DAG.isBaseWithConstantOffset(Loc)) {
12733 Base = Loc.getOperand(0);
12734 Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
12735
12736 // The base might itself be a base plus an offset, and if so, accumulate
12737 // that as well.
12738 getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
12739 }
12740}
12741
12742static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
12743 unsigned Bytes, int Dist,
12744 SelectionDAG &DAG) {
12745 if (VT.getSizeInBits() / 8 != Bytes)
12746 return false;
12747
12748 SDValue BaseLoc = Base->getBasePtr();
12749 if (Loc.getOpcode() == ISD::FrameIndex) {
12750 if (BaseLoc.getOpcode() != ISD::FrameIndex)
12751 return false;
12752 const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
12753 int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
12754 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
12755 int FS = MFI.getObjectSize(FI);
12756 int BFS = MFI.getObjectSize(BFI);
12757 if (FS != BFS || FS != (int)Bytes) return false;
12758 return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
12759 }
12760
12761 SDValue Base1 = Loc, Base2 = BaseLoc;
12762 int64_t Offset1 = 0, Offset2 = 0;
12763 getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
12764 getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
12765 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
12766 return true;
12767
12768 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12769 const GlobalValue *GV1 = nullptr;
12770 const GlobalValue *GV2 = nullptr;
12771 Offset1 = 0;
12772 Offset2 = 0;
12773 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
12774 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
12775 if (isGA1 && isGA2 && GV1 == GV2)
12776 return Offset1 == (Offset2 + Dist*Bytes);
12777 return false;
12778}
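// A minimal standalone sketch (the Addr type and helper names here are
// hypothetical, not SelectionDAG APIs) of the consecutive-location test above:
// peel constant offsets off both addresses, and two accesses are consecutive
// when they share a base and their offsets differ by exactly Dist * Bytes.
#include <cstdint>
#include <cstdio>

struct Addr {
  const Addr *Base;        // null when this node is itself the base
  int64_t Offset;          // constant offset applied to Base
};

static void getBaseWithOffset(const Addr *A, const Addr *&Base, int64_t &Off) {
  Base = A;
  while (Base->Base) {     // look through chains of adds, accumulating offsets
    Off += Base->Offset;
    Base = Base->Base;
  }
}

static bool isConsecutive(const Addr *Loc, const Addr *BaseLoc, unsigned Bytes,
                          int Dist) {
  const Addr *B1 = nullptr, *B2 = nullptr;
  int64_t Off1 = 0, Off2 = 0;
  getBaseWithOffset(Loc, B1, Off1);
  getBaseWithOffset(BaseLoc, B2, Off2);
  return B1 == B2 && Off1 == Off2 + (int64_t)Dist * Bytes;
}

int main() {
  Addr Root = {nullptr, 0};                  // shared base
  Addr A = {&Root, 16}, B = {&Root, 32};     // two addresses 16 bytes apart
  std::printf("%d\n", isConsecutive(&B, &A, 16, 1)); // 1: B is A + 1*16
  return 0;
}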
12779
12780// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
12781// not enforce equality of the chain operands.
12782static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
12783 unsigned Bytes, int Dist,
12784 SelectionDAG &DAG) {
12785 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
12786 EVT VT = LS->getMemoryVT();
12787 SDValue Loc = LS->getBasePtr();
12788 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
12789 }
12790
12791 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
12792 EVT VT;
12793 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
12794 default: return false;
12795 case Intrinsic::ppc_altivec_lvx:
12796 case Intrinsic::ppc_altivec_lvxl:
12797 case Intrinsic::ppc_vsx_lxvw4x:
12798 case Intrinsic::ppc_vsx_lxvw4x_be:
12799 VT = MVT::v4i32;
12800 break;
12801 case Intrinsic::ppc_vsx_lxvd2x:
12802 case Intrinsic::ppc_vsx_lxvd2x_be:
12803 VT = MVT::v2f64;
12804 break;
12805 case Intrinsic::ppc_altivec_lvebx:
12806 VT = MVT::i8;
12807 break;
12808 case Intrinsic::ppc_altivec_lvehx:
12809 VT = MVT::i16;
12810 break;
12811 case Intrinsic::ppc_altivec_lvewx:
12812 VT = MVT::i32;
12813 break;
12814 }
12815
12816 return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
12817 }
12818
12819 if (N->getOpcode() == ISD::INTRINSIC_VOID) {
12820 EVT VT;
12821 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
12822 default: return false;
12823 case Intrinsic::ppc_altivec_stvx:
12824 case Intrinsic::ppc_altivec_stvxl:
12825 case Intrinsic::ppc_vsx_stxvw4x:
12826 VT = MVT::v4i32;
12827 break;
12828 case Intrinsic::ppc_vsx_stxvd2x:
12829 VT = MVT::v2f64;
12830 break;
12831 case Intrinsic::ppc_vsx_stxvw4x_be:
12832 VT = MVT::v4i32;
12833 break;
12834 case Intrinsic::ppc_vsx_stxvd2x_be:
12835 VT = MVT::v2f64;
12836 break;
12837 case Intrinsic::ppc_altivec_stvebx:
12838 VT = MVT::i8;
12839 break;
12840 case Intrinsic::ppc_altivec_stvehx:
12841 VT = MVT::i16;
12842 break;
12843 case Intrinsic::ppc_altivec_stvewx:
12844 VT = MVT::i32;
12845 break;
12846 }
12847
12848 return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
12849 }
12850
12851 return false;
12852}
12853
12854// Return true if there is a nearby consecutive load to the one provided
12855// (regardless of alignment). We search up and down the chain, looking through
12856// token factors and other loads (but nothing else). As a result, a true result
12857// indicates that it is safe to create a new consecutive load adjacent to the
12858// load provided.
12859static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
12860 SDValue Chain = LD->getChain();
12861 EVT VT = LD->getMemoryVT();
12862
12863 SmallSet<SDNode *, 16> LoadRoots;
12864 SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
12865 SmallSet<SDNode *, 16> Visited;
12866
12867 // First, search up the chain, branching to follow all token-factor operands.
12868 // If we find a consecutive load, then we're done; otherwise, record all
12869 // nodes just above the top-level loads and token factors.
12870 while (!Queue.empty()) {
12871 SDNode *ChainNext = Queue.pop_back_val();
12872 if (!Visited.insert(ChainNext).second)
12873 continue;
12874
12875 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
12876 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
12877 return true;
12878
12879 if (!Visited.count(ChainLD->getChain().getNode()))
12880 Queue.push_back(ChainLD->getChain().getNode());
12881 } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
12882 for (const SDUse &O : ChainNext->ops())
12883 if (!Visited.count(O.getNode()))
12884 Queue.push_back(O.getNode());
12885 } else
12886 LoadRoots.insert(ChainNext);
12887 }
12888
12889 // Second, search down the chain, starting from the top-level nodes recorded
12890 // in the first phase. These top-level nodes are the nodes just above all
12891 // loads and token factors. Starting with their uses, recursively look through
12892 // all loads (just the chain uses) and token factors to find a consecutive
12893 // load.
12894 Visited.clear();
12895 Queue.clear();
12896
12897 for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
12898 IE = LoadRoots.end(); I != IE; ++I) {
12899 Queue.push_back(*I);
12900
12901 while (!Queue.empty()) {
12902 SDNode *LoadRoot = Queue.pop_back_val();
12903 if (!Visited.insert(LoadRoot).second)
12904 continue;
12905
12906 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
12907 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
12908 return true;
12909
12910 for (SDNode::use_iterator UI = LoadRoot->use_begin(),
12911 UE = LoadRoot->use_end(); UI != UE; ++UI)
12912 if (((isa<MemSDNode>(*UI) &&
12913 cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
12914 UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
12915 Queue.push_back(*UI);
12916 }
12917 }
12918
12919 return false;
12920}
12921
12922/// This function is called when we have proved that a SETCC node can be replaced
12923/// by subtraction (and other supporting instructions) so that the result of the
12924/// comparison is kept in a GPR instead of a CR. This function is purely for
12925/// codegen purposes and has some flags to guide the codegen process.
12926static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
12927 bool Swap, SDLoc &DL, SelectionDAG &DAG) {
12928 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
12929
12930 // Zero extend the operands to the largest legal integer. The original
12931 // operands must be of a strictly smaller size.
12932 auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
12933 DAG.getConstant(Size, DL, MVT::i32));
12934 auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
12935 DAG.getConstant(Size, DL, MVT::i32));
12936
12937 // Swap if needed. Depends on the condition code.
12938 if (Swap)
12939 std::swap(Op0, Op1);
12940
12941 // Subtract extended integers.
12942 auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
12943
12944 // Move the sign bit to the least significant position and zero out the rest.
12945 // Now the least significant bit carries the result of original comparison.
12946 auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
12947 DAG.getConstant(Size - 1, DL, MVT::i32));
12948 auto Final = Shifted;
12949
12950 // Complement the result if needed. Based on the condition code.
12951 if (Complement)
12952 Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
12953 DAG.getConstant(1, DL, MVT::i64));
12954
12955 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
12956}
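// A minimal standalone sketch (plain C++, not SelectionDAG; the helper name
// setULT is illustrative only) of the subtraction trick implemented above:
// after zero-extending to 64 bits, the sign bit of Op0 - Op1 is set exactly
// when Op0 < Op1 (unsigned), so a logical right shift by 63 yields the
// comparison result, and an XOR with 1 complements it for the inverse
// predicates; swapping the operands handles the remaining ones.
#include <cstdint>
#include <cstdio>

static uint64_t setULT(uint32_t A, uint32_t B) {
  uint64_t Sub = (uint64_t)A - (uint64_t)B; // subtraction of zero-extended values
  return Sub >> 63;                         // sign bit == (A < B) unsigned
}

int main() {
  std::printf("3 <  7 -> %llu\n", (unsigned long long)setULT(3, 7));       // 1
  std::printf("7 <  3 -> %llu\n", (unsigned long long)setULT(7, 3));       // 0
  std::printf("5 <  5 -> %llu\n", (unsigned long long)setULT(5, 5));       // 0
  std::printf("5 >= 5 -> %llu\n", (unsigned long long)(setULT(5, 5) ^ 1)); // 1
  return 0;
}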
12957
12958SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
12959 DAGCombinerInfo &DCI) const {
12960 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
12961
12962 SelectionDAG &DAG = DCI.DAG;
12963 SDLoc DL(N);
12964
12965 // The size of the integers being compared has a critical role in the following
12966 // analysis, so we prefer to do this when all types are legal.
12967 if (!DCI.isAfterLegalizeDAG())
12968 return SDValue();
12969
12970 // If all users of SETCC extend its value to a legal integer type,
12971 // then we replace SETCC with a subtraction.
12972 for (SDNode::use_iterator UI = N->use_begin(),
12973 UE = N->use_end(); UI != UE; ++UI) {
12974 if (UI->getOpcode() != ISD::ZERO_EXTEND)
12975 return SDValue();
12976 }
12977
12978 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
12979 auto OpSize = N->getOperand(0).getValueSizeInBits();
12980
12981 unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
12982
12983 if (OpSize < Size) {
12984 switch (CC) {
12985 default: break;
12986 case ISD::SETULT:
12987 return generateEquivalentSub(N, Size, false, false, DL, DAG);
12988 case ISD::SETULE:
12989 return generateEquivalentSub(N, Size, true, true, DL, DAG);
12990 case ISD::SETUGT:
12991 return generateEquivalentSub(N, Size, false, true, DL, DAG);
12992 case ISD::SETUGE:
12993 return generateEquivalentSub(N, Size, true, false, DL, DAG);
12994 }
12995 }
12996
12997 return SDValue();
12998}
12999
13000SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
13001 DAGCombinerInfo &DCI) const {
13002 SelectionDAG &DAG = DCI.DAG;
13003 SDLoc dl(N);
13004
13005 assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
13006 // If we're tracking CR bits, we need to be careful that we don't have:
13007 // trunc(binary-ops(zext(x), zext(y)))
13008 // or
13009 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
13010 // such that we're unnecessarily moving things into GPRs when it would be
13011 // better to keep them in CR bits.
13012
13013 // Note that trunc here can be an actual i1 trunc, or can be the effective
13014 // truncation that comes from a setcc or select_cc.
13015 if (N->getOpcode() == ISD::TRUNCATE &&
13016 N->getValueType(0) != MVT::i1)
13017 return SDValue();
13018
13019 if (N->getOperand(0).getValueType() != MVT::i32 &&
13020 N->getOperand(0).getValueType() != MVT::i64)
13021 return SDValue();
13022
13023 if (N->getOpcode() == ISD::SETCC ||
13024 N->getOpcode() == ISD::SELECT_CC) {
13025 // If we're looking at a comparison, then we need to make sure that the
13026 // high bits (all except for the first) don't affect the result.
13027 ISD::CondCode CC =
13028 cast<CondCodeSDNode>(N->getOperand(
13029 N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
13030 unsigned OpBits = N->getOperand(0).getValueSizeInBits();
13031
13032 if (ISD::isSignedIntSetCC(CC)) {
13033 if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
13034 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
13035 return SDValue();
13036 } else if (ISD::isUnsignedIntSetCC(CC)) {
13037 if (!DAG.MaskedValueIsZero(N->getOperand(0),
13038 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
13039 !DAG.MaskedValueIsZero(N->getOperand(1),
13040 APInt::getHighBitsSet(OpBits, OpBits-1)))
13041 return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
13042 : SDValue());
13043 } else {
13044 // This is neither a signed nor an unsigned comparison; just make sure
13045 // that the high bits are equal.
13046 KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
13047 KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
13048
13049 // We don't really care about what is known about the first bit (if
13050 // anything), so clear it in all masks prior to comparing them.
13051 Op1Known.Zero.clearBit(0); Op1Known.One.clearBit(0);
13052 Op2Known.Zero.clearBit(0); Op2Known.One.clearBit(0);
13053
13054 if (Op1Known.Zero != Op2Known.Zero || Op1Known.One != Op2Known.One)
13055 return SDValue();
13056 }
13057 }
13058
13059 // We now know that the higher-order bits are irrelevant; we just need to
13060 // make sure that all of the intermediate operations are bit operations, and
13061 // all inputs are extensions.
13062 if (N->getOperand(0).getOpcode() != ISD::AND &&
13063 N->getOperand(0).getOpcode() != ISD::OR &&
13064 N->getOperand(0).getOpcode() != ISD::XOR &&
13065 N->getOperand(0).getOpcode() != ISD::SELECT &&
13066 N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
13067 N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
13068 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
13069 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
13070 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
13071 return SDValue();
13072
13073 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
13074 N->getOperand(1).getOpcode() != ISD::AND &&
13075 N->getOperand(1).getOpcode() != ISD::OR &&
13076 N->getOperand(1).getOpcode() != ISD::XOR &&
13077 N->getOperand(1).getOpcode() != ISD::SELECT &&
13078 N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
13079 N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
13080 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
13081 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
13082 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
13083 return SDValue();
13084
13085 SmallVector<SDValue, 4> Inputs;
13086 SmallVector<SDValue, 8> BinOps, PromOps;
13087 SmallPtrSet<SDNode *, 16> Visited;
13088
13089 for (unsigned i = 0; i < 2; ++i) {
13090 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
13091 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
13092 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
13093 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
13094 isa<ConstantSDNode>(N->getOperand(i)))
13095 Inputs.push_back(N->getOperand(i));
13096 else
13097 BinOps.push_back(N->getOperand(i));
13098
13099 if (N->getOpcode() == ISD::TRUNCATE)
13100 break;
13101 }
13102
13103 // Visit all inputs, collect all binary operations (and, or, xor and
13104 // select) that are all fed by extensions.
13105 while (!BinOps.empty()) {
13106 SDValue BinOp = BinOps.back();
13107 BinOps.pop_back();
13108
13109 if (!Visited.insert(BinOp.getNode()).second)
13110 continue;
13111
13112 PromOps.push_back(BinOp);
13113
13114 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
13115 // The condition of the select is not promoted.
13116 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
13117 continue;
13118 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
13119 continue;
13120
13121 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
13122 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
13123 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
13124 BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
13125 isa<ConstantSDNode>(BinOp.getOperand(i))) {
13126 Inputs.push_back(BinOp.getOperand(i));
13127 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
13128 BinOp.getOperand(i).getOpcode() == ISD::OR ||
13129 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
13130 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
13131 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
13132 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
13133 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
13134 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
13135 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
13136 BinOps.push_back(BinOp.getOperand(i));
13137 } else {
13138 // We have an input that is not an extension or another binary
13139 // operation; we'll abort this transformation.
13140 return SDValue();
13141 }
13142 }
13143 }
13144
13145 // Make sure that this is a self-contained cluster of operations (which
13146 // is not quite the same thing as saying that everything has only one
13147 // use).
13148 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13149 if (isa<ConstantSDNode>(Inputs[i]))
13150 continue;
13151
13152 for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
13153 UE = Inputs[i].getNode()->use_end();
13154 UI != UE; ++UI) {
13155 SDNode *User = *UI;
13156 if (User != N && !Visited.count(User))
13157 return SDValue();
13158
13159 // Make sure that we're not going to promote the non-output-value
13160 // operand(s) of SELECT or SELECT_CC.
13161 // FIXME: Although we could sometimes handle this, and it does occur in
13162 // practice that one of the condition inputs to the select is also one of
13163 // the outputs, we currently can't deal with this.
13164 if (User->getOpcode() == ISD::SELECT) {
13165 if (User->getOperand(0) == Inputs[i])
13166 return SDValue();
13167 } else if (User->getOpcode() == ISD::SELECT_CC) {
13168 if (User->getOperand(0) == Inputs[i] ||
13169 User->getOperand(1) == Inputs[i])
13170 return SDValue();
13171 }
13172 }
13173 }
13174
13175 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
13176 for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
13177 UE = PromOps[i].getNode()->use_end();
13178 UI != UE; ++UI) {
13179 SDNode *User = *UI;
13180 if (User != N && !Visited.count(User))
13181 return SDValue();
13182
13183 // Make sure that we're not going to promote the non-output-value
13184 // operand(s) of SELECT or SELECT_CC.
13185 // FIXME: Although we could sometimes handle this, and it does occur in
13186 // practice that one of the condition inputs to the select is also one of
13187 // the outputs, we currently can't deal with this.
13188 if (User->getOpcode() == ISD::SELECT) {
13189 if (User->getOperand(0) == PromOps[i])
13190 return SDValue();
13191 } else if (User->getOpcode() == ISD::SELECT_CC) {
13192 if (User->getOperand(0) == PromOps[i] ||
13193 User->getOperand(1) == PromOps[i])
13194 return SDValue();
13195 }
13196 }
13197 }
13198
13199 // Replace all inputs with the extension operand.
13200 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13201 // Constants may have users outside the cluster of to-be-promoted nodes,
13202 // and so we need to replace those as we do the promotions.
13203 if (isa<ConstantSDNode>(Inputs[i]))
13204 continue;
13205 else
13206 DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
13207 }
13208
13209 std::list<HandleSDNode> PromOpHandles;
13210 for (auto &PromOp : PromOps)
13211 PromOpHandles.emplace_back(PromOp);
13212
13213 // Replace all operations (these are all the same, but have a different
13214 // (i1) return type). DAG.getNode will validate that the types of
13215 // a binary operator match, so go through the list in reverse so that
13216 // we've likely promoted both operands first. Any intermediate truncations or
13217 // extensions disappear.
13218 while (!PromOpHandles.empty()) {
13219 SDValue PromOp = PromOpHandles.back().getValue();
13220 PromOpHandles.pop_back();
13221
13222 if (PromOp.getOpcode() == ISD::TRUNCATE ||
13223 PromOp.getOpcode() == ISD::SIGN_EXTEND ||
13224 PromOp.getOpcode() == ISD::ZERO_EXTEND ||
13225 PromOp.getOpcode() == ISD::ANY_EXTEND) {
13226 if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
13227 PromOp.getOperand(0).getValueType() != MVT::i1) {
13228 // The operand is not yet ready (see comment below).
13229 PromOpHandles.emplace_front(PromOp);
13230 continue;
13231 }
13232
13233 SDValue RepValue = PromOp.getOperand(0);
13234 if (isa<ConstantSDNode>(RepValue))
13235 RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
13236
13237 DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
13238 continue;
13239 }
13240
13241 unsigned C;
13242 switch (PromOp.getOpcode()) {
13243 default: C = 0; break;
13244 case ISD::SELECT: C = 1; break;
13245 case ISD::SELECT_CC: C = 2; break;
13246 }
13247
13248 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
13249 PromOp.getOperand(C).getValueType() != MVT::i1) ||
13250 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
13251 PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
13252 // The to-be-promoted operands of this node have not yet been
13253 // promoted (this should be rare because we're going through the
13254 // list backward, but if one of the operands has several users in
13255 // this cluster of to-be-promoted nodes, it is possible).
13256 PromOpHandles.emplace_front(PromOp);
13257 continue;
13258 }
13259
13260 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
13261 PromOp.getNode()->op_end());
13262
13263 // If there are any constant inputs, make sure they're replaced now.
13264 for (unsigned i = 0; i < 2; ++i)
13265 if (isa<ConstantSDNode>(Ops[C+i]))
13266 Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
13267
13268 DAG.ReplaceAllUsesOfValueWith(PromOp,
13269 DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
13270 }
13271
13272 // Now we're left with the initial truncation itself.
13273 if (N->getOpcode() == ISD::TRUNCATE)
13274 return N->getOperand(0);
13275
13276 // Otherwise, this is a comparison. The operands to be compared have just
13277 // changed type (to i1), but everything else is the same.
13278 return SDValue(N, 0);
13279}
13280
13281SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
13282 DAGCombinerInfo &DCI) const {
13283 SelectionDAG &DAG = DCI.DAG;
13284 SDLoc dl(N);
13285
13286 // If we're tracking CR bits, we need to be careful that we don't have:
13287 // zext(binary-ops(trunc(x), trunc(y)))
13288 // or
13289 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
13290 // such that we're unnecessarily moving things into CR bits that can more
13291 // efficiently stay in GPRs. Note that if we're not certain that the high
13292 // bits are set as required by the final extension, we still may need to do
13293 // some masking to get the proper behavior.
13294
13295 // This same functionality is important on PPC64 when dealing with
13296 // 32-to-64-bit extensions; these occur often when 32-bit values are used as
13297 // the return values of functions. Because it is so similar, it is handled
13298 // here as well.
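// [Editorial illustration, not part of the original source.] The mirror image of
// the truncation case above, with hypothetical i32 values A and B:
//
//   t1 = truncate A to i1
//   t2 = truncate B to i1
//   t3 = and t1, t2
//   t4 = zero_extend t3 to i32
//
// Here the preferred form keeps the AND on A and B in GPRs, adding a final mask
// only when the high bits are not already known to satisfy the outer extension.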
13299
13300 if (N->getValueType(0) != MVT::i32 &&
13301 N->getValueType(0) != MVT::i64)
13302 return SDValue();
13303
13304 if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
13305 (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
13306 return SDValue();
13307
13308 if (N->getOperand(0).getOpcode() != ISD::AND &&
13309 N->getOperand(0).getOpcode() != ISD::OR &&
13310 N->getOperand(0).getOpcode() != ISD::XOR &&
13311 N->getOperand(0).getOpcode() != ISD::SELECT &&
13312 N->getOperand(0).getOpcode() != ISD::SELECT_CC)
13313 return SDValue();
13314
13315 SmallVector<SDValue, 4> Inputs;
13316 SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
13317 SmallPtrSet<SDNode *, 16> Visited;
13318
13319 // Visit all inputs, collect all binary operations (and, or, xor and
13320 // select) that are all fed by truncations.
13321 while (!BinOps.empty()) {
13322 SDValue BinOp = BinOps.back();
13323 BinOps.pop_back();
13324
13325 if (!Visited.insert(BinOp.getNode()).second)
13326 continue;
13327
13328 PromOps.push_back(BinOp);
13329
13330 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
13331 // The condition of the select is not promoted.
13332 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
13333 continue;
13334 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
13335 continue;
13336
13337 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
13338 isa<ConstantSDNode>(BinOp.getOperand(i))) {
13339 Inputs.push_back(BinOp.getOperand(i));
13340 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
13341 BinOp.getOperand(i).getOpcode() == ISD::OR ||
13342 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
13343 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
13344 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
13345 BinOps.push_back(BinOp.getOperand(i));
13346 } else {
13347 // We have an input that is not a truncation or another binary
13348 // operation; we'll abort this transformation.
13349 return SDValue();
13350 }
13351 }
13352 }
13353
13354 // The operands of a select that must be truncated when the select is
13355 // promoted because the operand is actually part of the to-be-promoted set.
13356 DenseMap<SDNode *, EVT> SelectTruncOp[2];
13357
13358 // Make sure that this is a self-contained cluster of operations (which
13359 // is not quite the same thing as saying that everything has only one
13360 // use).
13361 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13362 if (isa<ConstantSDNode>(Inputs[i]))
13363 continue;
13364
13365 for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
13366 UE = Inputs[i].getNode()->use_end();
13367 UI != UE; ++UI) {
13368 SDNode *User = *UI;
13369 if (User != N && !Visited.count(User))
13370 return SDValue();
13371
13372 // If we're going to promote the non-output-value operand(s) of SELECT or
13373 // SELECT_CC, record them for truncation.
13374 if (User->getOpcode() == ISD::SELECT) {
13375 if (User->getOperand(0) == Inputs[i])
13376 SelectTruncOp[0].insert(std::make_pair(User,
13377 User->getOperand(0).getValueType()));
13378 } else if (User->getOpcode() == ISD::SELECT_CC) {
13379 if (User->getOperand(0) == Inputs[i])
13380 SelectTruncOp[0].insert(std::make_pair(User,
13381 User->getOperand(0).getValueType()));
13382 if (User->getOperand(1) == Inputs[i])
13383 SelectTruncOp[1].insert(std::make_pair(User,
13384 User->getOperand(1).getValueType()));
13385 }
13386 }
13387 }
13388
13389 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
13390 for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
13391 UE = PromOps[i].getNode()->use_end();
13392 UI != UE; ++UI) {
13393 SDNode *User = *UI;
13394 if (User != N && !Visited.count(User))
13395 return SDValue();
13396
13397 // If we're going to promote the non-output-value operand(s) of SELECT or
13398 // SELECT_CC, record them for truncation.
13399 if (User->getOpcode() == ISD::SELECT) {
13400 if (User->getOperand(0) == PromOps[i])
13401 SelectTruncOp[0].insert(std::make_pair(User,
13402 User->getOperand(0).getValueType()));
13403 } else if (User->getOpcode() == ISD::SELECT_CC) {
13404 if (User->getOperand(0) == PromOps[i])
13405 SelectTruncOp[0].insert(std::make_pair(User,
13406 User->getOperand(0).getValueType()));
13407 if (User->getOperand(1) == PromOps[i])
13408 SelectTruncOp[1].insert(std::make_pair(User,
13409 User->getOperand(1).getValueType()));
13410 }
13411 }
13412 }
13413
13414 unsigned PromBits = N->getOperand(0).getValueSizeInBits();
13415 bool ReallyNeedsExt = false;
13416 if (N->getOpcode() != ISD::ANY_EXTEND) {
13417 // If the inputs are not all already sign/zero-extended, then
13418 // we'll still need to do that at the end.
13419 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13420 if (isa<ConstantSDNode>(Inputs[i]))
13421 continue;
13422
13423 unsigned OpBits =
13424 Inputs[i].getOperand(0).getValueSizeInBits();
13425 assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
13426
13427 if ((N->getOpcode() == ISD::ZERO_EXTEND &&
13428 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
13429 APInt::getHighBitsSet(OpBits,
13430 OpBits-PromBits))) ||
13431 (N->getOpcode() == ISD::SIGN_EXTEND &&
13432 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
13433 (OpBits-(PromBits-1)))) {
13434 ReallyNeedsExt = true;
13435 break;
13436 }
13437 }
13438 }
13439
13440 // Replace all inputs, either with the truncation operand, or a
13441 // truncation or extension to the final output type.
13442 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13443 // Constant inputs need to be replaced with the to-be-promoted nodes that
13444 // use them because they might have users outside of the cluster of
13445 // promoted nodes.
13446 if (isa<ConstantSDNode>(Inputs[i]))
13447 continue;
13448
13449 SDValue InSrc = Inputs[i].getOperand(0);
13450 if (Inputs[i].getValueType() == N->getValueType(0))
13451 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
13452 else if (N->getOpcode() == ISD::SIGN_EXTEND)
13453 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13454 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
13455 else if (N->getOpcode() == ISD::ZERO_EXTEND)
13456 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13457 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
13458 else
13459 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13460 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
13461 }
13462
13463 std::list<HandleSDNode> PromOpHandles;
13464 for (auto &PromOp : PromOps)
13465 PromOpHandles.emplace_back(PromOp);
13466
13467 // Replace all operations (these are all the same, but have a different
13468 // (promoted) return type). DAG.getNode will validate that the types of
13469 // a binary operator match, so go through the list in reverse so that
13470 // we've likely promoted both operands first.
13471 while (!PromOpHandles.empty()) {
13472 SDValue PromOp = PromOpHandles.back().getValue();
13473 PromOpHandles.pop_back();
13474
13475 unsigned C;
13476 switch (PromOp.getOpcode()) {
13477 default: C = 0; break;
13478 case ISD::SELECT: C = 1; break;
13479 case ISD::SELECT_CC: C = 2; break;
13480 }
13481
13482 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
13483 PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
13484 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
13485 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
13486 // The to-be-promoted operands of this node have not yet been
13487 // promoted (this should be rare because we're going through the
13488 // list backward, but if one of the operands has several users in
13489 // this cluster of to-be-promoted nodes, it is possible).
13490 PromOpHandles.emplace_front(PromOp);
13491 continue;
13492 }
13493
13494 // For SELECT and SELECT_CC nodes, we do a similar check for any
13495 // to-be-promoted comparison inputs.
13496 if (PromOp.getOpcode() == ISD::SELECT ||
13497 PromOp.getOpcode() == ISD::SELECT_CC) {
13498 if ((SelectTruncOp[0].count(PromOp.getNode()) &&
13499 PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
13500 (SelectTruncOp[1].count(PromOp.getNode()) &&
13501 PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
13502 PromOpHandles.emplace_front(PromOp);
13503 continue;
13504 }
13505 }
13506
13507 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
13508 PromOp.getNode()->op_end());
13509
13510 // If this node has constant inputs, then they'll need to be promoted here.
13511 for (unsigned i = 0; i < 2; ++i) {
13512 if (!isa<ConstantSDNode>(Ops[C+i]))
13513 continue;
13514 if (Ops[C+i].getValueType() == N->getValueType(0))
13515 continue;
13516
13517 if (N->getOpcode() == ISD::SIGN_EXTEND)
13518 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13519 else if (N->getOpcode() == ISD::ZERO_EXTEND)
13520 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13521 else
13522 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13523 }
13524
13525 // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
13526 // truncate them again to the original value type.
13527 if (PromOp.getOpcode() == ISD::SELECT ||
13528 PromOp.getOpcode() == ISD::SELECT_CC) {
13529 auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
13530 if (SI0 != SelectTruncOp[0].end())
13531 Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
13532 auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
13533 if (SI1 != SelectTruncOp[1].end())
13534 Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
13535 }
13536
13537 DAG.ReplaceAllUsesOfValueWith(PromOp,
13538 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
13539 }
13540
13541 // Now we're left with the initial extension itself.
13542 if (!ReallyNeedsExt)
13543 return N->getOperand(0);
13544
13545 // To zero extend, just mask off everything except for the first bit (in the
13546 // i1 case).
13547 if (N->getOpcode() == ISD::ZERO_EXTEND)
13548 return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
13549 DAG.getConstant(APInt::getLowBitsSet(
13550 N->getValueSizeInBits(0), PromBits),
13551 dl, N->getValueType(0)));
13552
13553 assert(N->getOpcode() == ISD::SIGN_EXTEND &&
13554 "Invalid extension type");
13555 EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
13556 SDValue ShiftCst =
13557 DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
13558 return DAG.getNode(
13559 ISD::SRA, dl, N->getValueType(0),
13560 DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
13561 ShiftCst);
13562}
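// [Editorial sketch, not part of PPCISelLowering.cpp.] Scalar equivalents of the two
// fix-ups emitted at the end of the function above when ReallyNeedsExt is true,
// assuming the interesting value sits in the low PromBits bits of a 32-bit register
// and that signed right shift is arithmetic. Function and parameter names are
// hypothetical.
static unsigned zeroExtendLowBits(unsigned V, unsigned PromBits) {
  // Mirrors the ISD::AND with APInt::getLowBitsSet(..., PromBits).
  return PromBits >= 32 ? V : (V & ((1u << PromBits) - 1));
}
static int signExtendLowBits(unsigned V, unsigned PromBits) {
  // Mirrors the ISD::SHL / ISD::SRA pair with shift amount (width - PromBits).
  int Shift = 32 - (int)PromBits;
  return ((int)(V << Shift)) >> Shift;
}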
13563
13564SDValue PPCTargetLowering::combineSetCC(SDNode *N,
13565 DAGCombinerInfo &DCI) const {
13566 assert(N->getOpcode() == ISD::SETCC &&
13567 "Should be called with a SETCC node");
13568
13569 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13570 if (CC == ISD::SETNE || CC == ISD::SETEQ) {
13571 SDValue LHS = N->getOperand(0);
13572 SDValue RHS = N->getOperand(1);
13573
13574 // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
13575 if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
13576 LHS.hasOneUse())
13577 std::swap(LHS, RHS);
13578
13579 // x == 0-y --> x+y == 0
13580 // x != 0-y --> x+y != 0
13581 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
13582 RHS.hasOneUse()) {
13583 SDLoc DL(N);
13584 SelectionDAG &DAG = DCI.DAG;
13585 EVT VT = N->getValueType(0);
13586 EVT OpVT = LHS.getValueType();
13587 SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
13588 return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
13589 }
13590 }
13591
13592 return DAGCombineTruncBoolExt(N, DCI);
13593}
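// [Editorial sketch, not part of the original source.] The integer identity behind
// the combineSetCC rewrite above: for EQ/NE comparisons, x == 0 - y holds exactly
// when x + y == 0, since both sides wrap modulo 2^n. Names are hypothetical.
static bool eqNegDirect(unsigned X, unsigned Y) { return X == 0u - Y; }
static bool eqNegViaAdd(unsigned X, unsigned Y) { return X + Y == 0u; }
// The two functions agree for every input, so the DAG combine can replace the
// subtract-from-zero with an add and compare the sum against zero instead.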
13594
13595// Is this an extending load from an f32 to an f64?
13596static bool isFPExtLoad(SDValue Op) {
13597 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
13598 return LD->getExtensionType() == ISD::EXTLOAD &&
13599 Op.getValueType() == MVT::f64;
13600 return false;
13601}
13602
13603 /// Reduces the number of fp-to-int conversions when building a vector.
13604///
13605/// If this vector is built out of floating to integer conversions,
13606/// transform it to a vector built out of floating point values followed by a
13607/// single floating to integer conversion of the vector.
13608/// Namely (build_vector (fptosi $A), (fptosi $B), ...)
13609/// becomes (fptosi (build_vector ($A, $B, ...)))
13610SDValue PPCTargetLowering::
13611combineElementTruncationToVectorTruncation(SDNode *N,
13612 DAGCombinerInfo &DCI) const {
13613 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
13614 "Should be called with a BUILD_VECTOR node");
13615
13616 SelectionDAG &DAG = DCI.DAG;
13617 SDLoc dl(N);
13618
13619 SDValue FirstInput = N->getOperand(0);
13620 assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
13621 "The input operand must be an fp-to-int conversion.");
13622
13623 // This combine happens after legalization, so the fp_to_[su]i nodes are
13624 // already converted to PPCISD nodes.
13625 unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
13626 if (FirstConversion == PPCISD::FCTIDZ ||
13627 FirstConversion == PPCISD::FCTIDUZ ||
13628 FirstConversion == PPCISD::FCTIWZ ||
13629 FirstConversion == PPCISD::FCTIWUZ) {
13630 bool IsSplat = true;
13631 bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
13632 FirstConversion == PPCISD::FCTIWUZ;
13633 EVT SrcVT = FirstInput.getOperand(0).getValueType();
13634 SmallVector<SDValue, 4> Ops;
13635 EVT TargetVT = N->getValueType(0);
13636 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
13637 SDValue NextOp = N->getOperand(i);
13638 if (NextOp.getOpcode() != PPCISD::MFVSR)
13639 return SDValue();
13640 unsigned NextConversion = NextOp.getOperand(0).getOpcode();
13641 if (NextConversion != FirstConversion)
13642 return SDValue();
13643 // If we are converting to 32-bit integers, we need to add an FP_ROUND.
13644 // This is not valid if the input was originally double precision. It is
13645 // also not profitable to do unless this is an extending load, in which
13646 // case doing this combine will allow us to combine consecutive loads.
13647 if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
13648 return SDValue();
13649 if (N->getOperand(i) != FirstInput)
13650 IsSplat = false;
13651 }
13652
13653 // If this is a splat, we leave it as-is since there will be only a single
13654 // fp-to-int conversion followed by a splat of the integer. This is better
13655 // for 32-bit and smaller ints and neutral for 64-bit ints.
13656 if (IsSplat)
13657 return SDValue();
13658
13659 // Now that we know we have the right type of node, get its operands
13660 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
13661 SDValue In = N->getOperand(i).getOperand(0);
13662 if (Is32Bit) {
13663 // For 32-bit values, we need to add an FP_ROUND node (if we made it
13664 // here, we know that all inputs are extending loads so this is safe).
13665 if (In.isUndef())
13666 Ops.push_back(DAG.getUNDEF(SrcVT));
13667 else {
13668 SDValue Trunc = DAG.getNode(ISD::FP_ROUND, dl,
13669 MVT::f32, In.getOperand(0),
13670 DAG.getIntPtrConstant(1, dl));
13671 Ops.push_back(Trunc);
13672 }
13673 } else
13674 Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
13675 }
13676
13677 unsigned Opcode;
13678 if (FirstConversion == PPCISD::FCTIDZ ||
13679 FirstConversion == PPCISD::FCTIWZ)
13680 Opcode = ISD::FP_TO_SINT;
13681 else
13682 Opcode = ISD::FP_TO_UINT;
13683
13684 EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
13685 SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
13686 return DAG.getNode(Opcode, dl, TargetVT, BV);
13687 }
13688 return SDValue();
13689}
13690
13691/// Reduce the number of loads when building a vector.
13692///
13693/// Building a vector out of multiple loads can be converted to a load
13694/// of the vector type if the loads are consecutive. If the loads are
13695/// consecutive but in descending order, a shuffle is added at the end
13696/// to reorder the vector.
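/// [Editorial illustration, not part of the original source.] For a hypothetical
/// v4f32 build_vector whose operands load from addresses A, A+4, A+8 and A+12 (in
/// that order), the combine below emits a single v4f32 load at A. If the operands
/// instead load from A+12, A+8, A+4 and A (reverse consecutive), it emits one v4f32
/// load at A followed by a vector_shuffle with mask <3, 2, 1, 0> to restore the
/// requested element order.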
13697static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
13698 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
13699 "Should be called with a BUILD_VECTOR node");
13700
13701 SDLoc dl(N);
13702
13703 // Return early for non-byte-sized types, as they can't be consecutive.
13704 if (!N->getValueType(0).getVectorElementType().isByteSized())
13705 return SDValue();
13706
13707 bool InputsAreConsecutiveLoads = true;
13708 bool InputsAreReverseConsecutive = true;
13709 unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
13710 SDValue FirstInput = N->getOperand(0);
13711 bool IsRoundOfExtLoad = false;
13712
13713 if (FirstInput.getOpcode() == ISD::FP_ROUND &&
13714 FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
13715 LoadSDNode *LD = dyn_cast<LoadSDNode>(FirstInput.getOperand(0));
13716 IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD;
13717 }
13718 // Not a build vector of (possibly fp_rounded) loads.
13719 if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
13720 N->getNumOperands() == 1)
13721 return SDValue();
13722
13723 for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
13724 // If any inputs are fp_round(extload), they all must be.
13725 if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
13726 return SDValue();
13727
13728 SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
13729 N->getOperand(i);
13730 if (NextInput.getOpcode() != ISD::LOAD)
13731 return SDValue();
13732
13733 SDValue PreviousInput =
13734 IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
13735 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput);
13736 LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput);
13737
13738 // If any inputs are fp_round(extload), they all must be.
13739 if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
13740 return SDValue();
13741
13742 if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG))
13743 InputsAreConsecutiveLoads = false;
13744 if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG))
13745 InputsAreReverseConsecutive = false;
13746
13747 // Exit early if the loads are neither consecutive nor reverse consecutive.
13748 if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
13749 return SDValue();
13750 }
13751
13752 assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
13753 "The loads cannot be both consecutive and reverse consecutive.");
13754
13755 SDValue FirstLoadOp =
13756 IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput;
13757 SDValue LastLoadOp =
13758 IsRoundOfExtLoad ? N->getOperand(N->getNumOperands()-1).getOperand(0) :
13759 N->getOperand(N->getNumOperands()-1);
13760
13761 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp);
13762 LoadSDNode *LDL = dyn_cast<LoadSDNode>(LastLoadOp);
13763 if (InputsAreConsecutiveLoads) {
13764 assert(LD1 && "Input needs to be a LoadSDNode.");
13765 return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(),
13766 LD1->getBasePtr(), LD1->getPointerInfo(),
13767 LD1->getAlignment());
13768 }
13769 if (InputsAreReverseConsecutive) {
13770 assert(LDL && "Input needs to be a LoadSDNode.");
13771 SDValue Load = DAG.getLoad(N->getValueType(0), dl, LDL->getChain(),
13772 LDL->getBasePtr(), LDL->getPointerInfo(),
13773 LDL->getAlignment());
13774 SmallVector<int, 16> Ops;
13775 for (int i = N->getNumOperands() - 1; i >= 0; i--)
13776 Ops.push_back(i);
13777
13778 return DAG.getVectorShuffle(N->getValueType(0), dl, Load,
13779 DAG.getUNDEF(N->getValueType(0)), Ops);
13780 }
13781 return SDValue();
13782}
13783
13784// This function adds the required vector_shuffle needed to get
13785// the elements of the vector extract in the correct position
13786// as specified by the CorrectElems encoding.
13787static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
13788 SDValue Input, uint64_t Elems,
13789 uint64_t CorrectElems) {
13790 SDLoc dl(N);
13791
13792 unsigned NumElems = Input.getValueType().getVectorNumElements();
13793 SmallVector<int, 16> ShuffleMask(NumElems, -1);
13794
13795 // Knowing the element indices being extracted from the original
13796 // vector and the order in which they're being inserted, just put
13797 // them at the element indices required for the instruction.
13798 for (unsigned i = 0; i < N->getNumOperands(); i++) {
13799 if (DAG.getDataLayout().isLittleEndian())
13800 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
13801 else
13802 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
13803 CorrectElems = CorrectElems >> 8;
13804 Elems = Elems >> 8;
13805 }
13806
13807 SDValue Shuffle =
13808 DAG.getVectorShuffle(Input.getValueType(), dl, Input,
13809 DAG.getUNDEF(Input.getValueType()), ShuffleMask);
13810
13811 EVT VT = N->getValueType(0);
13812 SDValue Conv = DAG.getBitcast(VT, Shuffle);
13813
13814 EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
13815 Input.getValueType().getVectorElementType(),
13816 VT.getVectorNumElements());
13817 return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
13818 DAG.getValueType(ExtVT));
13819}
13820
13821 // Look for build vector patterns where the input operands come from sign-
13822 // extended vector_extract elements at specific indices. If the correct indices
13823 // aren't used, add a vector shuffle to fix up the indices and create a
13824 // SIGN_EXTEND_INREG node, which selects the vector sign extend instructions
13825 // during instruction selection.
13826static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
13827 // This array encodes the indices that the vector sign extend instructions
13828 // extract from when extending from one type to another for both BE and LE.
13829 // The right nibble of each byte corresponds to the LE indices,
13830 // and the left nibble of each byte corresponds to the BE indices.
13831 // For example: 0x3074B8FC byte->word
13832 // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
13833 // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
13834 // For example: 0x000070F8 byte->double word
13835 // For LE: the allowed indices are: 0x0,0x8
13836 // For BE: the allowed indices are: 0x7,0xF
13837 uint64_t TargetElems[] = {
13838 0x3074B8FC, // b->w
13839 0x000070F8, // b->d
13840 0x10325476, // h->w
13841 0x00003074, // h->d
13842 0x00001032, // w->d
13843 };
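// [Editorial worked example, not part of the original source.] Take the
// byte-to-word entry 0x3074B8FC on a little-endian target: the masking below with
// 0x0F0F0F0F0F0F0F0F keeps the right nibble of each byte, giving
// CorrectElems == 0x0004080C. Since the lambda shifts Elems left by 8 per operand,
// the four build_vector operands must extract vector elements 0, 4, 8 and 12, in
// that order, for the encoding to match without an extra shuffle.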
13844
13845 uint64_t Elems = 0;
13846 int Index;
13847 SDValue Input;
13848
13849 auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
13850 if (!Op)
13851 return false;
13852 if (Op.getOpcode() != ISD::SIGN_EXTEND &&
13853 Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
13854 return false;
13855
13856 // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
13857 // of the right width.
13858 SDValue Extract = Op.getOperand(0);
13859 if (Extract.getOpcode() == ISD::ANY_EXTEND)
13860 Extract = Extract.getOperand(0);
13861 if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
13862 return false;
13863
13864 ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
13865 if (!ExtOp)
13866 return false;
13867
13868 Index = ExtOp->getZExtValue();
13869 if (Input && Input != Extract.getOperand(0))
13870 return false;
13871
13872 if (!Input)
13873 Input = Extract.getOperand(0);
13874
13875 Elems = Elems << 8;
13876 Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
13877 Elems |= Index;
13878
13879 return true;
13880 };
13881
13882 // If the build vector operands aren't sign-extended vector extracts
13883 // of the same input vector, then return.
13884 for (unsigned i = 0; i < N->getNumOperands(); i++) {
13885 if (!isSExtOfVecExtract(N->getOperand(i))) {
13886 return SDValue();
13887 }
13888 }
13889
13890 // If the vector extract indices are not correct, add the appropriate
13891 // vector_shuffle.
13892 int TgtElemArrayIdx;
13893 int InputSize = Input.getValueType().getScalarSizeInBits();
13894 int OutputSize = N->getValueType(0).getScalarSizeInBits();
13895 if (InputSize + OutputSize == 40)
13896 TgtElemArrayIdx = 0;
13897 else if (InputSize + OutputSize == 72)
13898 TgtElemArrayIdx = 1;
13899 else if (InputSize + OutputSize == 48)
13900 TgtElemArrayIdx = 2;
13901 else if (InputSize + OutputSize == 80)
13902 TgtElemArrayIdx = 3;
13903 else if (InputSize + OutputSize == 96)
13904 TgtElemArrayIdx = 4;
13905 else
13906 return SDValue();
13907
13908 uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
13909 CorrectElems = DAG.getDataLayout().isLittleEndian()
13910 ? CorrectElems & 0x0F0F0F0F0F0F0F0F
13911 : CorrectElems & 0xF0F0F0F0F0F0F0F0;
13912 if (Elems != CorrectElems) {
13913 return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
13914 }
13915
13916 // Regular lowering will catch cases where a shuffle is not needed.
13917 return SDValue();
13918}
13919
13920// Look for the pattern of a load from a narrow width to i128, feeding
13921// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
13922// (LXVRZX). This node represents a zero extending load that will be matched
13923// to the Load VSX Vector Rightmost instructions.
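// [Editorial illustration, not part of the original source.] The DAG shape this
// combine matches, with a hypothetical pointer Ptr:
//
//   t1: i128 = zextload<i64> from Ptr
//   t2: v1i128 = build_vector t1
//
// It is rewritten into a single PPCISD::LXVRZX memory-intrinsic node whose third
// operand carries the loaded element width in bits (64 in this example).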
13924static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
13925 SDLoc DL(N);
13926
13927 // This combine is only eligible for a BUILD_VECTOR of v1i128.
13928 if (N->getValueType(0) != MVT::v1i128)
13929 return SDValue();
13930
13931 SDValue Operand = N->getOperand(0);
13932 // Proceed with the transformation if the operand to the BUILD_VECTOR
13933 // is a load instruction.
13934 if (Operand.getOpcode() != ISD::LOAD)
13935 return SDValue();
13936
13937 LoadSDNode *LD = dyn_cast<LoadSDNode>(Operand);
13938 EVT MemoryType = LD->getMemoryVT();
13939
13940 // This transformation is only valid if we are loading either a byte,
13941 // halfword, word, or doubleword.
13942 bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
13943 MemoryType == MVT::i32 || MemoryType == MVT::i64;
13944
13945 // Ensure that the load from the narrow width is being zero extended to i128.
13946 if (!ValidLDType ||
13947 (LD->getExtensionType() != ISD::ZEXTLOAD &&
13948 LD->getExtensionType() != ISD::EXTLOAD))
13949 return SDValue();
13950
13951 SDValue LoadOps[] = {
13952 LD->getChain(), LD->getBasePtr(),
13953 DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
13954
13955 return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
13956 DAG.getVTList(MVT::v1i128, MVT::Other),
13957 LoadOps, MemoryType, LD->getMemOperand());
13958}
13959
13960SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
13961 DAGCombinerInfo &DCI) const {
13962 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
13963 "Should be called with a BUILD_VECTOR node");
13964
13965 SelectionDAG &DAG = DCI.DAG;
13966 SDLoc dl(N);
13967
13968 if (!Subtarget.hasVSX())
13969 return SDValue();
13970
13971 // The target independent DAG combiner will leave a build_vector of
13972 // float-to-int conversions intact. We can generate MUCH better code for
13973 // a float-to-int conversion of a vector of floats.
13974 SDValue FirstInput = N->getOperand(0);
13975 if (FirstInput.getOpcode() == PPCISD::MFVSR) {
13976 SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
13977 if (Reduced)
13978 return Reduced;
13979 }
13980
13981 // If we're building a vector out of consecutive loads, just load that
13982 // vector type.
13983 SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
13984 if (Reduced)
13985 return Reduced;
13986
13987 // If we're building a vector out of extended elements from another vector
13988 // we have P9 vector integer extend instructions. The code assumes legal
13989 // input types (i.e. it can't handle things like v4i16) so do not run before
13990 // legalization.
13991 if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
13992 Reduced = combineBVOfVecSExt(N, DAG);
13993 if (Reduced)
13994 return Reduced;
13995 }
13996
13997 // On Power10, the Load VSX Vector Rightmost instructions can be utilized
13998 // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
13999 // is a load from <valid narrow width> to i128.
14000 if (Subtarget.isISA3_1()) {
14001 SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
14002 if (BVOfZLoad)
14003 return BVOfZLoad;
14004 }
14005
14006 if (N->getValueType(0) != MVT::v2f64)
14007 return SDValue();
14008
14009 // Looking for:
14010 // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
14011 if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
14012 FirstInput.getOpcode() != ISD::UINT_TO_FP)
14013 return SDValue();
14014 if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
14015 N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
14016 return SDValue();
14017 if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
14018 return SDValue();
14019
14020 SDValue Ext1 = FirstInput.getOperand(0);
14021 SDValue Ext2 = N->getOperand(1).getOperand(0);
14022 if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
14023 Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
14024 return SDValue();
14025
14026 ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
14027 ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
14028 if (!Ext1Op || !Ext2Op)
14029 return SDValue();
14030 if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
14031 Ext1.getOperand(0) != Ext2.getOperand(0))
14032 return SDValue();
14033
14034 int FirstElem = Ext1Op->getZExtValue();
14035 int SecondElem = Ext2Op->getZExtValue();
14036 int SubvecIdx;
14037 if (FirstElem == 0 && SecondElem == 1)
14038 SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
14039 else if (FirstElem == 2 && SecondElem == 3)
14040 SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
14041 else
14042 return SDValue();
14043
14044 SDValue SrcVec = Ext1.getOperand(0);
14045 auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
14046 PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
14047 return DAG.getNode(NodeType, dl, MVT::v2f64,
14048 SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
14049}
14050
14051SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
14052 DAGCombinerInfo &DCI) const {
14053 assert((N->getOpcode() == ISD::SINT_TO_FP ||
14054 N->getOpcode() == ISD::UINT_TO_FP) &&
14055 "Need an int -> FP conversion node here");
14056
14057 if (useSoftFloat() || !Subtarget.has64BitSupport())
14058 return SDValue();
14059
14060 SelectionDAG &DAG = DCI.DAG;
14061 SDLoc dl(N);
14062 SDValue Op(N, 0);
14063
14064 // Don't handle ppc_fp128 here or conversions that are out-of-range capable
14065 // from the hardware.
14066 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
14067 return SDValue();
14068 if (!Op.getOperand(0).getValueType().isSimple())
14069 return SDValue();
14070 if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
14071 Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
14072 return SDValue();
14073
14074 SDValue FirstOperand(Op.getOperand(0));
14075 bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
14076 (FirstOperand.getValueType() == MVT::i8 ||
14077 FirstOperand.getValueType() == MVT::i16);
14078 if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
14079 bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
14080 bool DstDouble = Op.getValueType() == MVT::f64;
14081 unsigned ConvOp = Signed ?
14082 (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
14083 (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
14084 SDValue WidthConst =
14085 DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
14086 dl, false);
14087 LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
14088 SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
14089 SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
14090 DAG.getVTList(MVT::f64, MVT::Other),
14091 Ops, MVT::i8, LDN->getMemOperand());
14092
14093 // For signed conversion, we need to sign-extend the value in the VSR
14094 if (Signed) {
14095 SDValue ExtOps[] = { Ld, WidthConst };
14096 SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
14097 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
14098 } else
14099 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
14100 }
14101
14102
14103 // For i32 intermediate values, unfortunately, the conversion functions
14104 // leave the upper 32 bits of the value undefined. Within the set of
14105 // scalar instructions, we have no method for zero- or sign-extending the
14106 // value. Thus, we cannot handle i32 intermediate values here.
14107 if (Op.getOperand(0).getValueType() == MVT::i32)
14108 return SDValue();
14109
14110 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
14111 "UINT_TO_FP is supported only with FPCVT");
14112
14113 // If we have FCFIDS, then use it when converting to single-precision.
14114 // Otherwise, convert to double-precision and then round.
14115 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
14116 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
14117 : PPCISD::FCFIDS)
14118 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
14119 : PPCISD::FCFID);
14120 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
14121 ? MVT::f32
14122 : MVT::f64;
14123
14124 // If we're converting from a float to an int and back to a float again,
14125 // then we don't need the store/load pair at all.
14126 if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
14127 Subtarget.hasFPCVT()) ||
14128 (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
14129 SDValue Src = Op.getOperand(0).getOperand(0);
14130 if (Src.getValueType() == MVT::f32) {
14131 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
14132 DCI.AddToWorklist(Src.getNode());
14133 } else if (Src.getValueType() != MVT::f64) {
14134 // Make sure that we don't pick up a ppc_fp128 source value.
14135 return SDValue();
14136 }
14137
14138 unsigned FCTOp =
14139 Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
14140 PPCISD::FCTIDUZ;
14141
14142 SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
14143 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
14144
14145 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
14146 FP = DAG.getNode(ISD::FP_ROUND, dl,
14147 MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
14148 DCI.AddToWorklist(FP.getNode());
14149 }
14150
14151 return FP;
14152 }
14153
14154 return SDValue();
14155}
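// [Editorial sketch, not part of the original source.] The scalar shape of the
// pattern eliminated by combineFPToIntToFP above; the name is hypothetical. The
// combine keeps the intermediate integer in a floating-point/VSX register (FCTIDZ
// followed by FCFID) instead of moving it through memory and reloading it as an FP
// operand.
static double fpToIntToFPRoundTrip(double X) {
  long long I = (long long)X; // fp_to_sint
  return (double)I;           // sint_to_fp
}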
14156
14157// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
14158// builtins) into loads with swaps.
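// [Editorial illustration, not part of the original source.] On little-endian
// subtargets that need swaps for VSX memory operations, a vector load such as
//
//   t1: v2f64 = load from Ptr        (Ptr is a hypothetical address)
//
// is rewritten below into PPCISD::LXVD2X followed by PPCISD::XXSWAPD (plus a
// bitcast when the requested type is not v2f64), because lxvd2x loads the two
// doublewords in big-endian order and the swap restores the little-endian lane
// order.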
14159SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
14160 DAGCombinerInfo &DCI) const {
14161 SelectionDAG &DAG = DCI.DAG;
14162 SDLoc dl(N);
14163 SDValue Chain;
14164 SDValue Base;
14165 MachineMemOperand *MMO;
14166
14167 switch (N->getOpcode()) {
14168 default:
14169 llvm_unreachable("Unexpected opcode for little endian VSX load");
14170 case ISD::LOAD: {
14171 LoadSDNode *LD = cast<LoadSDNode>(N);
14172 Chain = LD->getChain();
14173 Base = LD->getBasePtr();
14174 MMO = LD->getMemOperand();
14175 // If the MMO suggests this isn't a load of a full vector, leave
14176 // things alone. For a built-in, we have to make the change for
14177 // correctness, so if there is a size problem that will be a bug.
14178 if (MMO->getSize() < 16)
14179 return SDValue();
14180 break;
14181 }
14182 case ISD::INTRINSIC_W_CHAIN: {
14183 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
14184 Chain = Intrin->getChain();
14185 // Similarly to the store case below, Intrin->getBasePtr() doesn't get
14186 // us what we want. Get operand 2 instead.
14187 Base = Intrin->getOperand(2);
14188 MMO = Intrin->getMemOperand();
14189 break;
14190 }
14191 }
14192
14193 MVT VecTy = N->getValueType(0).getSimpleVT();
14194
14195 // Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is
14196 // aligned and the type is a vector with elements up to 4 bytes
14197 if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&
14198 VecTy.getScalarSizeInBits() <= 32) {
14199 return SDValue();
14200 }
14201
14202 SDValue LoadOps[] = { Chain, Base };
14203 SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
14204 DAG.getVTList(MVT::v2f64, MVT::Other),
14205 LoadOps, MVT::v2f64, MMO);
14206
14207 DCI.AddToWorklist(Load.getNode());
14208 Chain = Load.getValue(1);
14209 SDValue Swap = DAG.getNode(
14210 PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
14211 DCI.AddToWorklist(Swap.getNode());
14212
14213 // Add a bitcast if the resulting load type doesn't match v2f64.
14214 if (VecTy != MVT::v2f64) {
14215 SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
14216 DCI.AddToWorklist(N.getNode());
14217 // Package {bitcast value, swap's chain} to match Load's shape.
14218 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
14219 N, Swap.getValue(1));
14220 }
14221
14222 return Swap;
14223}
14224
14225// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
14226// builtins) into stores with swaps.
14227SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
14228 DAGCombinerInfo &DCI) const {
14229 SelectionDAG &DAG = DCI.DAG;
14230 SDLoc dl(N);
14231 SDValue Chain;
14232 SDValue Base;
14233 unsigned SrcOpnd;
14234 MachineMemOperand *MMO;
14235
14236 switch (N->getOpcode()) {
14237 default:
14238 llvm_unreachable("Unexpected opcode for little endian VSX store");
14239 case ISD::STORE: {
14240 StoreSDNode *ST = cast<StoreSDNode>(N);
14241 Chain = ST->getChain();
14242 Base = ST->getBasePtr();
14243 MMO = ST->getMemOperand();
14244 SrcOpnd = 1;
14245 // If the MMO suggests this isn't a store of a full vector, leave
14246 // things alone. For a built-in, we have to make the change for
14247 // correctness, so if there is a size problem that will be a bug.
14248 if (MMO->getSize() < 16)
14249 return SDValue();
14250 break;
14251 }
14252 case ISD::INTRINSIC_VOID: {
14253 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
14254 Chain = Intrin->getChain();
14255 // Intrin->getBasePtr() oddly does not get what we want.
14256 Base = Intrin->getOperand(3);
14257 MMO = Intrin->getMemOperand();
14258 SrcOpnd = 2;
14259 break;
14260 }
14261 }
14262
14263 SDValue Src = N->getOperand(SrcOpnd);
14264 MVT VecTy = Src.getValueType().getSimpleVT();
14265
14266 // Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the store is
14267 // aligned and the type is a vector with elements up to 4 bytes
14268 if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&
14269 VecTy.getScalarSizeInBits() <= 32) {
14270 return SDValue();
14271 }
14272
14273 // All stores are done as v2f64, with a bitcast added when needed.
14274 if (VecTy != MVT::v2f64) {
14275 Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
14276 DCI.AddToWorklist(Src.getNode());
14277 }
14278
14279 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
14280 DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
14281 DCI.AddToWorklist(Swap.getNode());
14282 Chain = Swap.getValue(1);
14283 SDValue StoreOps[] = { Chain, Swap, Base };
14284 SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
14285 DAG.getVTList(MVT::Other),
14286 StoreOps, VecTy, MMO);
14287 DCI.AddToWorklist(Store.getNode());
14288 return Store;
14289}
14290
14291// Handle DAG combine for STORE (FP_TO_INT F).
14292SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
14293 DAGCombinerInfo &DCI) const {
14294
14295 SelectionDAG &DAG = DCI.DAG;
14296 SDLoc dl(N);
14297 unsigned Opcode = N->getOperand(1).getOpcode();
14298
14299   assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT)
14300          && "Not a FP_TO_INT Instruction!");
14301
14302 SDValue Val = N->getOperand(1).getOperand(0);
14303 EVT Op1VT = N->getOperand(1).getValueType();
14304 EVT ResVT = Val.getValueType();
14305
14306 if (!isTypeLegal(ResVT))
14307 return SDValue();
14308
14309 // Only perform combine for conversion to i64/i32 or power9 i16/i8.
14310 bool ValidTypeForStoreFltAsInt =
14311 (Op1VT == MVT::i32 || Op1VT == MVT::i64 ||
14312 (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
14313
14314 if (ResVT == MVT::ppcf128 || !Subtarget.hasP8Vector() ||
14315 cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
14316 return SDValue();
14317
14318 // Extend f32 values to f64
14319 if (ResVT.getScalarSizeInBits() == 32) {
14320 Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
14321 DCI.AddToWorklist(Val.getNode());
14322 }
14323
14324 // Set signed or unsigned conversion opcode.
14325 unsigned ConvOpcode = (Opcode == ISD::FP_TO_SINT) ?
14326 PPCISD::FP_TO_SINT_IN_VSR :
14327 PPCISD::FP_TO_UINT_IN_VSR;
14328
14329 Val = DAG.getNode(ConvOpcode,
14330 dl, ResVT == MVT::f128 ? MVT::f128 : MVT::f64, Val);
14331 DCI.AddToWorklist(Val.getNode());
14332
14333 // Set number of bytes being converted.
14334 unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
14335 SDValue Ops[] = { N->getOperand(0), Val, N->getOperand(2),
14336 DAG.getIntPtrConstant(ByteSize, dl, false),
14337 DAG.getValueType(Op1VT) };
14338
14339 Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
14340 DAG.getVTList(MVT::Other), Ops,
14341 cast<StoreSDNode>(N)->getMemoryVT(),
14342 cast<StoreSDNode>(N)->getMemOperand());
14343
14344 DCI.AddToWorklist(Val.getNode());
14345 return Val;
14346}
14347
14348static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
14349 // Check that the source of the element keeps flipping
14350   // (i.e. Mask[i] < NumElts -> Mask[i+1] >= NumElts).
14351 bool PrevElemFromFirstVec = Mask[0] < NumElts;
14352 for (int i = 1, e = Mask.size(); i < e; i++) {
14353 if (PrevElemFromFirstVec && Mask[i] < NumElts)
14354 return false;
14355 if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
14356 return false;
14357 PrevElemFromFirstVec = !PrevElemFromFirstVec;
14358 }
14359 return true;
14360}
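For reference, the predicate accepts masks whose entries alternate between the two source vectors, e.g. <0,8,1,9,2,10,3,11> for a v8i16 shuffle, and rejects <0,1,8,9,...>. A minimal standalone restatement with sample masks (hypothetical helper name, not from this file):

#include <cassert>
#include <vector>

// Same idea as isAlternatingShuffMask: consecutive mask entries must come
// from different source vectors (indices < NumElts select from the first
// vector, indices >= NumElts from the second).
static bool alternates(const std::vector<int> &Mask, int NumElts) {
  bool PrevFromFirst = Mask[0] < NumElts;
  for (size_t i = 1; i < Mask.size(); ++i) {
    if ((Mask[i] < NumElts) == PrevFromFirst)
      return false;
    PrevFromFirst = !PrevFromFirst;
  }
  return true;
}

int main() {
  // v8i16 shuffle masks: the first alternates sources, the second does not.
  assert(alternates({0, 8, 1, 9, 2, 10, 3, 11}, 8));
  assert(!alternates({0, 1, 8, 9, 2, 3, 10, 11}, 8));
  return 0;
}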
14361
14362static bool isSplatBV(SDValue Op) {
14363 if (Op.getOpcode() != ISD::BUILD_VECTOR)
14364 return false;
14365 SDValue FirstOp;
14366
14367 // Find first non-undef input.
14368 for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
14369 FirstOp = Op.getOperand(i);
14370 if (!FirstOp.isUndef())
14371 break;
14372 }
14373
14374 // All inputs are undef or the same as the first non-undef input.
14375 for (int i = 1, e = Op.getNumOperands(); i < e; i++)
14376 if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
14377 return false;
14378 return true;
14379}
14380
14381static SDValue isScalarToVec(SDValue Op) {
14382 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
14383 return Op;
14384 if (Op.getOpcode() != ISD::BITCAST)
14385 return SDValue();
14386 Op = Op.getOperand(0);
14387 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
14388 return Op;
14389 return SDValue();
14390}
14391
14392static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
14393 int LHSMaxIdx, int RHSMinIdx,
14394 int RHSMaxIdx, int HalfVec) {
14395 for (int i = 0, e = ShuffV.size(); i < e; i++) {
14396 int Idx = ShuffV[i];
14397 if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
14398 ShuffV[i] += HalfVec;
14399 }
14400 return;
14401}
14402
14403// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
14404// the original is:
14405// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
14406// In such a case, just change the shuffle mask to extract the element
14407// from the permuted index.
14408static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG) {
14409 SDLoc dl(OrigSToV);
14410 EVT VT = OrigSToV.getValueType();
14411   assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
14412          "Expecting a SCALAR_TO_VECTOR here");
14413 SDValue Input = OrigSToV.getOperand(0);
14414
14415 if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
14416 ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
14417 SDValue OrigVector = Input.getOperand(0);
14418
14419 // Can't handle non-const element indices or different vector types
14420 // for the input to the extract and the output of the scalar_to_vector.
14421 if (Idx && VT == OrigVector.getValueType()) {
14422 SmallVector<int, 16> NewMask(VT.getVectorNumElements(), -1);
14423 NewMask[VT.getVectorNumElements() / 2] = Idx->getZExtValue();
14424 return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
14425 }
14426 }
14427 return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
14428 OrigSToV.getOperand(0));
14429}
14430
14431// On little endian subtargets, combine shuffles such as:
14432// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
14433// into:
14434// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
14435// because the latter can be matched to a single instruction merge.
14436// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
14437// to put the value into element zero. Adjust the shuffle mask so that the
14438// vector can remain in permuted form (to prevent a swap prior to a shuffle).
14439SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
14440 SelectionDAG &DAG) const {
14441 SDValue LHS = SVN->getOperand(0);
14442 SDValue RHS = SVN->getOperand(1);
14443 auto Mask = SVN->getMask();
14444 int NumElts = LHS.getValueType().getVectorNumElements();
14445 SDValue Res(SVN, 0);
14446 SDLoc dl(SVN);
14447
14448 // None of these combines are useful on big endian systems since the ISA
14449 // already has a big endian bias.
14450 if (!Subtarget.isLittleEndian() || !Subtarget.hasVSX())
14451 return Res;
14452
14453 // If this is not a shuffle of a shuffle and the first element comes from
14454 // the second vector, canonicalize to the commuted form. This will make it
14455 // more likely to match one of the single instruction patterns.
14456 if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
14457 RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
14458 std::swap(LHS, RHS);
14459 Res = DAG.getCommutedVectorShuffle(*SVN);
14460 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
14461 }
14462
14463 // Adjust the shuffle mask if either input vector comes from a
14464 // SCALAR_TO_VECTOR and keep the respective input vector in permuted
14465 // form (to prevent the need for a swap).
14466 SmallVector<int, 16> ShuffV(Mask.begin(), Mask.end());
14467 SDValue SToVLHS = isScalarToVec(LHS);
14468 SDValue SToVRHS = isScalarToVec(RHS);
14469 if (SToVLHS || SToVRHS) {
14470 int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
14471 : SToVRHS.getValueType().getVectorNumElements();
14472 int NumEltsOut = ShuffV.size();
14473
14474 // Initially assume that neither input is permuted. These will be adjusted
14475 // accordingly if either input is.
14476 int LHSMaxIdx = -1;
14477 int RHSMinIdx = -1;
14478 int RHSMaxIdx = -1;
14479 int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
14480
14481 // Get the permuted scalar to vector nodes for the source(s) that come from
14482 // ISD::SCALAR_TO_VECTOR.
14483 if (SToVLHS) {
14484 // Set up the values for the shuffle vector fixup.
14485 LHSMaxIdx = NumEltsOut / NumEltsIn;
14486 SToVLHS = getSToVPermuted(SToVLHS, DAG);
14487 if (SToVLHS.getValueType() != LHS.getValueType())
14488 SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
14489 LHS = SToVLHS;
14490 }
14491 if (SToVRHS) {
14492 RHSMinIdx = NumEltsOut;
14493 RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
14494 SToVRHS = getSToVPermuted(SToVRHS, DAG);
14495 if (SToVRHS.getValueType() != RHS.getValueType())
14496 SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
14497 RHS = SToVRHS;
14498 }
14499
14500 // Fix up the shuffle mask to reflect where the desired element actually is.
14501 // The minimum and maximum indices that correspond to element zero for both
14502 // the LHS and RHS are computed and will control which shuffle mask entries
14503 // are to be changed. For example, if the RHS is permuted, any shuffle mask
14504 // entries in the range [RHSMinIdx,RHSMaxIdx) will be incremented by
14505 // HalfVec to refer to the corresponding element in the permuted vector.
14506 fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
14507 HalfVec);
14508 Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
14509
14510 // We may have simplified away the shuffle. We won't be able to do anything
14511 // further with it here.
14512 if (!isa<ShuffleVectorSDNode>(Res))
14513 return Res;
14514 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
14515 }
14516
14517 // The common case after we commuted the shuffle is that the RHS is a splat
14518 // and we have elements coming in from the splat at indices that are not
14519 // conducive to using a merge.
14520 // Example:
14521 // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
14522 if (!isSplatBV(RHS))
14523 return Res;
14524
14525 // We are looking for a mask such that all even elements are from
14526 // one vector and all odd elements from the other.
14527 if (!isAlternatingShuffMask(Mask, NumElts))
14528 return Res;
14529
14530 // Adjust the mask so we are pulling in the same index from the splat
14531 // as the index from the interesting vector in consecutive elements.
14532 // Example (even elements from first vector):
14533 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
14534 if (Mask[0] < NumElts)
14535 for (int i = 1, e = Mask.size(); i < e; i += 2)
14536 ShuffV[i] = (ShuffV[i - 1] + NumElts);
14537 // Example (odd elements from first vector):
14538 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
14539 else
14540 for (int i = 0, e = Mask.size(); i < e; i += 2)
14541 ShuffV[i] = (ShuffV[i + 1] + NumElts);
14542
14543 // If the RHS has undefs, we need to remove them since we may have created
14544 // a shuffle that adds those instead of the splat value.
14545 SDValue SplatVal = cast<BuildVectorSDNode>(RHS.getNode())->getSplatValue();
14546 RHS = DAG.getSplatBuildVector(RHS.getValueType(), dl, SplatVal);
14547
14548 Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
14549 return Res;
14550}
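As a worked instance of the final adjustment, assume a v8i16 shuffle whose even elements come from t1 and whose odd elements come from the splat: a mask such as <0,9,1,11,2,13,3,15> becomes <0,8,1,9,2,10,3,11>, so each consecutive pair reads the same index from both inputs and can match a merge. A small standalone sketch of that rewrite (hypothetical names, not from this file):

#include <cstdio>
#include <vector>

// Mirror of the adjustment loops above: for every element taken from the
// splat, pull the same element index as its neighbour from the interesting
// vector (offset by NumElts so it selects from the second input).
static void adjustForSplat(std::vector<int> &ShuffV, int NumElts) {
  if (ShuffV[0] < NumElts)
    for (size_t i = 1; i < ShuffV.size(); i += 2)
      ShuffV[i] = ShuffV[i - 1] + NumElts;
  else
    for (size_t i = 0; i < ShuffV.size(); i += 2)
      ShuffV[i] = ShuffV[i + 1] + NumElts;
}

int main() {
  std::vector<int> Mask = {0, 9, 1, 11, 2, 13, 3, 15}; // even elts from t1
  adjustForSplat(Mask, /*NumElts=*/8);
  for (int M : Mask)
    std::printf("%d ", M);                             // 0 8 1 9 2 10 3 11
  std::printf("\n");
  return 0;
}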
14551
14552SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
14553 LSBaseSDNode *LSBase,
14554 DAGCombinerInfo &DCI) const {
14555   assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
14556          "Not a reverse memop pattern!");
14557
14558 auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
14559 auto Mask = SVN->getMask();
14560 int i = 0;
14561 auto I = Mask.rbegin();
14562 auto E = Mask.rend();
14563
14564 for (; I != E; ++I) {
14565 if (*I != i)
14566 return false;
14567 i++;
14568 }
14569 return true;
14570 };
14571
14572 SelectionDAG &DAG = DCI.DAG;
14573 EVT VT = SVN->getValueType(0);
14574
14575 if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
14576 return SDValue();
14577
14578   // Before P9, the PPCVSXSwapRemoval pass rewrites the element order
14579   // (see the comment in PPCVSXSwapRemoval.cpp). This combine conflicts
14580   // with that optimization, so we skip it on pre-P9 subtargets.
14581 if (!Subtarget.hasP9Vector())
14582 return SDValue();
14583
14584   if (!IsElementReverse(SVN))
14585 return SDValue();
14586
14587 if (LSBase->getOpcode() == ISD::LOAD) {
14588 SDLoc dl(SVN);
14589 SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
14590 return DAG.getMemIntrinsicNode(
14591 PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
14592 LSBase->getMemoryVT(), LSBase->getMemOperand());
14593 }
14594
14595 if (LSBase->getOpcode() == ISD::STORE) {
14596 SDLoc dl(LSBase);
14597 SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
14598 LSBase->getBasePtr()};
14599 return DAG.getMemIntrinsicNode(
14600 PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
14601 LSBase->getMemoryVT(), LSBase->getMemOperand());
14602 }
14603
14604   llvm_unreachable("Expected a load or store node here");
14605}
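Note that IsElementReverse above only accepts the fully reversed identity mask, e.g. <3,2,1,0> for v4i32 or <7,6,...,1,0> for v8i16. A minimal restatement with sample masks (hypothetical helper name, not from this file):

#include <cassert>
#include <vector>

// True iff Mask is <N-1, N-2, ..., 1, 0>, i.e. a full element reversal.
static bool isElementReverseMask(const std::vector<int> &Mask) {
  int Expected = 0;
  for (auto I = Mask.rbegin(), E = Mask.rend(); I != E; ++I)
    if (*I != Expected++)
      return false;
  return true;
}

int main() {
  assert(isElementReverseMask({3, 2, 1, 0}));  // v4i32 reverse
  assert(!isElementReverseMask({0, 1, 2, 3})); // identity, not a reverse
  assert(!isElementReverseMask({3, 2, 0, 1})); // only partially reversed
  return 0;
}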
14606
14607SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
14608 DAGCombinerInfo &DCI) const {
14609 SelectionDAG &DAG = DCI.DAG;
14610 SDLoc dl(N);
14611 switch (N->getOpcode()) {
14612 default: break;
14613 case ISD::ADD:
14614 return combineADD(N, DCI);
14615 case ISD::SHL:
14616 return combineSHL(N, DCI);
14617 case ISD::SRA:
14618 return combineSRA(N, DCI);
14619 case ISD::SRL:
14620 return combineSRL(N, DCI);
14621 case ISD::MUL:
14622 return combineMUL(N, DCI);
14623 case ISD::FMA:
14624 case PPCISD::FNMSUB:
14625 return combineFMALike(N, DCI);
14626 case PPCISD::SHL:
14627 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
14628 return N->getOperand(0);
14629 break;
14630 case PPCISD::SRL:
14631 if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
14632 return N->getOperand(0);
14633 break;
14634 case PPCISD::SRA:
14635 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
14636 if (C->isNullValue() || // 0 >>s V -> 0.
14637 C->isAllOnesValue()) // -1 >>s V -> -1.
14638 return N->getOperand(0);
14639 }
14640 break;
14641 case ISD::SIGN_EXTEND:
14642 case ISD::ZERO_EXTEND:
14643 case ISD::ANY_EXTEND:
14644 return DAGCombineExtBoolTrunc(N, DCI);
14645 case ISD::TRUNCATE:
14646 return combineTRUNCATE(N, DCI);
14647 case ISD::SETCC:
14648 if (SDValue CSCC = combineSetCC(N, DCI))
14649 return CSCC;
14650 LLVM_FALLTHROUGH[[gnu::fallthrough]];
14651 case ISD::SELECT_CC:
14652 return DAGCombineTruncBoolExt(N, DCI);
14653 case ISD::SINT_TO_FP:
14654 case ISD::UINT_TO_FP:
14655 return combineFPToIntToFP(N, DCI);
14656 case ISD::VECTOR_SHUFFLE:
14657 if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
14658 LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
14659 return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
14660 }
14661 return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
14662 case ISD::STORE: {
14663
14664 EVT Op1VT = N->getOperand(1).getValueType();
14665 unsigned Opcode = N->getOperand(1).getOpcode();
14666
14667 if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT) {
14668       SDValue Val = combineStoreFPToInt(N, DCI);
14669 if (Val)
14670 return Val;
14671 }
14672
14673 if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
14674 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
14675       SDValue Val = combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
14676 if (Val)
14677 return Val;
14678 }
14679
14680 // Turn STORE (BSWAP) -> sthbrx/stwbrx.
14681 if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
14682 N->getOperand(1).getNode()->hasOneUse() &&
14683 (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
14684 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
14685
14686       // STBRX can only handle simple types and it makes no sense to store
14687       // fewer than two bytes in byte-reversed order.
14688 EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
14689 if (mVT.isExtended() || mVT.getSizeInBits() < 16)
14690 break;
14691
14692 SDValue BSwapOp = N->getOperand(1).getOperand(0);
14693 // Do an any-extend to 32-bits if this is a half-word input.
14694 if (BSwapOp.getValueType() == MVT::i16)
14695 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
14696
14697       // If the type of the BSWAP operand is wider than the stored memory
14698       // width, it needs to be shifted right before the STBRX.
14699 if (Op1VT.bitsGT(mVT)) {
14700 int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
14701 BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
14702 DAG.getConstant(Shift, dl, MVT::i32));
14703 // Need to truncate if this is a bswap of i64 stored as i32/i16.
14704 if (Op1VT == MVT::i64)
14705 BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
14706 }
14707
14708 SDValue Ops[] = {
14709 N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
14710 };
14711 return
14712 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
14713 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
14714 cast<StoreSDNode>(N)->getMemOperand());
14715 }
14716
14717     // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
14718     // so that constant materialization has a better chance of being CSE'd.
14719 if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
14720 isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
14721       // Need to sign-extend to 64 bits to handle negative values.
14722 EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
14723 uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
14724 MemVT.getSizeInBits());
14725 SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
14726
14727 // DAG.getTruncStore() can't be used here because it doesn't accept
14728 // the general (base + offset) addressing mode.
14729 // So we use UpdateNodeOperands and setTruncatingStore instead.
14730 DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
14731 N->getOperand(3));
14732 cast<StoreSDNode>(N)->setTruncatingStore(true);
14733 return SDValue(N, 0);
14734 }
14735
14736     // For little endian, VSX stores require generating xxswapd/stxvd2x.
14737 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
14738 if (Op1VT.isSimple()) {
14739 MVT StoreVT = Op1VT.getSimpleVT();
14740 if (Subtarget.needsSwapsForVSXMemOps() &&
14741 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
14742 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
14743 return expandVSXStoreForLE(N, DCI);
14744 }
14745 break;
14746 }
14747 case ISD::LOAD: {
14748 LoadSDNode *LD = cast<LoadSDNode>(N);
14749 EVT VT = LD->getValueType(0);
14750
14751 // For little endian, VSX loads require generating lxvd2x/xxswapd.
14752 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
14753 if (VT.isSimple()) {
14754 MVT LoadVT = VT.getSimpleVT();
14755 if (Subtarget.needsSwapsForVSXMemOps() &&
14756 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
14757 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
14758 return expandVSXLoadForLE(N, DCI);
14759 }
14760
14761 // We sometimes end up with a 64-bit integer load, from which we extract
14762 // two single-precision floating-point numbers. This happens with
14763 // std::complex<float>, and other similar structures, because of the way we
14764 // canonicalize structure copies. However, if we lack direct moves,
14765 // then the final bitcasts from the extracted integer values to the
14766 // floating-point numbers turn into store/load pairs. Even with direct moves,
14767 // just loading the two floating-point numbers is likely better.
14768 auto ReplaceTwoFloatLoad = [&]() {
14769 if (VT != MVT::i64)
14770 return false;
14771
14772 if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
14773 LD->isVolatile())
14774 return false;
14775
14776 // We're looking for a sequence like this:
14777 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
14778 // t16: i64 = srl t13, Constant:i32<32>
14779 // t17: i32 = truncate t16
14780 // t18: f32 = bitcast t17
14781 // t19: i32 = truncate t13
14782 // t20: f32 = bitcast t19
14783
14784 if (!LD->hasNUsesOfValue(2, 0))
14785 return false;
14786
14787 auto UI = LD->use_begin();
14788 while (UI.getUse().getResNo() != 0) ++UI;
14789 SDNode *Trunc = *UI++;
14790 while (UI.getUse().getResNo() != 0) ++UI;
14791 SDNode *RightShift = *UI;
14792 if (Trunc->getOpcode() != ISD::TRUNCATE)
14793 std::swap(Trunc, RightShift);
14794
14795 if (Trunc->getOpcode() != ISD::TRUNCATE ||
14796 Trunc->getValueType(0) != MVT::i32 ||
14797 !Trunc->hasOneUse())
14798 return false;
14799 if (RightShift->getOpcode() != ISD::SRL ||
14800 !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
14801 RightShift->getConstantOperandVal(1) != 32 ||
14802 !RightShift->hasOneUse())
14803 return false;
14804
14805 SDNode *Trunc2 = *RightShift->use_begin();
14806 if (Trunc2->getOpcode() != ISD::TRUNCATE ||
14807 Trunc2->getValueType(0) != MVT::i32 ||
14808 !Trunc2->hasOneUse())
14809 return false;
14810
14811 SDNode *Bitcast = *Trunc->use_begin();
14812 SDNode *Bitcast2 = *Trunc2->use_begin();
14813
14814 if (Bitcast->getOpcode() != ISD::BITCAST ||
14815 Bitcast->getValueType(0) != MVT::f32)
14816 return false;
14817 if (Bitcast2->getOpcode() != ISD::BITCAST ||
14818 Bitcast2->getValueType(0) != MVT::f32)
14819 return false;
14820
14821 if (Subtarget.isLittleEndian())
14822 std::swap(Bitcast, Bitcast2);
14823
14824 // Bitcast has the second float (in memory-layout order) and Bitcast2
14825 // has the first one.
14826
14827 SDValue BasePtr = LD->getBasePtr();
14828 if (LD->isIndexed()) {
14829         assert(LD->getAddressingMode() == ISD::PRE_INC &&
14830                "Non-pre-inc AM on PPC?");
14831 BasePtr =
14832 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
14833 LD->getOffset());
14834 }
14835
14836 auto MMOFlags =
14837 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
14838 SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
14839 LD->getPointerInfo(), LD->getAlignment(),
14840 MMOFlags, LD->getAAInfo());
14841 SDValue AddPtr =
14842 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
14843 BasePtr, DAG.getIntPtrConstant(4, dl));
14844 SDValue FloatLoad2 = DAG.getLoad(
14845 MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
14846 LD->getPointerInfo().getWithOffset(4),
14847 MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo());
14848
14849 if (LD->isIndexed()) {
14850 // Note that DAGCombine should re-form any pre-increment load(s) from
14851 // what is produced here if that makes sense.
14852 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
14853 }
14854
14855 DCI.CombineTo(Bitcast2, FloatLoad);
14856 DCI.CombineTo(Bitcast, FloatLoad2);
14857
14858 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
14859 SDValue(FloatLoad2.getNode(), 1));
14860 return true;
14861 };
14862
14863 if (ReplaceTwoFloatLoad())
14864 return SDValue(N, 0);
14865
14866 EVT MemVT = LD->getMemoryVT();
14867 Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
14868 Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
14869 if (LD->isUnindexed() && VT.isVector() &&
14870 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
14871 // P8 and later hardware should just use LOAD.
14872 !Subtarget.hasP8Vector() &&
14873 (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
14874 VT == MVT::v4f32))) &&
14875 LD->getAlign() < ABIAlignment) {
14876 // This is a type-legal unaligned Altivec load.
14877 SDValue Chain = LD->getChain();
14878 SDValue Ptr = LD->getBasePtr();
14879 bool isLittleEndian = Subtarget.isLittleEndian();
14880
14881 // This implements the loading of unaligned vectors as described in
14882 // the venerable Apple Velocity Engine overview. Specifically:
14883 // https://developer.apple.com/hardwaredrivers/ve/alignment.html
14884 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
14885 //
14886 // The general idea is to expand a sequence of one or more unaligned
14887 // loads into an alignment-based permutation-control instruction (lvsl
14888 // or lvsr), a series of regular vector loads (which always truncate
14889 // their input address to an aligned address), and a series of
14890 // permutations. The results of these permutations are the requested
14891 // loaded values. The trick is that the last "extra" load is not taken
14892 // from the address you might suspect (sizeof(vector) bytes after the
14893 // last requested load), but rather sizeof(vector) - 1 bytes after the
14894 // last requested vector. The point of this is to avoid a page fault if
14895 // the base address happened to be aligned. This works because if the
14896 // base address is aligned, then adding less than a full vector length
14897 // will cause the last vector in the sequence to be (re)loaded.
14898 // Otherwise, the next vector will be fetched as you might suspect was
14899 // necessary.
14900
14901 // We might be able to reuse the permutation generation from
14902 // a different base address offset from this one by an aligned amount.
14903 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
14904 // optimization later.
14905 Intrinsic::ID Intr, IntrLD, IntrPerm;
14906 MVT PermCntlTy, PermTy, LDTy;
14907 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
14908 : Intrinsic::ppc_altivec_lvsl;
14909 IntrLD = Intrinsic::ppc_altivec_lvx;
14910 IntrPerm = Intrinsic::ppc_altivec_vperm;
14911 PermCntlTy = MVT::v16i8;
14912 PermTy = MVT::v4i32;
14913 LDTy = MVT::v4i32;
14914
14915 SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
14916
14917 // Create the new MMO for the new base load. It is like the original MMO,
14918 // but represents an area in memory almost twice the vector size centered
14919 // on the original address. If the address is unaligned, we might start
14920 // reading up to (sizeof(vector)-1) bytes below the address of the
14921 // original unaligned load.
14922 MachineFunction &MF = DAG.getMachineFunction();
14923 MachineMemOperand *BaseMMO =
14924 MF.getMachineMemOperand(LD->getMemOperand(),
14925 -(long)MemVT.getStoreSize()+1,
14926 2*MemVT.getStoreSize()-1);
14927
14928 // Create the new base load.
14929 SDValue LDXIntID =
14930 DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
14931 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
14932 SDValue BaseLoad =
14933 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
14934 DAG.getVTList(PermTy, MVT::Other),
14935 BaseLoadOps, LDTy, BaseMMO);
14936
14937 // Note that the value of IncOffset (which is provided to the next
14938 // load's pointer info offset value, and thus used to calculate the
14939 // alignment), and the value of IncValue (which is actually used to
14940 // increment the pointer value) are different! This is because we
14941 // require the next load to appear to be aligned, even though it
14942 // is actually offset from the base pointer by a lesser amount.
14943 int IncOffset = VT.getSizeInBits() / 8;
14944 int IncValue = IncOffset;
14945
14946 // Walk (both up and down) the chain looking for another load at the real
14947 // (aligned) offset (the alignment of the other load does not matter in
14948 // this case). If found, then do not use the offset reduction trick, as
14949 // that will prevent the loads from being later combined (as they would
14950 // otherwise be duplicates).
14951 if (!findConsecutiveLoad(LD, DAG))
14952 --IncValue;
14953
14954 SDValue Increment =
14955 DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
14956 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
14957
14958 MachineMemOperand *ExtraMMO =
14959 MF.getMachineMemOperand(LD->getMemOperand(),
14960 1, 2*MemVT.getStoreSize()-1);
14961 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
14962 SDValue ExtraLoad =
14963 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
14964 DAG.getVTList(PermTy, MVT::Other),
14965 ExtraLoadOps, LDTy, ExtraMMO);
14966
14967 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
14968 BaseLoad.getValue(1), ExtraLoad.getValue(1));
14969
14970 // Because vperm has a big-endian bias, we must reverse the order
14971 // of the input vectors and complement the permute control vector
14972 // when generating little endian code. We have already handled the
14973 // latter by using lvsr instead of lvsl, so just reverse BaseLoad
14974 // and ExtraLoad here.
14975 SDValue Perm;
14976 if (isLittleEndian)
14977 Perm = BuildIntrinsicOp(IntrPerm,
14978 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
14979 else
14980 Perm = BuildIntrinsicOp(IntrPerm,
14981 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
14982
14983 if (VT != PermTy)
14984 Perm = Subtarget.hasAltivec()
14985 ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
14986 : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
14987 DAG.getTargetConstant(1, dl, MVT::i64));
14988 // second argument is 1 because this rounding
14989 // is always exact.
14990
14991 // The output of the permutation is our loaded result, the TokenFactor is
14992 // our new chain.
14993 DCI.CombineTo(N, Perm, TF);
14994 return SDValue(N, 0);
14995 }
14996 }
14997 break;
14998 case ISD::INTRINSIC_WO_CHAIN: {
14999 bool isLittleEndian = Subtarget.isLittleEndian();
15000 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
15001 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
15002 : Intrinsic::ppc_altivec_lvsl);
15003 if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
15004 SDValue Add = N->getOperand(1);
15005
15006 int Bits = 4 /* 16 byte alignment */;
15007
15008 if (DAG.MaskedValueIsZero(Add->getOperand(1),
15009 APInt::getAllOnesValue(Bits /* alignment */)
15010 .zext(Add.getScalarValueSizeInBits()))) {
15011 SDNode *BasePtr = Add->getOperand(0).getNode();
15012 for (SDNode::use_iterator UI = BasePtr->use_begin(),
15013 UE = BasePtr->use_end();
15014 UI != UE; ++UI) {
15015 if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
15016 cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==
15017 IID) {
15018 // We've found another LVSL/LVSR, and this address is an aligned
15019 // multiple of that one. The results will be the same, so use the
15020 // one we've just found instead.
15021
15022 return SDValue(*UI, 0);
15023 }
15024 }
15025 }
15026
15027 if (isa<ConstantSDNode>(Add->getOperand(1))) {
15028 SDNode *BasePtr = Add->getOperand(0).getNode();
15029 for (SDNode::use_iterator UI = BasePtr->use_begin(),
15030 UE = BasePtr->use_end(); UI != UE; ++UI) {
15031 if (UI->getOpcode() == ISD::ADD &&
15032 isa<ConstantSDNode>(UI->getOperand(1)) &&
15033 (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
15034 cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) %
15035 (1ULL << Bits) == 0) {
15036 SDNode *OtherAdd = *UI;
15037 for (SDNode::use_iterator VI = OtherAdd->use_begin(),
15038 VE = OtherAdd->use_end(); VI != VE; ++VI) {
15039 if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
15040 cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) {
15041 return SDValue(*VI, 0);
15042 }
15043 }
15044 }
15045 }
15046 }
15047 }
15048
15049     // Combine vmaxsw/h/b(a, a's negation) to abs(a)
15050     // to expose the vabsduw/h/b opportunity for downstream combines.
15051 if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
15052 (IID == Intrinsic::ppc_altivec_vmaxsw ||
15053 IID == Intrinsic::ppc_altivec_vmaxsh ||
15054 IID == Intrinsic::ppc_altivec_vmaxsb)) {
15055 SDValue V1 = N->getOperand(1);
15056 SDValue V2 = N->getOperand(2);
15057 if ((V1.getSimpleValueType() == MVT::v4i32 ||
15058 V1.getSimpleValueType() == MVT::v8i16 ||
15059 V1.getSimpleValueType() == MVT::v16i8) &&
15060 V1.getSimpleValueType() == V2.getSimpleValueType()) {
15061 // (0-a, a)
15062 if (V1.getOpcode() == ISD::SUB &&
15063 ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
15064 V1.getOperand(1) == V2) {
15065 return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
15066 }
15067 // (a, 0-a)
15068 if (V2.getOpcode() == ISD::SUB &&
15069 ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
15070 V2.getOperand(1) == V1) {
15071 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
15072 }
15073 // (x-y, y-x)
15074 if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
15075 V1.getOperand(0) == V2.getOperand(1) &&
15076 V1.getOperand(1) == V2.getOperand(0)) {
15077 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
15078 }
15079 }
15080 }
15081 }
15082
15083 break;
15084 case ISD::INTRINSIC_W_CHAIN:
15085 // For little endian, VSX loads require generating lxvd2x/xxswapd.
15086 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
15087 if (Subtarget.needsSwapsForVSXMemOps()) {
15088 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
15089 default:
15090 break;
15091 case Intrinsic::ppc_vsx_lxvw4x:
15092 case Intrinsic::ppc_vsx_lxvd2x:
15093 return expandVSXLoadForLE(N, DCI);
15094 }
15095 }
15096 break;
15097 case ISD::INTRINSIC_VOID:
15098 // For little endian, VSX stores require generating xxswapd/stxvd2x.
15099 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
15100 if (Subtarget.needsSwapsForVSXMemOps()) {
15101 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
15102 default:
15103 break;
15104 case Intrinsic::ppc_vsx_stxvw4x:
15105 case Intrinsic::ppc_vsx_stxvd2x:
15106 return expandVSXStoreForLE(N, DCI);
15107 }
15108 }
15109 break;
15110 case ISD::BSWAP:
15111 // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
15112 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
15113 N->getOperand(0).hasOneUse() &&
15114 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
15115 (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
15116 N->getValueType(0) == MVT::i64))) {
15117 SDValue Load = N->getOperand(0);
15118 LoadSDNode *LD = cast<LoadSDNode>(Load);
15119 // Create the byte-swapping load.
15120 SDValue Ops[] = {
15121 LD->getChain(), // Chain
15122 LD->getBasePtr(), // Ptr
15123 DAG.getValueType(N->getValueType(0)) // VT
15124 };
15125 SDValue BSLoad =
15126 DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
15127 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
15128 MVT::i64 : MVT::i32, MVT::Other),
15129 Ops, LD->getMemoryVT(), LD->getMemOperand());
15130
15131 // If this is an i16 load, insert the truncate.
15132 SDValue ResVal = BSLoad;
15133 if (N->getValueType(0) == MVT::i16)
15134 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
15135
15136 // First, combine the bswap away. This makes the value produced by the
15137 // load dead.
15138 DCI.CombineTo(N, ResVal);
15139
15140 // Next, combine the load away, we give it a bogus result value but a real
15141 // chain result. The result value is dead because the bswap is dead.
15142 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
15143
15144 // Return N so it doesn't get rechecked!
15145 return SDValue(N, 0);
15146 }
15147 break;
15148 case PPCISD::VCMP:
15149 // If a VCMPo node already exists with exactly the same operands as this
15150 // node, use its result instead of this node (VCMPo computes both a CR6 and
15151 // a normal output).
15152 //
15153 if (!N->getOperand(0).hasOneUse() &&
15154 !N->getOperand(1).hasOneUse() &&
15155 !N->getOperand(2).hasOneUse()) {
15156
15157 // Scan all of the users of the LHS, looking for VCMPo's that match.
15158 SDNode *VCMPoNode = nullptr;
15159
15160 SDNode *LHSN = N->getOperand(0).getNode();
15161 for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
15162 UI != E; ++UI)
15163 if (UI->getOpcode() == PPCISD::VCMPo &&
15164 UI->getOperand(1) == N->getOperand(1) &&
15165 UI->getOperand(2) == N->getOperand(2) &&
15166 UI->getOperand(0) == N->getOperand(0)) {
15167 VCMPoNode = *UI;
15168 break;
15169 }
15170
15171 // If there is no VCMPo node, or if the flag value has a single use, don't
15172 // transform this.
15173 if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
15174 break;
15175
15176 // Look at the (necessarily single) use of the flag value. If it has a
15177 // chain, this transformation is more complex. Note that multiple things
15178 // could use the value result, which we should ignore.
15179 SDNode *FlagUser = nullptr;
15180 for (SDNode::use_iterator UI = VCMPoNode->use_begin();
15181 FlagUser == nullptr; ++UI) {
15182       assert(UI != VCMPoNode->use_end() && "Didn't find user!");
15183 SDNode *User = *UI;
15184 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
15185 if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
15186 FlagUser = User;
15187 break;
15188 }
15189 }
15190 }
15191
15192 // If the user is a MFOCRF instruction, we know this is safe.
15193 // Otherwise we give up for right now.
15194 if (FlagUser->getOpcode() == PPCISD::MFOCRF)
15195 return SDValue(VCMPoNode, 0);
15196 }
15197 break;
15198 case ISD::BRCOND: {
15199 SDValue Cond = N->getOperand(1);
15200 SDValue Target = N->getOperand(2);
15201
15202 if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
15203 cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
15204 Intrinsic::loop_decrement) {
15205
15206 // We now need to make the intrinsic dead (it cannot be instruction
15207 // selected).
15208 DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
15209       assert(Cond.getNode()->hasOneUse() &&
15210              "Counter decrement has more than one use");
15211
15212 return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
15213 N->getOperand(0), Target);
15214 }
15215 }
15216 break;
15217 case ISD::BR_CC: {
15218 // If this is a branch on an altivec predicate comparison, lower this so
15219 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
15220 // lowering is done pre-legalize, because the legalizer lowers the predicate
15221 // compare down to code that is difficult to reassemble.
15222 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
15223 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
15224
15225 // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
15226 // value. If so, pass-through the AND to get to the intrinsic.
15227 if (LHS.getOpcode() == ISD::AND &&
15228 LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
15229 cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
15230 Intrinsic::loop_decrement &&
15231 isa<ConstantSDNode>(LHS.getOperand(1)) &&
15232 !isNullConstant(LHS.getOperand(1)))
15233 LHS = LHS.getOperand(0);
15234
15235 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
15236 cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
15237 Intrinsic::loop_decrement &&
15238 isa<ConstantSDNode>(RHS)) {
15239       assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
15240              "Counter decrement comparison is not EQ or NE");
15241
15242 unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
15243 bool isBDNZ = (CC == ISD::SETEQ && Val) ||
15244 (CC == ISD::SETNE && !Val);
15245
15246 // We now need to make the intrinsic dead (it cannot be instruction
15247 // selected).
15248 DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
15249       assert(LHS.getNode()->hasOneUse() &&
15250              "Counter decrement has more than one use");
15251
15252 return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
15253 N->getOperand(0), N->getOperand(4));
15254 }
15255
15256 int CompareOpc;
15257 bool isDot;
15258
15259 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
15260 isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
15261 getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
15262       assert(isDot && "Can't compare against a vector result!");
15263
15264 // If this is a comparison against something other than 0/1, then we know
15265 // that the condition is never/always true.
15266 unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
15267 if (Val != 0 && Val != 1) {
15268 if (CC == ISD::SETEQ) // Cond never true, remove branch.
15269 return N->getOperand(0);
15270 // Always !=, turn it into an unconditional branch.
15271 return DAG.getNode(ISD::BR, dl, MVT::Other,
15272 N->getOperand(0), N->getOperand(4));
15273 }
15274
15275 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
15276
15277 // Create the PPCISD altivec 'dot' comparison node.
15278 SDValue Ops[] = {
15279 LHS.getOperand(2), // LHS of compare
15280 LHS.getOperand(3), // RHS of compare
15281 DAG.getConstant(CompareOpc, dl, MVT::i32)
15282 };
15283 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
15284 SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
15285
15286 // Unpack the result based on how the target uses it.
15287 PPC::Predicate CompOpc;
15288 switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
15289 default: // Can't happen, don't crash on invalid number though.
15290 case 0: // Branch on the value of the EQ bit of CR6.
15291 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
15292 break;
15293 case 1: // Branch on the inverted value of the EQ bit of CR6.
15294 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
15295 break;
15296 case 2: // Branch on the value of the LT bit of CR6.
15297 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
15298 break;
15299 case 3: // Branch on the inverted value of the LT bit of CR6.
15300 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
15301 break;
15302 }
15303
15304 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
15305 DAG.getConstant(CompOpc, dl, MVT::i32),
15306 DAG.getRegister(PPC::CR6, MVT::i32),
15307 N->getOperand(4), CompNode.getValue(1));
15308 }
15309 break;
15310 }
15311 case ISD::BUILD_VECTOR:
15312 return DAGCombineBuildVector(N, DCI);
15313 case ISD::ABS:
15314 return combineABS(N, DCI);
15315 case ISD::VSELECT:
15316 return combineVSelect(N, DCI);
15317 }
15318
15319 return SDValue();
15320}
15321
15322SDValue
15323PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
15324 SelectionDAG &DAG,
15325 SmallVectorImpl<SDNode *> &Created) const {
15326 // fold (sdiv X, pow2)
15327 EVT VT = N->getValueType(0);
15328 if (VT == MVT::i64 && !Subtarget.isPPC64())
15329 return SDValue();
15330 if ((VT != MVT::i32 && VT != MVT::i64) ||
15331 !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
15332 return SDValue();
15333
15334 SDLoc DL(N);
15335 SDValue N0 = N->getOperand(0);
15336
15337 bool IsNegPow2 = (-Divisor).isPowerOf2();
15338 unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
15339 SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
15340
15341 SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
15342 Created.push_back(Op.getNode());
15343
15344 if (IsNegPow2) {
15345 Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
15346 Created.push_back(Op.getNode());
15347 }
15348
15349 return Op;
15350}
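The PPCISD::SRA_ADDZE node built above models the srawi/addze idiom: the arithmetic shift rounds toward negative infinity and sets the carry when a negative dividend has bits shifted out, and adding the carry back corrects the quotient to round toward zero, as sdiv requires. A scalar sketch of the computation (illustrative only, assuming 32-bit operands):

#include <cassert>
#include <cstdint>

// Model of srawi + addze for a 32-bit sdiv by (1 << Lg2): shift
// arithmetically, then add 1 if the value was negative and any one-bits
// were shifted out (the carry that srawi records).
static int32_t sdivPow2(int32_t X, unsigned Lg2) {
  int32_t Shifted = X >> Lg2;                      // arithmetic shift
  bool Carry = X < 0 && (X & ((1 << Lg2) - 1)) != 0;
  return Shifted + (Carry ? 1 : 0);                // rounds toward zero
}

int main() {
  assert(sdivPow2(7, 1) == 3);
  assert(sdivPow2(-7, 1) == -3); // the shift alone would give -4
  assert(sdivPow2(-8, 2) == -2);
  return 0;
}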
15351
15352//===----------------------------------------------------------------------===//
15353// Inline Assembly Support
15354//===----------------------------------------------------------------------===//
15355
15356void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
15357 KnownBits &Known,
15358 const APInt &DemandedElts,
15359 const SelectionDAG &DAG,
15360 unsigned Depth) const {
15361 Known.resetAll();
15362 switch (Op.getOpcode()) {
15363 default: break;
15364 case PPCISD::LBRX: {
15365 // lhbrx is known to have the top bits cleared out.
15366 if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
15367 Known.Zero = 0xFFFF0000;
15368 break;
15369 }
15370 case ISD::INTRINSIC_WO_CHAIN: {
15371 switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
15372 default: break;
15373 case Intrinsic::ppc_altivec_vcmpbfp_p:
15374 case Intrinsic::ppc_altivec_vcmpeqfp_p:
15375 case Intrinsic::ppc_altivec_vcmpequb_p:
15376 case Intrinsic::ppc_altivec_vcmpequh_p:
15377 case Intrinsic::ppc_altivec_vcmpequw_p:
15378 case Intrinsic::ppc_altivec_vcmpequd_p:
15379 case Intrinsic::ppc_altivec_vcmpequq_p:
15380 case Intrinsic::ppc_altivec_vcmpgefp_p:
15381 case Intrinsic::ppc_altivec_vcmpgtfp_p:
15382 case Intrinsic::ppc_altivec_vcmpgtsb_p:
15383 case Intrinsic::ppc_altivec_vcmpgtsh_p:
15384 case Intrinsic::ppc_altivec_vcmpgtsw_p:
15385 case Intrinsic::ppc_altivec_vcmpgtsd_p:
15386 case Intrinsic::ppc_altivec_vcmpgtsq_p:
15387 case Intrinsic::ppc_altivec_vcmpgtub_p:
15388 case Intrinsic::ppc_altivec_vcmpgtuh_p:
15389 case Intrinsic::ppc_altivec_vcmpgtuw_p:
15390 case Intrinsic::ppc_altivec_vcmpgtud_p:
15391 case Intrinsic::ppc_altivec_vcmpgtuq_p:
15392 Known.Zero = ~1U; // All bits but the low one are known to be zero.
15393 break;
15394 }
15395 }
15396 }
15397}
15398
15399Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
15400 switch (Subtarget.getCPUDirective()) {
15401 default: break;
15402 case PPC::DIR_970:
15403 case PPC::DIR_PWR4:
15404 case PPC::DIR_PWR5:
15405 case PPC::DIR_PWR5X:
15406 case PPC::DIR_PWR6:
15407 case PPC::DIR_PWR6X:
15408 case PPC::DIR_PWR7:
15409 case PPC::DIR_PWR8:
15410 case PPC::DIR_PWR9:
15411 case PPC::DIR_PWR10:
15412 case PPC::DIR_PWR_FUTURE: {
15413 if (!ML)
15414 break;
15415
15416 if (!DisableInnermostLoopAlign32) {
15417       // If the nested loop is an innermost loop, prefer a 32-byte alignment,
15418 // so that we can decrease cache misses and branch-prediction misses.
15419 // Actual alignment of the loop will depend on the hotness check and other
15420 // logic in alignBlocks.
15421 if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
15422 return Align(32);
15423 }
15424
15425 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
15426
15427 // For small loops (between 5 and 8 instructions), align to a 32-byte
15428 // boundary so that the entire loop fits in one instruction-cache line.
15429 uint64_t LoopSize = 0;
15430 for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
15431 for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) {
15432 LoopSize += TII->getInstSizeInBytes(*J);
15433 if (LoopSize > 32)
15434 break;
15435 }
15436
15437 if (LoopSize > 16 && LoopSize <= 32)
15438 return Align(32);
15439
15440 break;
15441 }
15442 }
15443
15444 return TargetLowering::getPrefLoopAlignment(ML);
15445}
15446
15447/// getConstraintType - Given a constraint, return the type of
15448/// constraint it is for this target.
15449PPCTargetLowering::ConstraintType
15450PPCTargetLowering::getConstraintType(StringRef Constraint) const {
15451 if (Constraint.size() == 1) {
15452 switch (Constraint[0]) {
15453 default: break;
15454 case 'b':
15455 case 'r':
15456 case 'f':
15457 case 'd':
15458 case 'v':
15459 case 'y':
15460 return C_RegisterClass;
15461 case 'Z':
15462 // FIXME: While Z does indicate a memory constraint, it specifically
15463 // indicates an r+r address (used in conjunction with the 'y' modifier
15464 // in the replacement string). Currently, we're forcing the base
15465 // register to be r0 in the asm printer (which is interpreted as zero)
15466 // and forming the complete address in the second register. This is
15467 // suboptimal.
15468 return C_Memory;
15469 }
15470 } else if (Constraint == "wc") { // individual CR bits.
15471 return C_RegisterClass;
15472 } else if (Constraint == "wa" || Constraint == "wd" ||
15473 Constraint == "wf" || Constraint == "ws" ||
15474 Constraint == "wi" || Constraint == "ww") {
15475 return C_RegisterClass; // VSX registers.
15476 }
15477 return TargetLowering::getConstraintType(Constraint);
15478}
15479
15480/// Examine constraint type and operand type and determine a weight value.
15481/// This object must already have been set up with the operand type
15482/// and the current alternative constraint selected.
15483TargetLowering::ConstraintWeight
15484PPCTargetLowering::getSingleConstraintMatchWeight(
15485 AsmOperandInfo &info, const char *constraint) const {
15486 ConstraintWeight weight = CW_Invalid;
15487 Value *CallOperandVal = info.CallOperandVal;
15488 // If we don't have a value, we can't do a match,
15489 // but allow it at the lowest weight.
15490 if (!CallOperandVal)
15491 return CW_Default;
15492 Type *type = CallOperandVal->getType();
15493
15494 // Look at the constraint type.
15495 if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
15496 return CW_Register; // an individual CR bit.
15497 else if ((StringRef(constraint) == "wa" ||
15498 StringRef(constraint) == "wd" ||
15499 StringRef(constraint) == "wf") &&
15500 type->isVectorTy())
15501 return CW_Register;
15502 else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
15503     return CW_Register; // holds 64-bit integer data.
15504 else if (StringRef(constraint) == "ws" && type->isDoubleTy())
15505 return CW_Register;
15506 else if (StringRef(constraint) == "ww" && type->isFloatTy())
15507 return CW_Register;
15508
15509 switch (*constraint) {
15510 default:
15511 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
15512 break;
15513 case 'b':
15514 if (type->isIntegerTy())
15515 weight = CW_Register;
15516 break;
15517 case 'f':
15518 if (type->isFloatTy())
15519 weight = CW_Register;
15520 break;
15521 case 'd':
15522 if (type->isDoubleTy())
15523 weight = CW_Register;
15524 break;
15525 case 'v':
15526 if (type->isVectorTy())
15527 weight = CW_Register;
15528 break;
15529 case 'y':
15530 weight = CW_Register;
15531 break;
15532 case 'Z':
15533 weight = CW_Memory;
15534 break;
15535 }
15536 return weight;
15537}
15538
15539std::pair<unsigned, const TargetRegisterClass *>
15540PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
15541 StringRef Constraint,
15542 MVT VT) const {
15543 if (Constraint.size() == 1) {
15544 // GCC RS6000 Constraint Letters
15545 switch (Constraint[0]) {
15546 case 'b': // R1-R31
15547 if (VT == MVT::i64 && Subtarget.isPPC64())
15548 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
15549 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
15550 case 'r': // R0-R31
15551 if (VT == MVT::i64 && Subtarget.isPPC64())
15552 return std::make_pair(0U, &PPC::G8RCRegClass);
15553 return std::make_pair(0U, &PPC::GPRCRegClass);
15554 // 'd' and 'f' constraints are both defined to be "the floating point
15555 // registers", where one is for 32-bit and the other for 64-bit. We don't
15556 // really care overly much here so just give them all the same reg classes.
15557 case 'd':
15558 case 'f':
15559 if (Subtarget.hasSPE()) {
15560 if (VT == MVT::f32 || VT == MVT::i32)
15561 return std::make_pair(0U, &PPC::GPRCRegClass);
15562 if (VT == MVT::f64 || VT == MVT::i64)
15563 return std::make_pair(0U, &PPC::SPERCRegClass);
15564 } else {
15565 if (VT == MVT::f32 || VT == MVT::i32)
15566 return std::make_pair(0U, &PPC::F4RCRegClass);
15567 if (VT == MVT::f64 || VT == MVT::i64)
15568 return std::make_pair(0U, &PPC::F8RCRegClass);
15569 }
15570 break;
15571 case 'v':
15572 if (Subtarget.hasAltivec())
15573 return std::make_pair(0U, &PPC::VRRCRegClass);
15574 break;
15575 case 'y': // crrc
15576 return std::make_pair(0U, &PPC::CRRCRegClass);
15577 }
15578 } else if (Constraint == "wc" && Subtarget.useCRBits()) {
15579 // An individual CR bit.
15580 return std::make_pair(0U, &PPC::CRBITRCRegClass);
15581 } else if ((Constraint == "wa" || Constraint == "wd" ||
15582 Constraint == "wf" || Constraint == "wi") &&
15583 Subtarget.hasVSX()) {
15584 return std::make_pair(0U, &PPC::VSRCRegClass);
15585 } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
15586 if (VT == MVT::f32 && Subtarget.hasP8Vector())
15587 return std::make_pair(0U, &PPC::VSSRCRegClass);
15588 else
15589 return std::make_pair(0U, &PPC::VSFRCRegClass);
15590 }
15591
15592 // If we name a VSX register, we can't defer to the base class because it
15593 // will not recognize the correct register (their names will be VSL{0-31}
15594 // and V{0-31} so they won't match). So we match them here.
15595 if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
15596 int VSNum = atoi(Constraint.data() + 3);
15597 assert(VSNum >= 0 && VSNum <= 63 &&
15598 "Attempted to access a vsr out of range");
15599 if (VSNum < 32)
15600 return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
15601 return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
15602 }
15603 std::pair<unsigned, const TargetRegisterClass *> R =
15604 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
15605
15606 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
15607 // (which we call X[0-9]+). If a 64-bit value has been requested, and a
15608 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
15609 // register.
15610 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
15611 // the AsmName field from *RegisterInfo.td, then this would not be necessary.
15612 if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
15613 PPC::GPRCRegClass.contains(R.first))
15614 return std::make_pair(TRI->getMatchingSuperReg(R.first,
15615 PPC::sub_32, &PPC::G8RCRegClass),
15616 &PPC::G8RCRegClass);
15617
15618 // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
15619 if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {
15620 R.first = PPC::CR0;
15621 R.second = &PPC::CRRCRegClass;
15622 }
15623
15624 return R;
15625}
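
The two routines above map GCC-style inline-asm constraint letters onto PPC register classes. As a rough illustration of the kind of user code that exercises this mapping (a hedged sketch assuming a PowerPC compile target; the mnemonics and constraints below are only examples, not taken from this file):

// Sketch: PPC inline-asm constraints resolved by the code above.
#include <cstdio>

long add_gpr(long a, long b) {
  long r;
  asm("add %0, %1, %2" : "=r"(r) : "r"(a), "r"(b)); // 'r': any GPR (GPRC/G8RC)
  return r;
}

double add_fpr(double a, double b) {
  double r;
  asm("fadd %0, %1, %2" : "=f"(r) : "f"(a), "f"(b)); // 'f': any FPR (F4RC/F8RC)
  return r;
}

int main() {
  std::printf("%ld %g\n", add_gpr(2, 3), add_fpr(1.5, 2.5));
  return 0;
}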
15626
15627/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
15628/// vector. If it is invalid, don't add anything to Ops.
15629void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
15630 std::string &Constraint,
15631 std::vector<SDValue>&Ops,
15632 SelectionDAG &DAG) const {
15633 SDValue Result;
15634
15635 // Only support length 1 constraints.
15636 if (Constraint.length() > 1) return;
15637
15638 char Letter = Constraint[0];
15639 switch (Letter) {
15640 default: break;
15641 case 'I':
15642 case 'J':
15643 case 'K':
15644 case 'L':
15645 case 'M':
15646 case 'N':
15647 case 'O':
15648 case 'P': {
15649 ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
15650 if (!CST) return; // Must be an immediate to match.
15651 SDLoc dl(Op);
15652 int64_t Value = CST->getSExtValue();
15653 EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
15654 // numbers are printed as such.
15655 switch (Letter) {
15656 default: llvm_unreachable("Unknown constraint letter!");
15657 case 'I': // "I" is a signed 16-bit constant.
15658 if (isInt<16>(Value))
15659 Result = DAG.getTargetConstant(Value, dl, TCVT);
15660 break;
15661 case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
15662 if (isShiftedUInt<16, 16>(Value))
15663 Result = DAG.getTargetConstant(Value, dl, TCVT);
15664 break;
15665 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
15666 if (isShiftedInt<16, 16>(Value))
15667 Result = DAG.getTargetConstant(Value, dl, TCVT);
15668 break;
15669 case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
15670 if (isUInt<16>(Value))
15671 Result = DAG.getTargetConstant(Value, dl, TCVT);
15672 break;
15673 case 'M': // "M" is a constant that is greater than 31.
15674 if (Value > 31)
15675 Result = DAG.getTargetConstant(Value, dl, TCVT);
15676 break;
15677 case 'N': // "N" is a positive constant that is an exact power of two.
15678 if (Value > 0 && isPowerOf2_64(Value))
15679 Result = DAG.getTargetConstant(Value, dl, TCVT);
15680 break;
15681 case 'O': // "O" is the constant zero.
15682 if (Value == 0)
15683 Result = DAG.getTargetConstant(Value, dl, TCVT);
15684 break;
15685 case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
15686 if (isInt<16>(-Value))
15687 Result = DAG.getTargetConstant(Value, dl, TCVT);
15688 break;
15689 }
15690 break;
15691 }
15692 }
15693
15694 if (Result.getNode()) {
15695 Ops.push_back(Result);
15696 return;
15697 }
15698
15699 // Handle standard constraint letters.
15700 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
15701}
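
The 'I'..'P' cases above only accept immediates in specific ranges; anything else falls back to the generic handling. A minimal sketch of source that depends on this (again assuming a PowerPC target, and only illustrative, not part of this file):

// 'I' demands a signed 16-bit constant, matching addi's immediate field; a
// value outside [-32768, 32767] would make LowerAsmOperandForConstraint add
// nothing and the asm would be rejected.
long add_forty_two(long x) {
  long r;
  asm("addi %0, %1, %2" : "=r"(r) : "b"(x), "I"(42)); // 'b': base reg, no r0
  return r;
}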
15702
15703// isLegalAddressingMode - Return true if the addressing mode represented
15704// by AM is legal for this target, for a load/store of the specified type.
15705bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
15706 const AddrMode &AM, Type *Ty,
15707 unsigned AS,
15708 Instruction *I) const {
15709 // The vector-type r+i form has been supported since Power9, as the DQ form. We
15710 // don't check that the offset meets the DQ-form requirement (off % 16 == 0),
15711 // because on PowerPC the immediate form is preferred, and the offset can be
15712 // adjusted later to use it in the PPCLoopInstrFormPrep pass. Also, in LSR each
15713 // LSRUse checks legal addressing modes using only its min and max offsets, so we
15714 // should be a little aggressive and accommodate the other offsets of that LSRUse.
15715 if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
15716 return false;
15717
15718 // PPC allows a sign-extended 16-bit immediate field.
15719 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
15720 return false;
15721
15722 // No global is ever allowed as a base.
15723 if (AM.BaseGV)
15724 return false;
15725
15726 // PPC only supports r+r and r+i forms.
15727 switch (AM.Scale) {
15728 case 0: // "r+i" or just "i", depending on HasBaseReg.
15729 break;
15730 case 1:
15731 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
15732 return false;
15733 // Otherwise we have r+r or r+i.
15734 break;
15735 case 2:
15736 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
15737 return false;
15738 // Allow 2*r as r+r.
15739 break;
15740 default:
15741 // No other scales are supported.
15742 return false;
15743 }
15744
15745 return true;
15746}
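
A standalone restatement of the offset and scale checks above (an illustrative sketch only; AddrModeSketch is a stand-in for LLVM's TargetLowering::AddrMode, and the vector/P9 special case is omitted):

#include <cassert>
#include <cstdint>

struct AddrModeSketch {   // stand-in for TargetLowering::AddrMode
  int64_t BaseOffs;
  bool HasBaseReg;
  int64_t Scale;
  bool HasBaseGV;
};

static bool isLegalPPCAddrModeSketch(const AddrModeSketch &AM) {
  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16) - 1)
    return false;                                   // must fit the D field
  if (AM.HasBaseGV)
    return false;                                   // no global as a base
  switch (AM.Scale) {
  case 0: return true;                              // r+i, or just i
  case 1: return !(AM.HasBaseReg && AM.BaseOffs);   // r+r or r+i, not r+r+i
  case 2: return !(AM.HasBaseReg || AM.BaseOffs);   // 2*r treated as r+r
  default: return false;                            // no other scales
  }
}

int main() {
  AddrModeSketch RPlusI = {4, true, 0, false};      // r+4: legal
  AddrModeSketch RPlusRPlusI = {8, true, 1, false}; // r+r+8: rejected
  assert(isLegalPPCAddrModeSketch(RPlusI));
  assert(!isLegalPPCAddrModeSketch(RPlusRPlusI));
  return 0;
}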
15747
15748SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
15749 SelectionDAG &DAG) const {
15750 MachineFunction &MF = DAG.getMachineFunction();
15751 MachineFrameInfo &MFI = MF.getFrameInfo();
15752 MFI.setReturnAddressIsTaken(true);
15753
15754 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
15755 return SDValue();
15756
15757 SDLoc dl(Op);
15758 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
15759
15760 // Make sure the function does not optimize away the store of the RA to
15761 // the stack.
15762 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
15763 FuncInfo->setLRStoreRequired();
15764 bool isPPC64 = Subtarget.isPPC64();
15765 auto PtrVT = getPointerTy(MF.getDataLayout());
15766
15767 if (Depth > 0) {
15768 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
15769 SDValue Offset =
15770 DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
15771 isPPC64 ? MVT::i64 : MVT::i32);
15772 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
15773 DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
15774 MachinePointerInfo());
15775 }
15776
15777 // Just load the return address off the stack.
15778 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
15779 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
15780 MachinePointerInfo());
15781}
15782
15783SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
15784 SelectionDAG &DAG) const {
15785 SDLoc dl(Op);
15786 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
15787
15788 MachineFunction &MF = DAG.getMachineFunction();
15789 MachineFrameInfo &MFI = MF.getFrameInfo();
15790 MFI.setFrameAddressIsTaken(true);
15791
15792 EVT PtrVT = getPointerTy(MF.getDataLayout());
15793 bool isPPC64 = PtrVT == MVT::i64;
15794
15795 // Naked functions never have a frame pointer, and so we use r1. For all
15796 // other functions, this decision must be delayed until during PEI.
15797 unsigned FrameReg;
15798 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
15799 FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
15800 else
15801 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
15802
15803 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
15804 PtrVT);
15805 while (Depth--)
15806 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
15807 FrameAddr, MachinePointerInfo());
15808 return FrameAddr;
15809}
15810
15811// FIXME? Maybe this could be a TableGen attribute on some registers and
15812// this table could be generated automatically from RegInfo.
15813Register PPCTargetLowering::getRegisterByName(const char* RegName, LLT VT,
15814 const MachineFunction &MF) const {
15815 bool isPPC64 = Subtarget.isPPC64();
15816
15817 bool is64Bit = isPPC64 && VT == LLT::scalar(64);
15818 if (!is64Bit && VT != LLT::scalar(32))
15819 report_fatal_error("Invalid register global variable type");
15820
15821 Register Reg = StringSwitch<Register>(RegName)
15822 .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
15823 .Case("r2", isPPC64 ? Register() : PPC::R2)
15824 .Case("r13", (is64Bit ? PPC::X13 : PPC::R13))
15825 .Default(Register());
15826
15827 if (Reg)
15828 return Reg;
15829 report_fatal_error("Invalid register name global variable");
15830}
15831
15832bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
15833 // The 32-bit SVR4 ABI accesses everything as GOT-indirect.
15834 if (Subtarget.is32BitELFABI())
15835 return true;
15836
15837 // AIX accesses everything indirectly through the TOC, which is similar to
15838 // the GOT.
15839 if (Subtarget.isAIXABI())
15840 return true;
15841
15842 CodeModel::Model CModel = getTargetMachine().getCodeModel();
15843 // Under the small or large code model, module locals are accessed
15844 // indirectly by loading their address from the .toc/.got section.
15845 if (CModel == CodeModel::Small || CModel == CodeModel::Large)
15846 return true;
15847
15848 // JumpTable and BlockAddress are accessed as got-indirect.
15849 if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
15850 return true;
15851
15852 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
15853 return Subtarget.isGVIndirectSymbol(G->getGlobal());
15854
15855 return false;
15856}
15857
15858bool
15859PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
15860 // The PowerPC target isn't yet aware of offsets.
15861 return false;
15862}
15863
15864bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
15865 const CallInst &I,
15866 MachineFunction &MF,
15867 unsigned Intrinsic) const {
15868 switch (Intrinsic) {
15869 case Intrinsic::ppc_altivec_lvx:
15870 case Intrinsic::ppc_altivec_lvxl:
15871 case Intrinsic::ppc_altivec_lvebx:
15872 case Intrinsic::ppc_altivec_lvehx:
15873 case Intrinsic::ppc_altivec_lvewx:
15874 case Intrinsic::ppc_vsx_lxvd2x:
15875 case Intrinsic::ppc_vsx_lxvw4x:
15876 case Intrinsic::ppc_vsx_lxvd2x_be:
15877 case Intrinsic::ppc_vsx_lxvw4x_be:
15878 case Intrinsic::ppc_vsx_lxvl:
15879 case Intrinsic::ppc_vsx_lxvll: {
15880 EVT VT;
15881 switch (Intrinsic) {
15882 case Intrinsic::ppc_altivec_lvebx:
15883 VT = MVT::i8;
15884 break;
15885 case Intrinsic::ppc_altivec_lvehx:
15886 VT = MVT::i16;
15887 break;
15888 case Intrinsic::ppc_altivec_lvewx:
15889 VT = MVT::i32;
15890 break;
15891 case Intrinsic::ppc_vsx_lxvd2x:
15892 case Intrinsic::ppc_vsx_lxvd2x_be:
15893 VT = MVT::v2f64;
15894 break;
15895 default:
15896 VT = MVT::v4i32;
15897 break;
15898 }
15899
15900 Info.opc = ISD::INTRINSIC_W_CHAIN;
15901 Info.memVT = VT;
15902 Info.ptrVal = I.getArgOperand(0);
15903 Info.offset = -VT.getStoreSize()+1;
15904 Info.size = 2*VT.getStoreSize()-1;
15905 Info.align = Align(1);
15906 Info.flags = MachineMemOperand::MOLoad;
15907 return true;
15908 }
15909 case Intrinsic::ppc_altivec_stvx:
15910 case Intrinsic::ppc_altivec_stvxl:
15911 case Intrinsic::ppc_altivec_stvebx:
15912 case Intrinsic::ppc_altivec_stvehx:
15913 case Intrinsic::ppc_altivec_stvewx:
15914 case Intrinsic::ppc_vsx_stxvd2x:
15915 case Intrinsic::ppc_vsx_stxvw4x:
15916 case Intrinsic::ppc_vsx_stxvd2x_be:
15917 case Intrinsic::ppc_vsx_stxvw4x_be:
15918 case Intrinsic::ppc_vsx_stxvl:
15919 case Intrinsic::ppc_vsx_stxvll: {
15920 EVT VT;
15921 switch (Intrinsic) {
15922 case Intrinsic::ppc_altivec_stvebx:
15923 VT = MVT::i8;
15924 break;
15925 case Intrinsic::ppc_altivec_stvehx:
15926 VT = MVT::i16;
15927 break;
15928 case Intrinsic::ppc_altivec_stvewx:
15929 VT = MVT::i32;
15930 break;
15931 case Intrinsic::ppc_vsx_stxvd2x:
15932 case Intrinsic::ppc_vsx_stxvd2x_be:
15933 VT = MVT::v2f64;
15934 break;
15935 default:
15936 VT = MVT::v4i32;
15937 break;
15938 }
15939
15940 Info.opc = ISD::INTRINSIC_VOID;
15941 Info.memVT = VT;
15942 Info.ptrVal = I.getArgOperand(1);
15943 Info.offset = -VT.getStoreSize()+1;
15944 Info.size = 2*VT.getStoreSize()-1;
15945 Info.align = Align(1);
15946 Info.flags = MachineMemOperand::MOStore;
15947 return true;
15948 }
15949 default:
15950 break;
15951 }
15952
15953 return false;
15954}
15955
15956/// It returns EVT::Other if the type should be determined using generic
15957/// target-independent logic.
15958EVT PPCTargetLowering::getOptimalMemOpType(
15959 const MemOp &Op, const AttributeList &FuncAttributes) const {
15960 if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
15961 // We should use Altivec/VSX loads and stores when available. For unaligned
15962 // addresses, unaligned VSX loads are only fast starting with the P8.
15963 if (Subtarget.hasAltivec() && Op.size() >= 16 &&
15964 (Op.isAligned(Align(16)) ||
15965 ((Op.isMemset() && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
15966 return MVT::v4i32;
15967 }
15968
15969 if (Subtarget.isPPC64()) {
15970 return MVT::i64;
15971 }
15972
15973 return MVT::i32;
15974}
15975
15976/// Returns true if it is beneficial to convert a load of a constant
15977/// to just the constant itself.
15978bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
15979 Type *Ty) const {
15980 assert(Ty->isIntegerTy());
15981
15982 unsigned BitSize = Ty->getPrimitiveSizeInBits();
15983 return !(BitSize == 0 || BitSize > 64);
15984}
15985
15986bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
15987 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
15988 return false;
15989 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
15990 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
15991 return NumBits1 == 64 && NumBits2 == 32;
15992}
15993
15994bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
15995 if (!VT1.isInteger() || !VT2.isInteger())
15996 return false;
15997 unsigned NumBits1 = VT1.getSizeInBits();
15998 unsigned NumBits2 = VT2.getSizeInBits();
15999 return NumBits1 == 64 && NumBits2 == 32;
16000}
16001
16002bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
16003 // Generally speaking, zexts are not free, but they are free when they can be
16004 // folded with other operations.
16005 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
16006 EVT MemVT = LD->getMemoryVT();
16007 if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
16008 (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
16009 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
16010 LD->getExtensionType() == ISD::ZEXTLOAD))
16011 return true;
16012 }
16013
16014 // FIXME: Add other cases...
16015 // - 32-bit shifts with a zext to i64
16016 // - zext after ctlz, bswap, etc.
16017 // - zext after and by a constant mask
16018
16019 return TargetLowering::isZExtFree(Val, VT2);
16020}
16021
16022bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
16023 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
16024 "invalid fpext types");
16025 // Extending to float128 is not free.
16026 if (DestVT == MVT::f128)
16027 return false;
16028 return true;
16029}
16030
16031bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
16032 return isInt<16>(Imm) || isUInt<16>(Imm);
16033}
16034
16035bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
16036 return isInt<16>(Imm) || isUInt<16>(Imm);
16037}
16038
16039bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
16040 unsigned,
16041 unsigned,
16042 MachineMemOperand::Flags,
16043 bool *Fast) const {
16044 if (DisablePPCUnaligned)
16045 return false;
16046
16047 // PowerPC supports unaligned memory access for simple non-vector types.
16048 // Although accessing unaligned addresses is not as efficient as accessing
16049 // aligned addresses, it is generally more efficient than manual expansion,
16050 // and generally only traps for software emulation when crossing page
16051 // boundaries.
16052
16053 if (!VT.isSimple())
16054 return false;
16055
16056 if (VT.isFloatingPoint() && !VT.isVector() &&
16057 !Subtarget.allowsUnalignedFPAccess())
16058 return false;
16059
16060 if (VT.getSimpleVT().isVector()) {
16061 if (Subtarget.hasVSX()) {
16062 if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
16063 VT != MVT::v4f32 && VT != MVT::v4i32)
16064 return false;
16065 } else {
16066 return false;
16067 }
16068 }
16069
16070 if (VT == MVT::ppcf128)
16071 return false;
16072
16073 if (Fast)
16074 *Fast = true;
16075
16076 return true;
16077}
16078
16079bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
16080 SDValue C) const {
16081 // Check integral scalar types.
16082 if (!VT.isScalarInteger())
16083 return false;
16084 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
16085 if (!ConstNode->getAPIntValue().isSignedIntN(64))
16086 return false;
16087 // This transformation will generate >= 2 operations, but the following
16088 // cases will generate <= 2 instructions during ISel, so exclude them:
16089 // 1. If the constant multiplier fits in 16 bits, it can be handled by one
16090 // HW instruction, i.e. MULLI.
16091 // 2. If the multiplier fits in 16 bits after being shifted, one extra shift
16092 // instruction is needed compared to case 1, i.e. MULLI and RLDICR.
16093 int64_t Imm = ConstNode->getSExtValue();
16094 unsigned Shift = countTrailingZeros<uint64_t>(Imm);
16095 Imm >>= Shift;
16096 if (isInt<16>(Imm))
16097 return false;
16098 uint64_t UImm = static_cast<uint64_t>(Imm);
16099 if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
16100 isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
16101 return true;
16102 }
16103 return false;
16104}
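
A numeric illustration of that profitability check (standalone, not LLVM code): stripping the trailing zero from 10 gives 5, which fits in 16 bits, so MULLI plus a shift already covers it and the combine bails out; 0xFFFFFFFF qualifies because UImm + 1 is a power of two, enabling a shift-and-subtract decomposition.

#include <cassert>
#include <cstdint>

int main() {
  uint64_t x = 12345;
  // mul x, 0xFFFFFFFF == (x << 32) - x, since 0xFFFFFFFF == 2^32 - 1.
  assert(x * 0xFFFFFFFFULL == (x << 32) - x);
  // mul x, 10: 10 >> 1 == 5 fits in 16 bits, so MULLI (+ RLDICR) handles it
  // in at most two instructions and the decomposition is skipped.
  return 0;
}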
16105
16106bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
16107 EVT VT) const {
16108 return isFMAFasterThanFMulAndFAdd(
16109 MF.getFunction(), VT.getTypeForEVT(MF.getFunction().getContext()));
16110}
16111
16112bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
16113 Type *Ty) const {
16114 switch (Ty->getScalarType()->getTypeID()) {
16115 case Type::FloatTyID:
16116 case Type::DoubleTyID:
16117 return true;
16118 case Type::FP128TyID:
16119 return Subtarget.hasP9Vector();
16120 default:
16121 return false;
16122 }
16123}
16124
16125// FIXME: add more patterns which are not profitable to hoist.
16126bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
16127 if (!I->hasOneUse())
16128 return true;
16129
16130 Instruction *User = I->user_back();
16131 assert(User && "A single use instruction with no uses.");
16132
16133 switch (I->getOpcode()) {
16134 case Instruction::FMul: {
16135 // Don't break FMA, PowerPC prefers FMA.
16136 if (User->getOpcode() != Instruction::FSub &&
16137 User->getOpcode() != Instruction::FAdd)
16138 return true;
16139
16140 const TargetOptions &Options = getTargetMachine().Options;
16141 const Function *F = I->getFunction();
16142 const DataLayout &DL = F->getParent()->getDataLayout();
16143 Type *Ty = User->getOperand(0)->getType();
16144
16145 return !(
16146 isFMAFasterThanFMulAndFAdd(*F, Ty) &&
16147 isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
16148 (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
16149 }
16150 case Instruction::Load: {
16151 // Don't break the "store (load float*)" pattern; it will be combined
16152 // into "store (load int32)" by a later InstCombine pass. See function
16153 // combineLoadToOperationType. On PowerPC, loading a floating-point value
16154 // takes more cycles than loading a 32-bit integer.
16155 LoadInst *LI = cast<LoadInst>(I);
16156 // For loads that combineLoadToOperationType leaves alone, such as
16157 // ordered loads, it should be profitable to hoist them.
16158 // A swifterror load can only be used with pointer-to-pointer types, so
16159 // the later type check gets rid of this case.
16160 if (!LI->isUnordered())
16161 return true;
16162
16163 if (User->getOpcode() != Instruction::Store)
16164 return true;
16165
16166 if (I->getType()->getTypeID() != Type::FloatTyID)
16167 return true;
16168
16169 return false;
16170 }
16171 default:
16172 return true;
16173 }
16174 return true;
16175}
16176
16177const MCPhysReg *
16178PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
16179 // LR is a callee-save register, but we must treat it as clobbered by any call
16180 // site. Hence we include LR in the scratch registers, which are in turn added
16181 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
16182 // to CTR, which is used by any indirect call.
16183 static const MCPhysReg ScratchRegs[] = {
16184 PPC::X12, PPC::LR8, PPC::CTR8, 0
16185 };
16186
16187 return ScratchRegs;
16188}
16189
16190Register PPCTargetLowering::getExceptionPointerRegister(
16191 const Constant *PersonalityFn) const {
16192 return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
16193}
16194
16195Register PPCTargetLowering::getExceptionSelectorRegister(
16196 const Constant *PersonalityFn) const {
16197 return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
16198}
16199
16200bool
16201PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
16202 EVT VT , unsigned DefinedValues) const {
16203 if (VT == MVT::v2i64)
16204 return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
16205
16206 if (Subtarget.hasVSX())
16207 return true;
16208
16209 return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
16210}
16211
16212Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
16213 if (DisableILPPref || Subtarget.enableMachineScheduler())
16214 return TargetLowering::getSchedulingPreference(N);
16215
16216 return Sched::ILP;
16217}
16218
16219// Create a fast isel object.
16220FastISel *
16221PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
16222 const TargetLibraryInfo *LibInfo) const {
16223 return PPC::createFastISel(FuncInfo, LibInfo);
16224}
16225
16226// 'Inverted' means the FMA opcode after negating one multiplicand.
16227// For example, (fma -a b c) = (fnmsub a b c)
16228static unsigned invertFMAOpcode(unsigned Opc) {
16229 switch (Opc) {
16230 default:
16231 llvm_unreachable("Invalid FMA opcode for PowerPC!")::llvm::llvm_unreachable_internal("Invalid FMA opcode for PowerPC!"
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 16231)
;
16232 case ISD::FMA:
16233 return PPCISD::FNMSUB;
16234 case PPCISD::FNMSUB:
16235 return ISD::FMA;
16236 }
16237}
16238
16239SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
16240 bool LegalOps, bool OptForSize,
16241 NegatibleCost &Cost,
16242 unsigned Depth) const {
16243 if (Depth > SelectionDAG::MaxRecursionDepth)
16244 return SDValue();
16245
16246 unsigned Opc = Op.getOpcode();
16247 EVT VT = Op.getValueType();
16248 SDNodeFlags Flags = Op.getNode()->getFlags();
16249
16250 switch (Opc) {
16251 case PPCISD::FNMSUB:
16252 if (!Op.hasOneUse() || !isTypeLegal(VT))
16253 break;
16254
16255 const TargetOptions &Options = getTargetMachine().Options;
16256 SDValue N0 = Op.getOperand(0);
16257 SDValue N1 = Op.getOperand(1);
16258 SDValue N2 = Op.getOperand(2);
16259 SDLoc Loc(Op);
16260
16261 NegatibleCost N2Cost = NegatibleCost::Expensive;
16262 SDValue NegN2 =
16263 getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
16264
16265 if (!NegN2)
16266 return SDValue();
16267
16268 // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
16269 // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
16270 // These transformations may change the sign of zeros. For example,
16271 // -(-ab-(-c)) = -0 while -(-(ab-c)) = +0 when a=b=c=1.
16272 if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
16273 // Try and choose the cheaper one to negate.
16274 NegatibleCost N0Cost = NegatibleCost::Expensive;
16275 SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
16276 N0Cost, Depth + 1);
16277
16278 NegatibleCost N1Cost = NegatibleCost::Expensive;
16279 SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
16280 N1Cost, Depth + 1);
16281
16282 if (NegN0 && N0Cost <= N1Cost) {
16283 Cost = std::min(N0Cost, N2Cost);
16284 return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
16285 } else if (NegN1) {
16286 Cost = std::min(N1Cost, N2Cost);
16287 return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
16288 }
16289 }
16290
16291 // (fneg (fnmsub a b c)) => (fma a b (fneg c))
16292 if (isOperationLegal(ISD::FMA, VT)) {
16293 Cost = N2Cost;
16294 return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
16295 }
16296
16297 break;
16298 }
16299
16300 return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
16301 Cost, Depth);
16302}
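
The signed-zero caveat in the comments above can be reproduced with scalar math (a sketch only; fnmsub below is a plain scalar model of PPCISD::FNMSUB, i.e. -(a*b - c), not the DAG node, and it assumes default IEEE semantics rather than fast-math):

#include <cassert>
#include <cmath>

// Scalar model of PPCISD::FNMSUB: -(a*b - c).
static double fnmsub(double a, double b, double c) { return -std::fma(a, b, -c); }

int main() {
  double a = 1.0, b = 1.0, c = 1.0;
  double negated   = -fnmsub(a, b, c);   // -(-(ab - c))   -> +0.0 here
  double rewritten = fnmsub(-a, b, -c);  // -(-ab - (-c))  -> -0.0 here
  assert(!std::signbit(negated) && std::signbit(rewritten));
  return 0;
}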
16303
16304// Override to enable LOAD_STACK_GUARD lowering on Linux.
16305bool PPCTargetLowering::useLoadStackGuardNode() const {
16306 if (!Subtarget.isTargetLinux())
16307 return TargetLowering::useLoadStackGuardNode();
16308 return true;
16309}
16310
16311// Override to disable global variable loading on Linux.
16312void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
16313 if (!Subtarget.isTargetLinux())
16314 return TargetLowering::insertSSPDeclarations(M);
16315}
16316
16317bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
16318 bool ForCodeSize) const {
16319 if (!VT.isSimple() || !Subtarget.hasVSX())
16320 return false;
16321
16322 switch(VT.getSimpleVT().SimpleTy) {
16323 default:
16324 // For FP types that are currently not supported by PPC backend, return
16325 // false. Examples: f16, f80.
16326 return false;
16327 case MVT::f32:
16328 case MVT::f64:
16329 if (Subtarget.hasPrefixInstrs()) {
16330 // With prefixed instructions, we can materialize anything that can be
16331 // represented with a 32-bit immediate, not just positive zero.
16332 APFloat APFloatOfImm = Imm;
16333 return convertToNonDenormSingle(APFloatOfImm);
16334 }
16335 LLVM_FALLTHROUGH;
16336 case MVT::ppcf128:
16337 return Imm.isPosZero();
16338 }
16339}
16340
16341// For vector shift operation op, fold
16342// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
16343static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
16344 SelectionDAG &DAG) {
16345 SDValue N0 = N->getOperand(0);
16346 SDValue N1 = N->getOperand(1);
16347 EVT VT = N0.getValueType();
16348 unsigned OpSizeInBits = VT.getScalarSizeInBits();
16349 unsigned Opcode = N->getOpcode();
16350 unsigned TargetOpcode;
16351
16352 switch (Opcode) {
16353 default:
16354 llvm_unreachable("Unexpected shift operation")::llvm::llvm_unreachable_internal("Unexpected shift operation"
, "/build/llvm-toolchain-snapshot-12~++20201026111116+d3205bbca3e/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 16354)
;
16355 case ISD::SHL:
16356 TargetOpcode = PPCISD::SHL;
16357 break;
16358 case ISD::SRL:
16359 TargetOpcode = PPCISD::SRL;
16360 break;
16361 case ISD::SRA:
16362 TargetOpcode = PPCISD::SRA;
16363 break;
16364 }
16365
16366 if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
16367 N1->getOpcode() == ISD::AND)
16368 if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
16369 if (Mask->getZExtValue() == OpSizeInBits - 1)
16370 return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
16371
16372 return SDValue();
16373}
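
The fold is sound because the target shift nodes already interpret the amount modulo the element width, so the explicit (and y, bits-1) mask is redundant. A scalar sketch of that invariant for 64-bit elements (illustrative only):

#include <cassert>
#include <cstdint>

// Model of the target shift: only the low 6 bits of the amount are used.
static uint64_t hwShl64(uint64_t x, uint64_t amt) { return x << (amt & 63); }

int main() {
  const uint64_t amounts[] = {0, 3, 63, 64, 130};
  for (uint64_t y : amounts)
    // (shl x, (and y, 63)) and the mask-free target op agree for any y.
    assert(hwShl64(0x1234, y & 63) == hwShl64(0x1234, y));
  return 0;
}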
16374
16375SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
16376 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
16377 return Value;
16378
16379 SDValue N0 = N->getOperand(0);
16380 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
16381 if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
16382 N0.getOpcode() != ISD::SIGN_EXTEND ||
16383 N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
16384 N->getValueType(0) != MVT::i64)
16385 return SDValue();
16386
16387 // We can't save an operation here if the value is already extended, and
16388 // the existing shift is easier to combine.
16389 SDValue ExtsSrc = N0.getOperand(0);
16390 if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
16391 ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
16392 return SDValue();
16393
16394 SDLoc DL(N0);
16395 SDValue ShiftBy = SDValue(CN1, 0);
16396 // We want the shift amount to be i32 on the extswsli, but the shift amount
16397 // could be an i64.
16398 if (ShiftBy.getValueType() == MVT::i64)
16399 ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
16400
16401 return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
16402 ShiftBy);
16403}
16404
16405SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
16406 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
16407 return Value;
16408
16409 return SDValue();
16410}
16411
16412SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
16413 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
16414 return Value;
16415
16416 return SDValue();
16417}
16418
16419// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
16420// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
16421 // When C is zero, the expression (addi Z, -C) simplifies to Z.
16422 // Requirement: -C is in [-32768, 32767]; X and Z have type MVT::i64.
16423static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
16424 const PPCSubtarget &Subtarget) {
16425 if (!Subtarget.isPPC64())
16426 return SDValue();
16427
16428 SDValue LHS = N->getOperand(0);
16429 SDValue RHS = N->getOperand(1);
16430
16431 auto isZextOfCompareWithConstant = [](SDValue Op) {
16432 if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
16433 Op.getValueType() != MVT::i64)
16434 return false;
16435
16436 SDValue Cmp = Op.getOperand(0);
16437 if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
16438 Cmp.getOperand(0).getValueType() != MVT::i64)
16439 return false;
16440
16441 if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
16442 int64_t NegConstant = 0 - Constant->getSExtValue();
16443 // Due to the limitations of the addi instruction,
16444 // -C is required to be in [-32768, 32767].
16445 return isInt<16>(NegConstant);
16446 }
16447
16448 return false;
16449 };
16450
16451 bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
16452 bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
16453
16454 // If there is a pattern, canonicalize a zext operand to the RHS.
16455 if (LHSHasPattern && !RHSHasPattern)
16456 std::swap(LHS, RHS);
16457 else if (!LHSHasPattern && !RHSHasPattern)
16458 return SDValue();
16459
16460 SDLoc DL(N);
16461 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
16462 SDValue Cmp = RHS.getOperand(0);
16463 SDValue Z = Cmp.getOperand(0);
16464 auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1));
16465
16466 assert(Constant && "Constant Should not be a null pointer.");
16467 int64_t NegConstant = 0 - Constant->getSExtValue();
16468
16469 switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
16470 default: break;
16471 case ISD::SETNE: {
16472 // when C == 0
16473 // --> addze X, (addic Z, -1).carry
16474 // /
16475 // add X, (zext(setne Z, C))--
16476 // \ when -32768 <= -C <= 32767 && C != 0
16477 // --> addze X, (addic (addi Z, -C), -1).carry
16478 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
16479 DAG.getConstant(NegConstant, DL, MVT::i64));
16480 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
16481 SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
16482 AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
16483 return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
16484 SDValue(Addc.getNode(), 1));
16485 }
16486 case ISD::SETEQ: {
16487 // when C == 0
16488 // --> addze X, (subfic Z, 0).carry
16489 // /
16490 // add X, (zext(sete Z, C))--
16491 // \ when -32768 <= -C <= 32767 && C != 0
16492 // --> addze X, (subfic (addi Z, -C), 0).carry
16493 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
16494 DAG.getConstant(NegConstant, DL, MVT::i64));
16495 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
16496 SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
16497 DAG.getConstant(0, DL, MVT::i64), AddOrZ);
16498 return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
16499 SDValue(Subc.getNode(), 1));
16500 }
16501 }
16502
16503 return SDValue();
16504}
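
The carry arithmetic the ASCII diagrams above describe can be checked with plain 64-bit math (a scalar sketch of the addic/subfic carry semantics, not LLVM code): adding -1 to a value carries out exactly when the value is nonzero, and subtracting a value from zero carries out (no borrow) exactly when it is zero.

#include <cassert>
#include <cstdint>

// Carry-out of a 64-bit unsigned add, modelled via wraparound.
static bool carryOfAdd(uint64_t a, uint64_t b) { return a + b < a; }

int main() {
  const uint64_t C = 1;                  // the compared-against constant
  const uint64_t zs[] = {0, 1, 42};
  for (uint64_t Z : zs) {
    uint64_t T = Z - C;                  // (addi Z, -C)
    bool ne = carryOfAdd(T, ~0ULL);      // addic T, -1 : carry == (T != 0)
    bool eq = carryOfAdd(~T, 1);         // subfic T, 0 : carry == (T == 0)
    assert(ne == (Z != C) && eq == (Z == C));
    // addze X then folds the carry back in: X + (Z != C) or X + (Z == C).
  }
  return 0;
}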
16505
16506// Transform
16507// (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
16508// (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
16509// In this case both C1 and C2 must be known constants.
16510// C1+C2 must fit into a 34 bit signed integer.
16511static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
16512 const PPCSubtarget &Subtarget) {
16513 if (!Subtarget.isUsingPCRelativeCalls())
16514 return SDValue();
16515
16516 // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
16517 // If we find that node, try to cast the GlobalAddress and the Constant.
16518 SDValue LHS = N->getOperand(0);
16519 SDValue RHS = N->getOperand(1);
16520
16521 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
16522 std::swap(LHS, RHS);
16523
16524 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
16525 return SDValue();
16526
16527 // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
16528 GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
16529 ConstantSDNode* ConstNode = dyn_cast<ConstantSDNode>(RHS);
16530
16531 // Check that both casts succeeded.
16532 if (!GSDN || !ConstNode)
16533 return SDValue();
16534
16535 int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
16536 SDLoc DL(GSDN);
16537
16538 // The signed int offset needs to fit in 34 bits.
16539 if (!isInt<34>(NewOffset))
16540 return SDValue();
16541
16542 // The new global address is a copy of the old global address except
16543 // that it has the updated Offset.
16544 SDValue GA =
16545 DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
16546 NewOffset, GSDN->getTargetFlags());
16547 SDValue MatPCRel =
16548 DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
16549 return MatPCRel;
16550}
16551
16552SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
16553 if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
16554 return Value;
16555
16556 if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
16557 return Value;
16558
16559 return SDValue();
16560}
16561
16562// Detect TRUNCATE operations on bitcasts of float128 values.
16563 // What we are looking for here is the situation where we extract a subset
16564 // of bits from a 128-bit float.
16565// This can be of two forms:
16566// 1) BITCAST of f128 feeding TRUNCATE
16567// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
16568 // This is required because we do not have a legal i128 type,
16569 // and so we want to avoid having to store the f128 and then reload
16570 // part of it.
16571SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
16572 DAGCombinerInfo &DCI) const {
16573 // If we are using CRBits then try that first.
16574 if (Subtarget.useCRBits()) {
16575 // Check if CRBits did anything and return that if it did.
16576 if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
16577 return CRTruncValue;
16578 }
16579
16580 SDLoc dl(N);
16581 SDValue Op0 = N->getOperand(0);
16582
16583 // fold (truncate (abs (sub (zext a), (zext b)))) -> (vabsd a, b)
16584 if (Subtarget.hasP9Altivec() && Op0.getOpcode() == ISD::ABS) {
16585 EVT VT = N->getValueType(0);
16586 if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
16587 return SDValue();
16588 SDValue Sub = Op0.getOperand(0);
16589 if (Sub.getOpcode() == ISD::SUB) {
16590 SDValue SubOp0 = Sub.getOperand(0);
16591 SDValue SubOp1 = Sub.getOperand(1);
16592 if ((SubOp0.getOpcode() == ISD::ZERO_EXTEND) &&
16593 (SubOp1.getOpcode() == ISD::ZERO_EXTEND)) {
16594 return DCI.DAG.getNode(PPCISD::VABSD, dl, VT, SubOp0.getOperand(0),
16595 SubOp1.getOperand(0),
16596 DCI.DAG.getTargetConstant(0, dl, MVT::i32));
16597 }
16598 }
16599 }
16600
16601 // Looking for a truncate of i128 to i64.
16602 if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
16603 return SDValue();
16604
16605 int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
16606
16607 // SRL feeding TRUNCATE.
16608 if (Op0.getOpcode() == ISD::SRL) {
16609 ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
16610 // The right shift has to be by 64 bits.
16611 if (!ConstNode || ConstNode->getZExtValue() != 64)
16612 return SDValue();
16613
16614 // Switch the element number to extract.
16615 EltToExtract = EltToExtract ? 0 : 1;
16616 // Update Op0 past the SRL.
16617 Op0 = Op0.getOperand(0);
16618 }
16619
16620 // BITCAST feeding a TRUNCATE possibly via SRL.
16621 if (Op0.getOpcode() == ISD::BITCAST &&
16622 Op0.getValueType() == MVT::i128 &&
16623 Op0.getOperand(0).getValueType() == MVT::f128) {
16624 SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
16625 return DCI.DAG.getNode(
16626 ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
16627 DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
16628 }
16629 return SDValue();
16630}
16631
16632SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
16633 SelectionDAG &DAG = DCI.DAG;
16634
16635 ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
16636 if (!ConstOpOrElement)
16637 return SDValue();
16638
16639 // An imul is usually smaller than the alternative sequence for legal type.
16640 if (DAG.getMachineFunction().getFunction().hasMinSize() &&
16641 isOperationLegal(ISD::MUL, N->getValueType(0)))
16642 return SDValue();
16643
16644 auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
16645 switch (this->Subtarget.getCPUDirective()) {
16646 default:
16647 // TODO: enhance the condition for subtarget before pwr8
16648 return false;
16649 case PPC::DIR_PWR8:
16650 // type mul add shl
16651 // scalar 4 1 1
16652 // vector 7 2 2
16653 return true;
16654 case PPC::DIR_PWR9:
16655 case PPC::DIR_PWR10:
16656 case PPC::DIR_PWR_FUTURE:
16657 // type mul add shl
16658 // scalar 5 2 2
16659 // vector 7 2 2
16660
16661 // The cycle ratios of the related operations are shown in the table above.
16662 // Because mul is 5 (scalar) / 7 (vector) cycles while add/sub/shl are all 2
16663 // for both scalar and vector types, the 2-instruction patterns (add/sub +
16664 // shl, 4 cycles total) are always profitable; but for the 3-instruction
16665 // pattern (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl cost 6,
16666 // so we should only do it for vector types.
16667 return IsAddOne && IsNeg ? VT.isVector() : true;
16668 }
16669 };
16670
16671 EVT VT = N->getValueType(0);
16672 SDLoc DL(N);
16673
16674 const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
16675 bool IsNeg = MulAmt.isNegative();
16676 APInt MulAmtAbs = MulAmt.abs();
16677
16678 if ((MulAmtAbs - 1).isPowerOf2()) {
16679 // (mul x, 2^N + 1) => (add (shl x, N), x)
16680 // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
16681
16682 if (!IsProfitable(IsNeg, true, VT))
16683 return SDValue();
16684
16685 SDValue Op0 = N->getOperand(0);
16686 SDValue Op1 =
16687 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16688 DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
16689 SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
16690
16691 if (!IsNeg)
16692 return Res;
16693
16694 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
16695 } else if ((MulAmtAbs + 1).isPowerOf2()) {
16696 // (mul x, 2^N - 1) => (sub (shl x, N), x)
16697 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
16698
16699 if (!IsProfitable(IsNeg, false, VT))
16700 return SDValue();
16701
16702 SDValue Op0 = N->getOperand(0);
16703 SDValue Op1 =
16704 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16705 DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
16706
16707 if (!IsNeg)
16708 return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
16709 else
16710 return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
16711
16712 } else {
16713 return SDValue();
16714 }
16715}
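
The algebraic identities behind those rewrites, checked standalone with N = 4 (illustration only, not LLVM code):

#include <cassert>
#include <cstdint>

int main() {
  int64_t x = 12345;
  assert(x * 17  == (x << 4) + x);      // (mul x, 2^N + 1)    => add/shl
  assert(x * -17 == -((x << 4) + x));   // (mul x, -(2^N + 1)) => neg/add/shl
  assert(x * 15  == (x << 4) - x);      // (mul x, 2^N - 1)    => sub/shl
  assert(x * -15 == x - (x << 4));      // (mul x, -(2^N - 1)) => sub/shl
  return 0;
}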
16716
16717 // Combine an fma-like op (such as fnmsub) with fnegs into the appropriate op.
16718 // Do this in the combiner since we need to check SD flags and other subtarget features.
16719SDValue PPCTargetLowering::combineFMALike(SDNode *N,
16720 DAGCombinerInfo &DCI) const {
16721 SDValue N0 = N->getOperand(0);
16722 SDValue N1 = N->getOperand(1);
16723 SDValue N2 = N->getOperand(2);
16724 SDNodeFlags Flags = N->getFlags();
16725 EVT VT = N->getValueType(0);
16726 SelectionDAG &DAG = DCI.DAG;
16727 const TargetOptions &Options = getTargetMachine().Options;
16728 unsigned Opc = N->getOpcode();
16729 bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
16730 bool LegalOps = !DCI.isBeforeLegalizeOps();
16731 SDLoc Loc(N);
16732
16733 if (!isOperationLegal(ISD::FMA, VT))
16734 return SDValue();
16735
16736 // Allowing the transformation to FNMSUB may change the sign of zeros when
16737 // ab-c=0, since (fnmsub a b c) = -0 while c-ab = +0.
16738 if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
16739 return SDValue();
16740
16741 // (fma (fneg a) b c) => (fnmsub a b c)
16742 // (fnmsub (fneg a) b c) => (fma a b c)
16743 if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
16744 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
16745
16746 // (fma a (fneg b) c) => (fnmsub a b c)
16747 // (fnmsub a (fneg b) c) => (fma a b c)
16748 if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
16749 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
16750
16751 return SDValue();
16752}
16753
16754bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
16755 // Only duplicate to increase tail calls for the 64-bit SysV ABIs.
16756 if (!Subtarget.is64BitELFABI())
16757 return false;
16758
16759 // If not a tail call then no need to proceed.
16760 if (!CI->isTailCall())
16761 return false;
16762
16763 // If sibling calls have been disabled and tail-calls aren't guaranteed
16764 // there is no reason to duplicate.
16765 auto &TM = getTargetMachine();
16766 if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
16767 return false;
16768
16769 // Can't tail call a function called indirectly, or if it has variadic args.
16770 const Function *Callee = CI->getCalledFunction();
16771 if (!Callee || Callee->isVarArg())
16772 return false;
16773
16774 // Make sure the callee and caller calling conventions are eligible for tco.
16775 const Function *Caller = CI->getParent()->getParent();
16776 if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
16777 CI->getCallingConv()))
16778 return false;
16779
16780 // If the function is local then we have a good chance at tail-calling it
16781 return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee);
16782}
16783
16784bool PPCTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
16785 if (!Subtarget.hasVSX())
16786 return false;
16787 if (Subtarget.hasP9Vector() && VT == MVT::f128)
16788 return true;
16789 return VT == MVT::f32 || VT == MVT::f64 ||
16790 VT == MVT::v4f32 || VT == MVT::v2f64;
16791}
16792
16793bool PPCTargetLowering::
16794isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
16795 const Value *Mask = AndI.getOperand(1);
16796 // If the mask is suitable for andi. or andis. we should sink the and.
16797 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
16798 // Can't handle constants wider than 64-bits.
16799 if (CI->getBitWidth() > 64)
16800 return false;
16801 int64_t ConstVal = CI->getZExtValue();
16802 return isUInt<16>(ConstVal) ||
16803 (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
16804 }
16805
16806 // For non-constant masks, we can always use the record-form and.
16807 return true;
16808}
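
The constant-mask test above accepts exactly the masks expressible as a single andi. (low 16 bits) or andis. (high 16 bits) immediate. A tiny standalone restatement, for illustration only:

#include <cassert>
#include <cstdint>

// Mirrors the check: true iff the mask fits a single andi. or andis.
static bool fitsRecordFormAndImm(uint64_t M) {
  return M <= 0xFFFFu || ((M >> 16) <= 0xFFFFu && (M & 0xFFFFu) == 0);
}

int main() {
  assert(fitsRecordFormAndImm(0x00FFu));       // andi.
  assert(fitsRecordFormAndImm(0xFFFF0000u));   // andis.
  assert(!fitsRecordFormAndImm(0x00010001u));  // spans both halves, no single imm
  return 0;
}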
16809
16810// Transform (abs (sub (zext a), (zext b))) to (vabsd a b 0)
16811// Transform (abs (sub (zext a), (zext_invec b))) to (vabsd a b 0)
16812// Transform (abs (sub (zext_invec a), (zext_invec b))) to (vabsd a b 0)
16813// Transform (abs (sub (zext_invec a), (zext b))) to (vabsd a b 0)
16814// Transform (abs (sub a, b) to (vabsd a b 1)) if a & b of type v4i32
16815SDValue PPCTargetLowering::combineABS(SDNode *N, DAGCombinerInfo &DCI) const {
16816 assert((N->getOpcode() == ISD::ABS) && "Need ABS node here");
16817 assert(Subtarget.hasP9Altivec() &&
16818 "Only combine this when P9 altivec supported!");
16819 EVT VT = N->getValueType(0);
16820 if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
16821 return SDValue();
16822
16823 SelectionDAG &DAG = DCI.DAG;
16824 SDLoc dl(N);
16825 if (N->getOperand(0).getOpcode() == ISD::SUB) {
16826 // Even for signed integers, the result is known to be non-negative (as a
16827 // signed integer) because both inputs are zero-extended.
16828 unsigned SubOpcd0 = N->getOperand(0)->getOperand(0).getOpcode();
16829 unsigned SubOpcd1 = N->getOperand(0)->getOperand(1).getOpcode();
16830 if ((SubOpcd0 == ISD::ZERO_EXTEND ||
16831 SubOpcd0 == ISD::ZERO_EXTEND_VECTOR_INREG) &&
16832 (SubOpcd1 == ISD::ZERO_EXTEND ||
16833 SubOpcd1 == ISD::ZERO_EXTEND_VECTOR_INREG)) {
16834 return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
16835 N->getOperand(0)->getOperand(0),
16836 N->getOperand(0)->getOperand(1),
16837 DAG.getTargetConstant(0, dl, MVT::i32));
16838 }
16839
16840 // For type v4i32, it can be optimized with xvnegsp + vabsduw
16841 if (N->getOperand(0).getValueType() == MVT::v4i32 &&
16842 N->getOperand(0).hasOneUse()) {
16843 return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
16844 N->getOperand(0)->getOperand(0),
16845 N->getOperand(0)->getOperand(1),
16846 DAG.getTargetConstant(1, dl, MVT::i32));
16847 }
16848 }
16849
16850 return SDValue();
16851}
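
VABSD produces an unsigned absolute difference per element; the scalar identity behind the zero-extended case above can be checked directly (sketch only, using i8 elements widened to i32):

#include <cassert>
#include <cstdint>
#include <cstdlib>

int main() {
  // abs(zext(a) - zext(b)) equals the unsigned absolute difference that the
  // byte form of VABSD computes per element.
  uint8_t a = 7, b = 200;
  int32_t diff = int32_t(a) - int32_t(b);
  uint32_t absd = a > b ? uint32_t(a - b) : uint32_t(b - a);
  assert(uint32_t(std::abs(diff)) == absd);
  return 0;
}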
16852
16853 // For types v4i32/v8i16/v16i8, transform
16854// from (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) to (vabsd a, b)
16855// from (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) to (vabsd a, b)
16856// from (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) to (vabsd a, b)
16857// from (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) to (vabsd a, b)
16858SDValue PPCTargetLowering::combineVSelect(SDNode *N,
16859 DAGCombinerInfo &DCI) const {
16860 assert((N->getOpcode() == ISD::VSELECT) && "Need VSELECT node here");
16861 assert(Subtarget.hasP9Altivec() &&
16862 "Only combine this when P9 altivec supported!");
16863
16864 SelectionDAG &DAG = DCI.DAG;
16865 SDLoc dl(N);
16866 SDValue Cond = N->getOperand(0);
16867 SDValue TrueOpnd = N->getOperand(1);
16868 SDValue FalseOpnd = N->getOperand(2);
16869 EVT VT = N->getOperand(1).getValueType();
16870
16871 if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB ||
16872 FalseOpnd.getOpcode() != ISD::SUB)
16873 return SDValue();
16874
16875 // ABSD is only available for types v4i32/v8i16/v16i8.
16876 if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
16877 return SDValue();
16878
16879 // Only combine when at least one operand has a single use, so that we save at least one dependent computation.
16880 if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse()))
16881 return SDValue();
16882
16883 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
16884
16885 // Can only handle unsigned comparison here
16886 switch (CC) {
16887 default:
16888 return SDValue();
16889 case ISD::SETUGT:
16890 case ISD::SETUGE:
16891 break;
16892 case ISD::SETULT:
16893 case ISD::SETULE:
16894 std::swap(TrueOpnd, FalseOpnd);
16895 break;
16896 }
16897
16898 SDValue CmpOpnd1 = Cond.getOperand(0);
16899 SDValue CmpOpnd2 = Cond.getOperand(1);
16900
16901 // SETCC CmpOpnd1 CmpOpnd2 cond
16902 // TrueOpnd = CmpOpnd1 - CmpOpnd2
16903 // FalseOpnd = CmpOpnd2 - CmpOpnd1
16904 if (TrueOpnd.getOperand(0) == CmpOpnd1 &&
16905 TrueOpnd.getOperand(1) == CmpOpnd2 &&
16906 FalseOpnd.getOperand(0) == CmpOpnd2 &&
16907 FalseOpnd.getOperand(1) == CmpOpnd1) {
16908 return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(1).getValueType(),
16909 CmpOpnd1, CmpOpnd2,
16910 DAG.getTargetConstant(0, dl, MVT::i32));
16911 }
16912
16913 return SDValue();
16914}