Bug Summary

File: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Warning: line 9266, column 36
Although the value stored to 'SplatBits' is used in the enclosing expression, the value is never actually read from 'SplatBits'
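
The code at line 9266 is not included in this excerpt, but the pattern the deadcode.DeadStores checker reports here is an assignment whose result is consumed only by the enclosing expression while the assigned variable is never read afterwards. A minimal sketch of that pattern (hypothetical names and values, not the actual code from PPCISelLowering.cpp):

#include <cstdint>

// Hypothetical reduction of the flagged pattern: the store to 'SplatBits'
// feeds the enclosing comparison, but 'SplatBits' itself is never read
// again, so the analyzer reports the store as dead.
bool splatIsNonZero(uint64_t HighBits) {
  uint64_t SplatBits;
  return (SplatBits = HighBits) != 0; // value stored to 'SplatBits' is never read
}

// Equivalent form that avoids the dead store:
// bool splatIsNonZero(uint64_t HighBits) { return HighBits != 0; }

The usual fix is either to drop the assignment and use the right-hand side directly, or to keep the assignment only where the variable is actually consumed later.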

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name PPCISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -fno-split-dwarf-inlining -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-12/lib/clang/12.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/build-llvm/lib/Target/PowerPC -I /build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC -I /build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/build-llvm/include -I /build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-12/lib/clang/12.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/build-llvm/lib/Target/PowerPC -fdebug-prefix-map=/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2020-11-21-121427-42170-1 -x c++ /build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
1//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the PPCISelLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCISelLowering.h"
14#include "MCTargetDesc/PPCPredicates.h"
15#include "PPC.h"
16#include "PPCCCState.h"
17#include "PPCCallingConv.h"
18#include "PPCFrameLowering.h"
19#include "PPCInstrInfo.h"
20#include "PPCMachineFunctionInfo.h"
21#include "PPCPerfectShuffle.h"
22#include "PPCRegisterInfo.h"
23#include "PPCSubtarget.h"
24#include "PPCTargetMachine.h"
25#include "llvm/ADT/APFloat.h"
26#include "llvm/ADT/APInt.h"
27#include "llvm/ADT/ArrayRef.h"
28#include "llvm/ADT/DenseMap.h"
29#include "llvm/ADT/None.h"
30#include "llvm/ADT/STLExtras.h"
31#include "llvm/ADT/SmallPtrSet.h"
32#include "llvm/ADT/SmallSet.h"
33#include "llvm/ADT/SmallVector.h"
34#include "llvm/ADT/Statistic.h"
35#include "llvm/ADT/StringRef.h"
36#include "llvm/ADT/StringSwitch.h"
37#include "llvm/CodeGen/CallingConvLower.h"
38#include "llvm/CodeGen/ISDOpcodes.h"
39#include "llvm/CodeGen/MachineBasicBlock.h"
40#include "llvm/CodeGen/MachineFrameInfo.h"
41#include "llvm/CodeGen/MachineFunction.h"
42#include "llvm/CodeGen/MachineInstr.h"
43#include "llvm/CodeGen/MachineInstrBuilder.h"
44#include "llvm/CodeGen/MachineJumpTableInfo.h"
45#include "llvm/CodeGen/MachineLoopInfo.h"
46#include "llvm/CodeGen/MachineMemOperand.h"
47#include "llvm/CodeGen/MachineModuleInfo.h"
48#include "llvm/CodeGen/MachineOperand.h"
49#include "llvm/CodeGen/MachineRegisterInfo.h"
50#include "llvm/CodeGen/RuntimeLibcalls.h"
51#include "llvm/CodeGen/SelectionDAG.h"
52#include "llvm/CodeGen/SelectionDAGNodes.h"
53#include "llvm/CodeGen/TargetInstrInfo.h"
54#include "llvm/CodeGen/TargetLowering.h"
55#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
56#include "llvm/CodeGen/TargetRegisterInfo.h"
57#include "llvm/CodeGen/ValueTypes.h"
58#include "llvm/IR/CallingConv.h"
59#include "llvm/IR/Constant.h"
60#include "llvm/IR/Constants.h"
61#include "llvm/IR/DataLayout.h"
62#include "llvm/IR/DebugLoc.h"
63#include "llvm/IR/DerivedTypes.h"
64#include "llvm/IR/Function.h"
65#include "llvm/IR/GlobalValue.h"
66#include "llvm/IR/IRBuilder.h"
67#include "llvm/IR/Instructions.h"
68#include "llvm/IR/Intrinsics.h"
69#include "llvm/IR/IntrinsicsPowerPC.h"
70#include "llvm/IR/Module.h"
71#include "llvm/IR/Type.h"
72#include "llvm/IR/Use.h"
73#include "llvm/IR/Value.h"
74#include "llvm/MC/MCContext.h"
75#include "llvm/MC/MCExpr.h"
76#include "llvm/MC/MCRegisterInfo.h"
77#include "llvm/MC/MCSectionXCOFF.h"
78#include "llvm/MC/MCSymbolXCOFF.h"
79#include "llvm/Support/AtomicOrdering.h"
80#include "llvm/Support/BranchProbability.h"
81#include "llvm/Support/Casting.h"
82#include "llvm/Support/CodeGen.h"
83#include "llvm/Support/CommandLine.h"
84#include "llvm/Support/Compiler.h"
85#include "llvm/Support/Debug.h"
86#include "llvm/Support/ErrorHandling.h"
87#include "llvm/Support/Format.h"
88#include "llvm/Support/KnownBits.h"
89#include "llvm/Support/MachineValueType.h"
90#include "llvm/Support/MathExtras.h"
91#include "llvm/Support/raw_ostream.h"
92#include "llvm/Target/TargetMachine.h"
93#include "llvm/Target/TargetOptions.h"
94#include <algorithm>
95#include <cassert>
96#include <cstdint>
97#include <iterator>
98#include <list>
99#include <utility>
100#include <vector>
101
102using namespace llvm;
103
104#define DEBUG_TYPE "ppc-lowering"
105
106static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
107cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
108
109static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
110cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
111
112static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
113cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
114
115static cl::opt<bool> DisableSCO("disable-ppc-sco",
116cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
117
118static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
119cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
120
121static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
122cl::desc("use absolute jump tables on ppc"), cl::Hidden);
123
124STATISTIC(NumTailCalls, "Number of tail calls");
125STATISTIC(NumSiblingCalls, "Number of sibling calls");
126STATISTIC(ShufflesHandledWithVPERM, "Number of shuffles lowered to a VPERM");
127STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
128
129static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
130
131static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
132
133// FIXME: Remove this once the bug has been fixed!
134extern cl::opt<bool> ANDIGlueBug;
135
136PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
137 const PPCSubtarget &STI)
138 : TargetLowering(TM), Subtarget(STI) {
139 // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
140 // arguments are at least 4/8 bytes aligned.
141 bool isPPC64 = Subtarget.isPPC64();
142 setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
143
144 // Set up the register classes.
145 addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
146 if (!useSoftFloat()) {
147 if (hasSPE()) {
148 addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
149 addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
150 } else {
151 addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
152 addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
153 }
154 }
155
156 // Match BITREVERSE to customized fast code sequence in the td file.
157 setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
158 setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
159
160 // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
161 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
162
163 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
164 for (MVT VT : MVT::integer_valuetypes()) {
165 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
166 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
167 }
168
169 if (Subtarget.isISA3_0()) {
170 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
171 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
172 setTruncStoreAction(MVT::f64, MVT::f16, Legal);
173 setTruncStoreAction(MVT::f32, MVT::f16, Legal);
174 } else {
175 // No extending loads from f16 or HW conversions back and forth.
176 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
177 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
178 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
179 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
180 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
181 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
182 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
183 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
184 }
185
186 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
187
188 // PowerPC has pre-inc load and store's.
189 setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
190 setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
191 setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
192 setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
193 setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
194 setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
195 setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
196 setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
197 setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
198 setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
199 if (!Subtarget.hasSPE()) {
200 setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
201 setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
202 setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
203 setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
204 }
205
206 // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
207 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
208 for (MVT VT : ScalarIntVTs) {
209 setOperationAction(ISD::ADDC, VT, Legal);
210 setOperationAction(ISD::ADDE, VT, Legal);
211 setOperationAction(ISD::SUBC, VT, Legal);
212 setOperationAction(ISD::SUBE, VT, Legal);
213 }
214
215 if (Subtarget.useCRBits()) {
216 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
217
218 if (isPPC64 || Subtarget.hasFPCVT()) {
219 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Promote);
220 AddPromotedToType(ISD::STRICT_SINT_TO_FP, MVT::i1,
221 isPPC64 ? MVT::i64 : MVT::i32);
222 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Promote);
223 AddPromotedToType(ISD::STRICT_UINT_TO_FP, MVT::i1,
224 isPPC64 ? MVT::i64 : MVT::i32);
225
226 setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
227 AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
228 isPPC64 ? MVT::i64 : MVT::i32);
229 setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
230 AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
231 isPPC64 ? MVT::i64 : MVT::i32);
232 } else {
233 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Custom);
234 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Custom);
235 setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
236 setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
237 }
238
239 // PowerPC does not support direct load/store of condition registers.
240 setOperationAction(ISD::LOAD, MVT::i1, Custom);
241 setOperationAction(ISD::STORE, MVT::i1, Custom);
242
243 // FIXME: Remove this once the ANDI glue bug is fixed:
244 if (ANDIGlueBug)
245 setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
246
247 for (MVT VT : MVT::integer_valuetypes()) {
248 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
249 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
250 setTruncStoreAction(VT, MVT::i1, Expand);
251 }
252
253 addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
254 }
255
256 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
257 // PPC (the libcall is not available).
258 setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom);
259 setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom);
260 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::ppcf128, Custom);
261 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::ppcf128, Custom);
262
263 // We do not currently implement these libm ops for PowerPC.
264 setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
265 setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
266 setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
267 setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
268 setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
269 setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
270
271 // PowerPC has no SREM/UREM instructions unless we are on P9
272 // On P9 we may use a hardware instruction to compute the remainder.
273 // When the result of both the remainder and the division is required it is
274 // more efficient to compute the remainder from the result of the division
275 // rather than use the remainder instruction. The instructions are legalized
276 // directly because the DivRemPairsPass performs the transformation at the IR
277 // level.
278 if (Subtarget.isISA3_0()) {
279 setOperationAction(ISD::SREM, MVT::i32, Legal);
280 setOperationAction(ISD::UREM, MVT::i32, Legal);
281 setOperationAction(ISD::SREM, MVT::i64, Legal);
282 setOperationAction(ISD::UREM, MVT::i64, Legal);
283 } else {
284 setOperationAction(ISD::SREM, MVT::i32, Expand);
285 setOperationAction(ISD::UREM, MVT::i32, Expand);
286 setOperationAction(ISD::SREM, MVT::i64, Expand);
287 setOperationAction(ISD::UREM, MVT::i64, Expand);
288 }
289
290 // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
291 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
292 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
293 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
294 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
295 setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
296 setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
297 setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
298 setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
299
300 // Handle constrained floating-point operations of scalar.
301 // TODO: Handle SPE specific operation.
302 setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
303 setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
304 setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
305 setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
306 setOperationAction(ISD::STRICT_FMA, MVT::f32, Legal);
307 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
308
309 setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
310 setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
311 setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
312 setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
313 setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal);
314 if (Subtarget.hasVSX()) {
315 setOperationAction(ISD::STRICT_FRINT, MVT::f32, Legal);
316 setOperationAction(ISD::STRICT_FRINT, MVT::f64, Legal);
317 }
318
319 if (Subtarget.hasFSQRT()) {
320 setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
321 setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
322 }
323
324 if (Subtarget.hasFPRND()) {
325 setOperationAction(ISD::STRICT_FFLOOR, MVT::f32, Legal);
326 setOperationAction(ISD::STRICT_FCEIL, MVT::f32, Legal);
327 setOperationAction(ISD::STRICT_FTRUNC, MVT::f32, Legal);
328 setOperationAction(ISD::STRICT_FROUND, MVT::f32, Legal);
329
330 setOperationAction(ISD::STRICT_FFLOOR, MVT::f64, Legal);
331 setOperationAction(ISD::STRICT_FCEIL, MVT::f64, Legal);
332 setOperationAction(ISD::STRICT_FTRUNC, MVT::f64, Legal);
333 setOperationAction(ISD::STRICT_FROUND, MVT::f64, Legal);
334 }
335
336 // We don't support sin/cos/sqrt/fmod/pow
337 setOperationAction(ISD::FSIN , MVT::f64, Expand);
338 setOperationAction(ISD::FCOS , MVT::f64, Expand);
339 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
340 setOperationAction(ISD::FREM , MVT::f64, Expand);
341 setOperationAction(ISD::FPOW , MVT::f64, Expand);
342 setOperationAction(ISD::FSIN , MVT::f32, Expand);
343 setOperationAction(ISD::FCOS , MVT::f32, Expand);
344 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
345 setOperationAction(ISD::FREM , MVT::f32, Expand);
346 setOperationAction(ISD::FPOW , MVT::f32, Expand);
347 if (Subtarget.hasSPE()) {
348 setOperationAction(ISD::FMA , MVT::f64, Expand);
349 setOperationAction(ISD::FMA , MVT::f32, Expand);
350 } else {
351 setOperationAction(ISD::FMA , MVT::f64, Legal);
352 setOperationAction(ISD::FMA , MVT::f32, Legal);
353 }
354
355 if (Subtarget.hasSPE())
356 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
357
358 setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
359
360 // If we're enabling GP optimizations, use hardware square root
361 if (!Subtarget.hasFSQRT() &&
362 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
363 Subtarget.hasFRE()))
364 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
365
366 if (!Subtarget.hasFSQRT() &&
367 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
368 Subtarget.hasFRES()))
369 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
370
371 if (Subtarget.hasFCPSGN()) {
372 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
373 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
374 } else {
375 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
376 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
377 }
378
379 if (Subtarget.hasFPRND()) {
380 setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
381 setOperationAction(ISD::FCEIL, MVT::f64, Legal);
382 setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
383 setOperationAction(ISD::FROUND, MVT::f64, Legal);
384
385 setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
386 setOperationAction(ISD::FCEIL, MVT::f32, Legal);
387 setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
388 setOperationAction(ISD::FROUND, MVT::f32, Legal);
389 }
390
391 // PowerPC does not have BSWAP, but we can use vector BSWAP instruction xxbrd
392 // to speed up scalar BSWAP64.
393 // CTPOP or CTTZ were introduced in P8/P9 respectively
394 setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
395 if (Subtarget.hasP9Vector())
396 setOperationAction(ISD::BSWAP, MVT::i64 , Custom);
397 else
398 setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
399 if (Subtarget.isISA3_0()) {
400 setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
401 setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
402 } else {
403 setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
404 setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
405 }
406
407 if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
408 setOperationAction(ISD::CTPOP, MVT::i32 , Legal);
409 setOperationAction(ISD::CTPOP, MVT::i64 , Legal);
410 } else {
411 setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
412 setOperationAction(ISD::CTPOP, MVT::i64 , Expand);
413 }
414
415 // PowerPC does not have ROTR
416 setOperationAction(ISD::ROTR, MVT::i32 , Expand);
417 setOperationAction(ISD::ROTR, MVT::i64 , Expand);
418
419 if (!Subtarget.useCRBits()) {
420 // PowerPC does not have Select
421 setOperationAction(ISD::SELECT, MVT::i32, Expand);
422 setOperationAction(ISD::SELECT, MVT::i64, Expand);
423 setOperationAction(ISD::SELECT, MVT::f32, Expand);
424 setOperationAction(ISD::SELECT, MVT::f64, Expand);
425 }
426
427 // PowerPC wants to turn select_cc of FP into fsel when possible.
428 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
429 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
430
431 // PowerPC wants to optimize integer setcc a bit
432 if (!Subtarget.useCRBits())
433 setOperationAction(ISD::SETCC, MVT::i32, Custom);
434
435 if (Subtarget.hasFPU()) {
436 setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
437 setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
438 setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Legal);
439
440 setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
441 setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
442 setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Legal);
443 }
444
445 // PowerPC does not have BRCOND which requires SetCC
446 if (!Subtarget.useCRBits())
447 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
448
449 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
450
451 if (Subtarget.hasSPE()) {
452 // SPE has built-in conversions
453 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Legal);
454 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Legal);
455 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Legal);
456 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
457 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
458 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
459 } else {
460 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
461 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
462 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
463
464 // PowerPC does not have [U|S]INT_TO_FP
465 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Expand);
466 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Expand);
467 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
468 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
469 }
470
471 if (Subtarget.hasDirectMove() && isPPC64) {
472 setOperationAction(ISD::BITCAST, MVT::f32, Legal);
473 setOperationAction(ISD::BITCAST, MVT::i32, Legal);
474 setOperationAction(ISD::BITCAST, MVT::i64, Legal);
475 setOperationAction(ISD::BITCAST, MVT::f64, Legal);
476 if (TM.Options.UnsafeFPMath) {
477 setOperationAction(ISD::LRINT, MVT::f64, Legal);
478 setOperationAction(ISD::LRINT, MVT::f32, Legal);
479 setOperationAction(ISD::LLRINT, MVT::f64, Legal);
480 setOperationAction(ISD::LLRINT, MVT::f32, Legal);
481 setOperationAction(ISD::LROUND, MVT::f64, Legal);
482 setOperationAction(ISD::LROUND, MVT::f32, Legal);
483 setOperationAction(ISD::LLROUND, MVT::f64, Legal);
484 setOperationAction(ISD::LLROUND, MVT::f32, Legal);
485 }
486 } else {
487 setOperationAction(ISD::BITCAST, MVT::f32, Expand);
488 setOperationAction(ISD::BITCAST, MVT::i32, Expand);
489 setOperationAction(ISD::BITCAST, MVT::i64, Expand);
490 setOperationAction(ISD::BITCAST, MVT::f64, Expand);
491 }
492
493 // We cannot sextinreg(i1). Expand to shifts.
494 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
495
496 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
497 // SjLj exception handling but a light-weight setjmp/longjmp replacement to
498 // support continuation, user-level threading, and etc.. As a result, no
499 // other SjLj exception interfaces are implemented and please don't build
500 // your own exception handling based on them.
501 // LLVM/Clang supports zero-cost DWARF exception handling.
502 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
503 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
504
505 // We want to legalize GlobalAddress and ConstantPool nodes into the
506 // appropriate instructions to materialize the address.
507 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
508 setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
509 setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
510 setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
511 setOperationAction(ISD::JumpTable, MVT::i32, Custom);
512 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
513 setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
514 setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
515 setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
516 setOperationAction(ISD::JumpTable, MVT::i64, Custom);
517
518 // TRAP is legal.
519 setOperationAction(ISD::TRAP, MVT::Other, Legal);
520
521 // TRAMPOLINE is custom lowered.
522 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
523 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
524
525 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
526 setOperationAction(ISD::VASTART , MVT::Other, Custom);
527
528 if (Subtarget.is64BitELFABI()) {
529 // VAARG always uses double-word chunks, so promote anything smaller.
530 setOperationAction(ISD::VAARG, MVT::i1, Promote);
531 AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
532 setOperationAction(ISD::VAARG, MVT::i8, Promote);
533 AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
534 setOperationAction(ISD::VAARG, MVT::i16, Promote);
535 AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
536 setOperationAction(ISD::VAARG, MVT::i32, Promote);
537 AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
538 setOperationAction(ISD::VAARG, MVT::Other, Expand);
539 } else if (Subtarget.is32BitELFABI()) {
540 // VAARG is custom lowered with the 32-bit SVR4 ABI.
541 setOperationAction(ISD::VAARG, MVT::Other, Custom);
542 setOperationAction(ISD::VAARG, MVT::i64, Custom);
543 } else
544 setOperationAction(ISD::VAARG, MVT::Other, Expand);
545
546 // VACOPY is custom lowered with the 32-bit SVR4 ABI.
547 if (Subtarget.is32BitELFABI())
548 setOperationAction(ISD::VACOPY , MVT::Other, Custom);
549 else
550 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
551
552 // Use the default implementation.
553 setOperationAction(ISD::VAEND , MVT::Other, Expand);
554 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
555 setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom);
556 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
557 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom);
558 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
559 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
560 setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
561 setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
562
563 // We want to custom lower some of our intrinsics.
564 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
565
566 // To handle counter-based loop conditions.
567 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
568
569 setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
570 setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
571 setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
572 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
573
574 // Comparisons that require checking two conditions.
575 if (Subtarget.hasSPE()) {
576 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
577 setCondCodeAction(ISD::SETO, MVT::f64, Expand);
578 setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
579 setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
580 }
581 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
582 setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
583 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
584 setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
585 setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
586 setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
587 setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
588 setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
589 setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
590 setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
591 setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
592 setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
593
594 if (Subtarget.has64BitSupport()) {
595 // They also have instructions for converting between i64 and fp.
596 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
597 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Expand);
598 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
599 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand);
600 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
601 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
602 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
603 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
604 // This is just the low 32 bits of a (signed) fp->i64 conversion.
605 // We cannot do this with Promote because i64 is not a legal type.
606 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
607 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
608
609 if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) {
610 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
611 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
612 }
613 } else {
614 // PowerPC does not have FP_TO_UINT on 32-bit implementations.
615 if (Subtarget.hasSPE()) {
616 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Legal);
617 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
618 } else {
619 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Expand);
620 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
621 }
622 }
623
624 // With the instructions enabled under FPCVT, we can do everything.
625 if (Subtarget.hasFPCVT()) {
626 if (Subtarget.has64BitSupport()) {
627 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
628 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
629 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
630 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
631 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
632 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
633 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
634 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
635 }
636
637 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
638 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
639 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
640 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
641 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
642 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
643 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
644 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
645 }
646
647 if (Subtarget.use64BitRegs()) {
648 // 64-bit PowerPC implementations can support i64 types directly
649 addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
650 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
651 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
652 // 64-bit PowerPC wants to expand i128 shifts itself.
653 setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
654 setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
655 setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
656 } else {
657 // 32-bit PowerPC wants to expand i64 shifts itself.
658 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
659 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
660 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
661 }
662
663 // PowerPC has better expansions for funnel shifts than the generic
664 // TargetLowering::expandFunnelShift.
665 if (Subtarget.has64BitSupport()) {
666 setOperationAction(ISD::FSHL, MVT::i64, Custom);
667 setOperationAction(ISD::FSHR, MVT::i64, Custom);
668 }
669 setOperationAction(ISD::FSHL, MVT::i32, Custom);
670 setOperationAction(ISD::FSHR, MVT::i32, Custom);
671
672 if (Subtarget.hasVSX()) {
673 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
674 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
675 setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
676 setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
677 }
678
679 if (Subtarget.hasAltivec()) {
680 for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
681 setOperationAction(ISD::SADDSAT, VT, Legal);
682 setOperationAction(ISD::SSUBSAT, VT, Legal);
683 setOperationAction(ISD::UADDSAT, VT, Legal);
684 setOperationAction(ISD::USUBSAT, VT, Legal);
685 }
686 // First set operation action for all vector types to expand. Then we
687 // will selectively turn on ones that can be effectively codegen'd.
688 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
689 // add/sub are legal for all supported vector VT's.
690 setOperationAction(ISD::ADD, VT, Legal);
691 setOperationAction(ISD::SUB, VT, Legal);
692
693 // For v2i64, these are only valid with P8Vector. This is corrected after
694 // the loop.
695 if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
696 setOperationAction(ISD::SMAX, VT, Legal);
697 setOperationAction(ISD::SMIN, VT, Legal);
698 setOperationAction(ISD::UMAX, VT, Legal);
699 setOperationAction(ISD::UMIN, VT, Legal);
700 }
701 else {
702 setOperationAction(ISD::SMAX, VT, Expand);
703 setOperationAction(ISD::SMIN, VT, Expand);
704 setOperationAction(ISD::UMAX, VT, Expand);
705 setOperationAction(ISD::UMIN, VT, Expand);
706 }
707
708 if (Subtarget.hasVSX()) {
709 setOperationAction(ISD::FMAXNUM, VT, Legal);
710 setOperationAction(ISD::FMINNUM, VT, Legal);
711 }
712
713 // Vector instructions introduced in P8
714 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
715 setOperationAction(ISD::CTPOP, VT, Legal);
716 setOperationAction(ISD::CTLZ, VT, Legal);
717 }
718 else {
719 setOperationAction(ISD::CTPOP, VT, Expand);
720 setOperationAction(ISD::CTLZ, VT, Expand);
721 }
722
723 // Vector instructions introduced in P9
724 if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
725 setOperationAction(ISD::CTTZ, VT, Legal);
726 else
727 setOperationAction(ISD::CTTZ, VT, Expand);
728
729 // We promote all shuffles to v16i8.
730 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
731 AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
732
733 // We promote all non-typed operations to v4i32.
734 setOperationAction(ISD::AND , VT, Promote);
735 AddPromotedToType (ISD::AND , VT, MVT::v4i32);
736 setOperationAction(ISD::OR , VT, Promote);
737 AddPromotedToType (ISD::OR , VT, MVT::v4i32);
738 setOperationAction(ISD::XOR , VT, Promote);
739 AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
740 setOperationAction(ISD::LOAD , VT, Promote);
741 AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
742 setOperationAction(ISD::SELECT, VT, Promote);
743 AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
744 setOperationAction(ISD::VSELECT, VT, Legal);
745 setOperationAction(ISD::SELECT_CC, VT, Promote);
746 AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
747 setOperationAction(ISD::STORE, VT, Promote);
748 AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
749
750 // No other operations are legal.
751 setOperationAction(ISD::MUL , VT, Expand);
752 setOperationAction(ISD::SDIV, VT, Expand);
753 setOperationAction(ISD::SREM, VT, Expand);
754 setOperationAction(ISD::UDIV, VT, Expand);
755 setOperationAction(ISD::UREM, VT, Expand);
756 setOperationAction(ISD::FDIV, VT, Expand);
757 setOperationAction(ISD::FREM, VT, Expand);
758 setOperationAction(ISD::FNEG, VT, Expand);
759 setOperationAction(ISD::FSQRT, VT, Expand);
760 setOperationAction(ISD::FLOG, VT, Expand);
761 setOperationAction(ISD::FLOG10, VT, Expand);
762 setOperationAction(ISD::FLOG2, VT, Expand);
763 setOperationAction(ISD::FEXP, VT, Expand);
764 setOperationAction(ISD::FEXP2, VT, Expand);
765 setOperationAction(ISD::FSIN, VT, Expand);
766 setOperationAction(ISD::FCOS, VT, Expand);
767 setOperationAction(ISD::FABS, VT, Expand);
768 setOperationAction(ISD::FFLOOR, VT, Expand);
769 setOperationAction(ISD::FCEIL, VT, Expand);
770 setOperationAction(ISD::FTRUNC, VT, Expand);
771 setOperationAction(ISD::FRINT, VT, Expand);
772 setOperationAction(ISD::FNEARBYINT, VT, Expand);
773 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
774 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
775 setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
776 setOperationAction(ISD::MULHU, VT, Expand);
777 setOperationAction(ISD::MULHS, VT, Expand);
778 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
779 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
780 setOperationAction(ISD::UDIVREM, VT, Expand);
781 setOperationAction(ISD::SDIVREM, VT, Expand);
782 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
783 setOperationAction(ISD::FPOW, VT, Expand);
784 setOperationAction(ISD::BSWAP, VT, Expand);
785 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
786 setOperationAction(ISD::ROTL, VT, Expand);
787 setOperationAction(ISD::ROTR, VT, Expand);
788
789 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
790 setTruncStoreAction(VT, InnerVT, Expand);
791 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
792 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
793 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
794 }
795 }
796 setOperationAction(ISD::SELECT_CC, MVT::v4i32, Expand);
797 if (!Subtarget.hasP8Vector()) {
798 setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
799 setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
800 setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
801 setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
802 }
803
804 for (auto VT : {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8})
805 setOperationAction(ISD::ABS, VT, Custom);
806
807 // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
808 // with merges, splats, etc.
809 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
810
811 // Vector truncates to sub-word integer that fit in an Altivec/VSX register
812 // are cheap, so handle them before they get expanded to scalar.
813 setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
814 setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
815 setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
816 setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
817 setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
818
819 setOperationAction(ISD::AND , MVT::v4i32, Legal);
820 setOperationAction(ISD::OR , MVT::v4i32, Legal);
821 setOperationAction(ISD::XOR , MVT::v4i32, Legal);
822 setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
823 setOperationAction(ISD::SELECT, MVT::v4i32,
824 Subtarget.useCRBits() ? Legal : Expand);
825 setOperationAction(ISD::STORE , MVT::v4i32, Legal);
826 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
827 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);
828 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
829 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal);
830 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
831 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
832 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
833 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
834 setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
835 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
836 setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
837 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
838
839 // Without hasP8Altivec set, v2i64 SMAX isn't available.
840 // But ABS custom lowering requires SMAX support.
841 if (!Subtarget.hasP8Altivec())
842 setOperationAction(ISD::ABS, MVT::v2i64, Expand);
843
844 // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
845 setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
846 // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
847 if (Subtarget.hasAltivec())
848 for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
849 setOperationAction(ISD::ROTL, VT, Legal);
850 // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
851 if (Subtarget.hasP8Altivec())
852 setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
853
854 addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
855 addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
856 addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
857 addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
858
859 setOperationAction(ISD::MUL, MVT::v4f32, Legal);
860 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
861
862 if (Subtarget.hasVSX()) {
863 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
864 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
865 }
866
867 if (Subtarget.hasP8Altivec())
868 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
869 else
870 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
871
872 if (Subtarget.isISA3_1()) {
873 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
874 setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
875 setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
876 setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
877 setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
878 setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
879 setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
880 setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
881 setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
882 setOperationAction(ISD::UREM, MVT::v2i64, Legal);
883 setOperationAction(ISD::SREM, MVT::v2i64, Legal);
884 setOperationAction(ISD::UREM, MVT::v4i32, Legal);
885 setOperationAction(ISD::SREM, MVT::v4i32, Legal);
886 setOperationAction(ISD::UREM, MVT::v1i128, Legal);
887 setOperationAction(ISD::SREM, MVT::v1i128, Legal);
888 setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
889 setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
890 setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
891 }
892
893 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
894 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
895
896 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
897 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
898
899 setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
900 setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
901 setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
902 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
903
904 // Altivec does not contain unordered floating-point compare instructions
905 setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
906 setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
907 setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
908 setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
909
910 if (Subtarget.hasVSX()) {
911 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
912 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
913 if (Subtarget.hasP8Vector()) {
914 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
915 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
916 }
917 if (Subtarget.hasDirectMove() && isPPC64) {
918 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
919 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
920 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
921 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
922 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
923 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
924 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
925 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
926 }
927 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
928
929 // The nearbyint variants are not allowed to raise the inexact exception
930 // so we can only code-gen them with unsafe math.
931 if (TM.Options.UnsafeFPMath) {
932 setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
933 setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
934 }
935
936 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
937 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
938 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
939 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
940 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
941 setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
942 setOperationAction(ISD::FROUND, MVT::f64, Legal);
943 setOperationAction(ISD::FRINT, MVT::f64, Legal);
944
945 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
946 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
947 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
948 setOperationAction(ISD::FROUND, MVT::f32, Legal);
949 setOperationAction(ISD::FRINT, MVT::f32, Legal);
950
951 setOperationAction(ISD::MUL, MVT::v2f64, Legal);
952 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
953
954 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
955 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
956
957 // Share the Altivec comparison restrictions.
958 setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
959 setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
960 setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
961 setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
962
963 setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
964 setOperationAction(ISD::STORE, MVT::v2f64, Legal);
965
966 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);
967
968 if (Subtarget.hasP8Vector())
969 addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
970
971 addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
972
973 addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
974 addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
975 addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
976
977 if (Subtarget.hasP8Altivec()) {
978 setOperationAction(ISD::SHL, MVT::v2i64, Legal);
979 setOperationAction(ISD::SRA, MVT::v2i64, Legal);
980 setOperationAction(ISD::SRL, MVT::v2i64, Legal);
981
982 // 128 bit shifts can be accomplished via 3 instructions for SHL and
983 // SRL, but not for SRA because of the instructions available:
984 // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth
985 // doing
986 setOperationAction(ISD::SHL, MVT::v1i128, Expand);
987 setOperationAction(ISD::SRL, MVT::v1i128, Expand);
988 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
989
990 setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
991 }
992 else {
993 setOperationAction(ISD::SHL, MVT::v2i64, Expand);
994 setOperationAction(ISD::SRA, MVT::v2i64, Expand);
995 setOperationAction(ISD::SRL, MVT::v2i64, Expand);
996
997 setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
998
999 // VSX v2i64 only supports non-arithmetic operations.
1000 setOperationAction(ISD::ADD, MVT::v2i64, Expand);
1001 setOperationAction(ISD::SUB, MVT::v2i64, Expand);
1002 }
1003
1004 if (Subtarget.isISA3_1())
1005 setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
1006 else
1007 setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
1008
1009 setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
1010 AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
1011 setOperationAction(ISD::STORE, MVT::v2i64, Promote);
1012 AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
1013
1014 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);
1015
1016 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal);
1017 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal);
1018 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal);
1019 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal);
1020 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
1021 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
1022 setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
1023 setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
1024
1025 // Custom handling for partial vectors of integers converted to
1026 // floating point. We already have optimal handling for v2i32 through
1027 // the DAG combine, so those aren't necessary.
1028 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i8, Custom);
1029 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i8, Custom);
1030 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i16, Custom);
1031 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i16, Custom);
1032 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i8, Custom);
1033 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i8, Custom);
1034 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i16, Custom);
1035 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i16, Custom);
1036 setOperationAction(ISD::UINT_TO_FP, MVT::v2i8, Custom);
1037 setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
1038 setOperationAction(ISD::UINT_TO_FP, MVT::v2i16, Custom);
1039 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
1040 setOperationAction(ISD::SINT_TO_FP, MVT::v2i8, Custom);
1041 setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Custom);
1042 setOperationAction(ISD::SINT_TO_FP, MVT::v2i16, Custom);
1043 setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
1044
1045 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
1046 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
1047 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
1048 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
1049 setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
1050 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Legal);
1051
1052 if (Subtarget.hasDirectMove())
1053 setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
1054 setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
1055
1056 // Handle constrained floating-point operations of vector.
1057 // The predictor is `hasVSX` because altivec instruction has
1058 // no exception but VSX vector instruction has.
1059 setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
1060 setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
1061 setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
1062 setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
1063 setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
1064 setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
1065 setOperationAction(ISD::STRICT_FMAXNUM, MVT::v4f32, Legal);
1066 setOperationAction(ISD::STRICT_FMINNUM, MVT::v4f32, Legal);
1067 setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
1068 setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
1069 setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal);
1070 setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
1071 setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);
1072
1073 setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
1074 setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
1075 setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
1076 setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
1077 setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);
1078 setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
1079 setOperationAction(ISD::STRICT_FMAXNUM, MVT::v2f64, Legal);
1080 setOperationAction(ISD::STRICT_FMINNUM, MVT::v2f64, Legal);
1081 setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal);
1082 setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
1083 setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal);
1084 setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
1085 setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal);
1086
1087 addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1088 }
1089
1090 if (Subtarget.hasP8Altivec()) {
1091 addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1092 addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1093 }
1094
1095 if (Subtarget.hasP9Vector()) {
1096 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
1097 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
1098
1099 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1100 // SRL, but not for SRA because of the instructions available:
1101 // VS{RL} and VS{RL}O.
1102 setOperationAction(ISD::SHL, MVT::v1i128, Legal);
1103 setOperationAction(ISD::SRL, MVT::v1i128, Legal);
1104 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1105
1106 addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1107 setOperationAction(ISD::FADD, MVT::f128, Legal);
1108 setOperationAction(ISD::FSUB, MVT::f128, Legal);
1109 setOperationAction(ISD::FDIV, MVT::f128, Legal);
1110 setOperationAction(ISD::FMUL, MVT::f128, Legal);
1111 setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
1112 // No extending loads to f128 on PPC.
1113 for (MVT FPT : MVT::fp_valuetypes())
1114 setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
1115 setOperationAction(ISD::FMA, MVT::f128, Legal);
1116 setCondCodeAction(ISD::SETULT, MVT::f128, Expand);
1117 setCondCodeAction(ISD::SETUGT, MVT::f128, Expand);
1118 setCondCodeAction(ISD::SETUEQ, MVT::f128, Expand);
1119 setCondCodeAction(ISD::SETOGE, MVT::f128, Expand);
1120 setCondCodeAction(ISD::SETOLE, MVT::f128, Expand);
1121 setCondCodeAction(ISD::SETONE, MVT::f128, Expand);
1122
1123 setOperationAction(ISD::FTRUNC, MVT::f128, Legal);
1124 setOperationAction(ISD::FRINT, MVT::f128, Legal);
1125 setOperationAction(ISD::FFLOOR, MVT::f128, Legal);
1126 setOperationAction(ISD::FCEIL, MVT::f128, Legal);
1127 setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal);
1128 setOperationAction(ISD::FROUND, MVT::f128, Legal);
1129
1130 setOperationAction(ISD::SELECT, MVT::f128, Expand);
1131 setOperationAction(ISD::FP_ROUND, MVT::f64, Legal);
1132 setOperationAction(ISD::FP_ROUND, MVT::f32, Legal);
1133 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
1134 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
1135 setOperationAction(ISD::BITCAST, MVT::i128, Custom);
1136 // No implementation for these ops for PowerPC.
1137 setOperationAction(ISD::FSIN, MVT::f128, Expand);
1138 setOperationAction(ISD::FCOS, MVT::f128, Expand);
1139 setOperationAction(ISD::FPOW, MVT::f128, Expand);
1140 setOperationAction(ISD::FPOWI, MVT::f128, Expand);
1141 setOperationAction(ISD::FREM, MVT::f128, Expand);
1142
1143 // Handle constrained floating-point operations of fp128
1144 setOperationAction(ISD::STRICT_FADD, MVT::f128, Legal);
1145 setOperationAction(ISD::STRICT_FSUB, MVT::f128, Legal);
1146 setOperationAction(ISD::STRICT_FMUL, MVT::f128, Legal);
1147 setOperationAction(ISD::STRICT_FDIV, MVT::f128, Legal);
1148 setOperationAction(ISD::STRICT_FMA, MVT::f128, Legal);
1149 setOperationAction(ISD::STRICT_FSQRT, MVT::f128, Legal);
1150 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Legal);
1151 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);
1152 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
1153 setOperationAction(ISD::STRICT_FRINT, MVT::f128, Legal);
1154 setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f128, Legal);
1155 setOperationAction(ISD::STRICT_FFLOOR, MVT::f128, Legal);
1156 setOperationAction(ISD::STRICT_FCEIL, MVT::f128, Legal);
1157 setOperationAction(ISD::STRICT_FTRUNC, MVT::f128, Legal);
1158 setOperationAction(ISD::STRICT_FROUND, MVT::f128, Legal);
1159 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
1160 setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
1161 setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
1162 setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
1163 setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
1164 }
1165
1166 if (Subtarget.hasP9Altivec()) {
1167 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
1168 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
1169
1170 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Legal);
1171 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
1172 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
1173 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Legal);
1174 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Legal);
1175 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
1176 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
1177 }
1178 }
1179
1180 if (Subtarget.pairedVectorMemops()) {
1181 addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
1182 setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
1183 setOperationAction(ISD::STORE, MVT::v256i1, Custom);
1184 }
1185 if (Subtarget.hasMMA()) {
1186 addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1187 setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
1188 setOperationAction(ISD::STORE, MVT::v512i1, Custom);
1189 setOperationAction(ISD::BUILD_VECTOR, MVT::v512i1, Custom);
1190 }
1191
1192 if (Subtarget.has64BitSupport())
1193 setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
1194
1195 if (Subtarget.isISA3_1())
1196 setOperationAction(ISD::SRA, MVT::v1i128, Legal);
1197
1198 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1199
1200 if (!isPPC64) {
1201 setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
1202 setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
1203 }
1204
1205 setBooleanContents(ZeroOrOneBooleanContent);
1206
1207 if (Subtarget.hasAltivec()) {
1208 // Altivec instructions set fields to all zeros or all ones.
1209 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
1210 }
1211
1212 if (!isPPC64) {
1213 // These libcalls are not available in 32-bit.
1214 setLibcallName(RTLIB::SHL_I128, nullptr);
1215 setLibcallName(RTLIB::SRL_I128, nullptr);
1216 setLibcallName(RTLIB::SRA_I128, nullptr);
1217 }
1218
1219 if (!isPPC64)
1220 setMaxAtomicSizeInBitsSupported(32);
1221
1222 setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1223
1224 // We have target-specific dag combine patterns for the following nodes:
1225 setTargetDAGCombine(ISD::ADD);
1226 setTargetDAGCombine(ISD::SHL);
1227 setTargetDAGCombine(ISD::SRA);
1228 setTargetDAGCombine(ISD::SRL);
1229 setTargetDAGCombine(ISD::MUL);
1230 setTargetDAGCombine(ISD::FMA);
1231 setTargetDAGCombine(ISD::SINT_TO_FP);
1232 setTargetDAGCombine(ISD::BUILD_VECTOR);
1233 if (Subtarget.hasFPCVT())
1234 setTargetDAGCombine(ISD::UINT_TO_FP);
1235 setTargetDAGCombine(ISD::LOAD);
1236 setTargetDAGCombine(ISD::STORE);
1237 setTargetDAGCombine(ISD::BR_CC);
1238 if (Subtarget.useCRBits())
1239 setTargetDAGCombine(ISD::BRCOND);
1240 setTargetDAGCombine(ISD::BSWAP);
1241 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
1242 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
1243 setTargetDAGCombine(ISD::INTRINSIC_VOID);
1244
1245 setTargetDAGCombine(ISD::SIGN_EXTEND);
1246 setTargetDAGCombine(ISD::ZERO_EXTEND);
1247 setTargetDAGCombine(ISD::ANY_EXTEND);
1248
1249 setTargetDAGCombine(ISD::TRUNCATE);
1250 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
1251
1252
1253 if (Subtarget.useCRBits()) {
1254 setTargetDAGCombine(ISD::TRUNCATE);
1255 setTargetDAGCombine(ISD::SETCC);
1256 setTargetDAGCombine(ISD::SELECT_CC);
1257 }
1258
1259 if (Subtarget.hasP9Altivec()) {
1260 setTargetDAGCombine(ISD::ABS);
1261 setTargetDAGCombine(ISD::VSELECT);
1262 }
1263
1264 setLibcallName(RTLIB::LOG_F128, "logf128");
1265 setLibcallName(RTLIB::LOG2_F128, "log2f128");
1266 setLibcallName(RTLIB::LOG10_F128, "log10f128");
1267 setLibcallName(RTLIB::EXP_F128, "expf128");
1268 setLibcallName(RTLIB::EXP2_F128, "exp2f128");
1269 setLibcallName(RTLIB::SIN_F128, "sinf128");
1270 setLibcallName(RTLIB::COS_F128, "cosf128");
1271 setLibcallName(RTLIB::POW_F128, "powf128");
1272 setLibcallName(RTLIB::FMIN_F128, "fminf128");
1273 setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
1274 setLibcallName(RTLIB::POWI_F128, "__powikf2");
1275 setLibcallName(RTLIB::REM_F128, "fmodf128");
1276
1277 // With 32 condition bits, we don't need to sink (and duplicate) compares
1278 // aggressively in CodeGenPrep.
1279 if (Subtarget.useCRBits()) {
1280 setHasMultipleConditionRegisters();
1281 setJumpIsExpensive();
1282 }
1283
1284 setMinFunctionAlignment(Align(4));
1285
1286 switch (Subtarget.getCPUDirective()) {
1287 default: break;
1288 case PPC::DIR_970:
1289 case PPC::DIR_A2:
1290 case PPC::DIR_E500:
1291 case PPC::DIR_E500mc:
1292 case PPC::DIR_E5500:
1293 case PPC::DIR_PWR4:
1294 case PPC::DIR_PWR5:
1295 case PPC::DIR_PWR5X:
1296 case PPC::DIR_PWR6:
1297 case PPC::DIR_PWR6X:
1298 case PPC::DIR_PWR7:
1299 case PPC::DIR_PWR8:
1300 case PPC::DIR_PWR9:
1301 case PPC::DIR_PWR10:
1302 case PPC::DIR_PWR_FUTURE:
1303 setPrefLoopAlignment(Align(16));
1304 setPrefFunctionAlignment(Align(16));
1305 break;
1306 }
1307
1308 if (Subtarget.enableMachineScheduler())
1309 setSchedulingPreference(Sched::Source);
1310 else
1311 setSchedulingPreference(Sched::Hybrid);
1312
1313 computeRegisterProperties(STI.getRegisterInfo());
1314
1315 // The Freescale cores do better with aggressive inlining of memcpy and
1316 // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
1317 if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
1318 Subtarget.getCPUDirective() == PPC::DIR_E5500) {
1319 MaxStoresPerMemset = 32;
1320 MaxStoresPerMemsetOptSize = 16;
1321 MaxStoresPerMemcpy = 32;
1322 MaxStoresPerMemcpyOptSize = 8;
1323 MaxStoresPerMemmove = 32;
1324 MaxStoresPerMemmoveOptSize = 8;
1325 } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
1326 // The A2 also benefits from (very) aggressive inlining of memcpy and
1327 // friends. The overhead of the function call, even when warm, can be
1328 // over one hundred cycles.
1329 MaxStoresPerMemset = 128;
1330 MaxStoresPerMemcpy = 128;
1331 MaxStoresPerMemmove = 128;
1332 MaxLoadsPerMemcmp = 128;
1333 } else {
1334 MaxLoadsPerMemcmp = 8;
1335 MaxLoadsPerMemcmpOptSize = 4;
1336 }
1337
1338 IsStrictFPEnabled = true;
1339
1340 // Let the subtarget (CPU) decide if a predictable select is more expensive
1341 // than the corresponding branch. This information is used in CGP to decide
1342 // when to convert selects into branches.
1343 PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();
1344}
1345
1346/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1347/// the desired ByVal argument alignment.
1348static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1349 if (MaxAlign == MaxMaxAlign)
1350 return;
1351 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1352 if (MaxMaxAlign >= 32 &&
1353 VTy->getPrimitiveSizeInBits().getFixedSize() >= 256)
1354 MaxAlign = Align(32);
1355 else if (VTy->getPrimitiveSizeInBits().getFixedSize() >= 128 &&
1356 MaxAlign < 16)
1357 MaxAlign = Align(16);
1358 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1359 Align EltAlign;
1360 getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1361 if (EltAlign > MaxAlign)
1362 MaxAlign = EltAlign;
1363 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1364 for (auto *EltTy : STy->elements()) {
1365 Align EltAlign;
1366 getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1367 if (EltAlign > MaxAlign)
1368 MaxAlign = EltAlign;
1369 if (MaxAlign == MaxMaxAlign)
1370 break;
1371 }
1372 }
1373}
1374
1375/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1376/// function arguments in the caller parameter area.
1377unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
1378 const DataLayout &DL) const {
1379 // 16-byte and wider vectors are passed on a 16-byte boundary.
1380 // The rest are passed on an 8-byte boundary on PPC64 and a 4-byte boundary on PPC32.
1381 Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1382 if (Subtarget.hasAltivec())
1383 getMaxByValAlign(Ty, Alignment, Align(16));
1384 return Alignment.value();
1385}
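
A minimal standalone sketch of the ByVal rule above, under simplifying assumptions: Field and byValAlign are hypothetical names (not backend APIs), fields are modeled only by bit width and vector-ness, and the 32-byte case that getMaxByValAlign handles for 256-bit vectors is omitted.

#include <algorithm>
#include <cstdio>
#include <initializer_list>

struct Field { unsigned Bits; bool IsVector; }; // hypothetical model of a member

static unsigned byValAlign(std::initializer_list<Field> Fields,
                           bool HasAltivec, bool IsPPC64) {
  unsigned Alignment = IsPPC64 ? 8 : 4;      // default parameter-area alignment
  if (HasAltivec)
    for (const Field &F : Fields)
      if (F.IsVector && F.Bits >= 128)       // a 16-byte (or wider) vector member
        Alignment = std::max(Alignment, 16u);
  return Alignment;
}

int main() {
  // struct { double d; <4 x i32> v; } passed ByVal on PPC64 with AltiVec: 16
  std::printf("%u\n", byValAlign({{64, false}, {128, true}}, true, true));
}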
1386
1387bool PPCTargetLowering::useSoftFloat() const {
1388 return Subtarget.useSoftFloat();
1389}
1390
1391bool PPCTargetLowering::hasSPE() const {
1392 return Subtarget.hasSPE();
1393}
1394
1395bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
1396 return VT.isScalarInteger();
1397}
1398
1399const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1400 switch ((PPCISD::NodeType)Opcode) {
1401 case PPCISD::FIRST_NUMBER: break;
1402 case PPCISD::FSEL: return "PPCISD::FSEL";
1403 case PPCISD::XSMAXCDP: return "PPCISD::XSMAXCDP";
1404 case PPCISD::XSMINCDP: return "PPCISD::XSMINCDP";
1405 case PPCISD::FCFID: return "PPCISD::FCFID";
1406 case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
1407 case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
1408 case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
1409 case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
1410 case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
1411 case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
1412 case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
1413 case PPCISD::FP_TO_UINT_IN_VSR:
1414 return "PPCISD::FP_TO_UINT_IN_VSR";
1415 case PPCISD::FP_TO_SINT_IN_VSR:
1416 return "PPCISD::FP_TO_SINT_IN_VSR";
1417 case PPCISD::FRE: return "PPCISD::FRE";
1418 case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
1419 case PPCISD::STFIWX: return "PPCISD::STFIWX";
1420 case PPCISD::VPERM: return "PPCISD::VPERM";
1421 case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
1422 case PPCISD::XXSPLTI_SP_TO_DP:
1423 return "PPCISD::XXSPLTI_SP_TO_DP";
1424 case PPCISD::XXSPLTI32DX:
1425 return "PPCISD::XXSPLTI32DX";
1426 case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
1427 case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
1428 case PPCISD::VECSHL: return "PPCISD::VECSHL";
1429 case PPCISD::CMPB: return "PPCISD::CMPB";
1430 case PPCISD::Hi: return "PPCISD::Hi";
1431 case PPCISD::Lo: return "PPCISD::Lo";
1432 case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
1433 case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1434 case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
1435 case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
1436 case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
1437 case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA";
1438 case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
1439 case PPCISD::SRL: return "PPCISD::SRL";
1440 case PPCISD::SRA: return "PPCISD::SRA";
1441 case PPCISD::SHL: return "PPCISD::SHL";
1442 case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
1443 case PPCISD::CALL: return "PPCISD::CALL";
1444 case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
1445 case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC";
1446 case PPCISD::MTCTR: return "PPCISD::MTCTR";
1447 case PPCISD::BCTRL: return "PPCISD::BCTRL";
1448 case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
1449 case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
1450 case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
1451 case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
1452 case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1453 case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
1454 case PPCISD::MFVSR: return "PPCISD::MFVSR";
1455 case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
1456 case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
1457 case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
1458 case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
1459 case PPCISD::SCALAR_TO_VECTOR_PERMUTED:
1460 return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
1461 case PPCISD::ANDI_rec_1_EQ_BIT:
1462 return "PPCISD::ANDI_rec_1_EQ_BIT";
1463 case PPCISD::ANDI_rec_1_GT_BIT:
1464 return "PPCISD::ANDI_rec_1_GT_BIT";
1465 case PPCISD::VCMP: return "PPCISD::VCMP";
1466 case PPCISD::VCMPo: return "PPCISD::VCMPo";
1467 case PPCISD::LBRX: return "PPCISD::LBRX";
1468 case PPCISD::STBRX: return "PPCISD::STBRX";
1469 case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
1470 case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
1471 case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
1472 case PPCISD::STXSIX: return "PPCISD::STXSIX";
1473 case PPCISD::VEXTS: return "PPCISD::VEXTS";
1474 case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
1475 case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
1476 case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE";
1477 case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE";
1478 case PPCISD::ST_VSR_SCAL_INT:
1479 return "PPCISD::ST_VSR_SCAL_INT";
1480 case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
1481 case PPCISD::BDNZ: return "PPCISD::BDNZ";
1482 case PPCISD::BDZ: return "PPCISD::BDZ";
1483 case PPCISD::MFFS: return "PPCISD::MFFS";
1484 case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
1485 case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
1486 case PPCISD::CR6SET: return "PPCISD::CR6SET";
1487 case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
1488 case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
1489 case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
1490 case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1491 case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
1492 case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
1493 case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
1494 case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
1495 case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
1496 case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1497 case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
1498 case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
1499 case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
1500 case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1501 case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1502 case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
1503 case PPCISD::PADDI_DTPREL:
1504 return "PPCISD::PADDI_DTPREL";
1505 case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
1506 case PPCISD::SC: return "PPCISD::SC";
1507 case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";
1508 case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";
1509 case PPCISD::RFEBB: return "PPCISD::RFEBB";
1510 case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
1511 case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
1512 case PPCISD::VABSD: return "PPCISD::VABSD";
1513 case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
1514 case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
1515 case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
1516 case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
1517 case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
1518 case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
1519 case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR";
1520 case PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR:
1521 return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
1522 case PPCISD::TLS_LOCAL_EXEC_MAT_ADDR:
1523 return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
1524 case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD";
1525 case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD";
1526 case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
1527 case PPCISD::XXMFACC: return "PPCISD::XXMFACC";
1528 case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
1529 case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
1530 case PPCISD::STRICT_FADDRTZ:
1531 return "PPCISD::STRICT_FADDRTZ";
1532 case PPCISD::STRICT_FCTIDZ:
1533 return "PPCISD::STRICT_FCTIDZ";
1534 case PPCISD::STRICT_FCTIWZ:
1535 return "PPCISD::STRICT_FCTIWZ";
1536 case PPCISD::STRICT_FCTIDUZ:
1537 return "PPCISD::STRICT_FCTIDUZ";
1538 case PPCISD::STRICT_FCTIWUZ:
1539 return "PPCISD::STRICT_FCTIWUZ";
1540 case PPCISD::STRICT_FCFID:
1541 return "PPCISD::STRICT_FCFID";
1542 case PPCISD::STRICT_FCFIDU:
1543 return "PPCISD::STRICT_FCFIDU";
1544 case PPCISD::STRICT_FCFIDS:
1545 return "PPCISD::STRICT_FCFIDS";
1546 case PPCISD::STRICT_FCFIDUS:
1547 return "PPCISD::STRICT_FCFIDUS";
1548 case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
1549 }
1550 return nullptr;
1551}
1552
1553EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
1554 EVT VT) const {
1555 if (!VT.isVector())
1556 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1557
1558 return VT.changeVectorElementTypeToInteger();
1559}
1560
1561bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
1562 assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1563 return true;
1564}
1565
1566//===----------------------------------------------------------------------===//
1567// Node matching predicates, for use by the tblgen matching code.
1568//===----------------------------------------------------------------------===//
1569
1570/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1571static bool isFloatingPointZero(SDValue Op) {
1572 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1573 return CFP->getValueAPF().isZero();
1574 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1575 // Maybe this has already been legalized into the constant pool?
1576 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1577 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1578 return CFP->getValueAPF().isZero();
1579 }
1580 return false;
1581}
1582
1583/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
1584/// true if Op is undef or if it matches the specified value.
1585static bool isConstantOrUndef(int Op, int Val) {
1586 return Op < 0 || Op == Val;
1587}
1588
1589/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1590/// VPKUHUM instruction.
1591/// The ShuffleKind distinguishes between big-endian operations with
1592/// two different inputs (0), either-endian operations with two identical
1593/// inputs (1), and little-endian operations with two different inputs (2).
1594/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1595bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1596 SelectionDAG &DAG) {
1597 bool IsLE = DAG.getDataLayout().isLittleEndian();
1598 if (ShuffleKind == 0) {
1599 if (IsLE)
1600 return false;
1601 for (unsigned i = 0; i != 16; ++i)
1602 if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1603 return false;
1604 } else if (ShuffleKind == 2) {
1605 if (!IsLE)
1606 return false;
1607 for (unsigned i = 0; i != 16; ++i)
1608 if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1609 return false;
1610 } else if (ShuffleKind == 1) {
1611 unsigned j = IsLE ? 0 : 1;
1612 for (unsigned i = 0; i != 8; ++i)
1613 if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
1614 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
1615 return false;
1616 }
1617 return true;
1618}
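
As a concrete illustration of the ShuffleKind 0 case above, the only mask accepted for a big-endian, two-input VPKUHUM takes element i from byte 2*i+1 of the concatenated inputs, i.e. the low byte of each halfword. A standalone sketch, not backend code:

#include <cstdio>

int main() {
  // Expected big-endian VPKUHUM mask (ShuffleKind 0): element i == 2*i+1.
  for (int i = 0; i < 16; ++i)
    std::printf("%d ", 2 * i + 1); // 1 3 5 7 ... 31
  std::printf("\n");
}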
1619
1620/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1621/// VPKUWUM instruction.
1622/// The ShuffleKind distinguishes between big-endian operations with
1623/// two different inputs (0), either-endian operations with two identical
1624/// inputs (1), and little-endian operations with two different inputs (2).
1625/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1626bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1627 SelectionDAG &DAG) {
1628 bool IsLE = DAG.getDataLayout().isLittleEndian();
1629 if (ShuffleKind == 0) {
1630 if (IsLE)
1631 return false;
1632 for (unsigned i = 0; i != 16; i += 2)
1633 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
1634 !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
1635 return false;
1636 } else if (ShuffleKind == 2) {
1637 if (!IsLE)
1638 return false;
1639 for (unsigned i = 0; i != 16; i += 2)
1640 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1641 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
1642 return false;
1643 } else if (ShuffleKind == 1) {
1644 unsigned j = IsLE ? 0 : 2;
1645 for (unsigned i = 0; i != 8; i += 2)
1646 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1647 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1648 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1649 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
1650 return false;
1651 }
1652 return true;
1653}
1654
1655/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1656/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1657/// current subtarget.
1658///
1659/// The ShuffleKind distinguishes between big-endian operations with
1660/// two different inputs (0), either-endian operations with two identical
1661/// inputs (1), and little-endian operations with two different inputs (2).
1662/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1663bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1664 SelectionDAG &DAG) {
1665 const PPCSubtarget& Subtarget =
1666 static_cast<const PPCSubtarget&>(DAG.getSubtarget());
1667 if (!Subtarget.hasP8Vector())
1668 return false;
1669
1670 bool IsLE = DAG.getDataLayout().isLittleEndian();
1671 if (ShuffleKind == 0) {
1672 if (IsLE)
1673 return false;
1674 for (unsigned i = 0; i != 16; i += 4)
1675 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||
1676 !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
1677 !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
1678 !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
1679 return false;
1680 } else if (ShuffleKind == 2) {
1681 if (!IsLE)
1682 return false;
1683 for (unsigned i = 0; i != 16; i += 4)
1684 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1685 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
1686 !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
1687 !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
1688 return false;
1689 } else if (ShuffleKind == 1) {
1690 unsigned j = IsLE ? 0 : 4;
1691 for (unsigned i = 0; i != 8; i += 4)
1692 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1693 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1694 !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
1695 !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
1696 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1697 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
1698 !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1699 !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1700 return false;
1701 }
1702 return true;
1703}
1704
1705/// isVMerge - Common function, used to match vmrg* shuffles.
1706///
1707static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
1708 unsigned LHSStart, unsigned RHSStart) {
1709 if (N->getValueType(0) != MVT::v16i8)
1710 return false;
1711 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
1712        "Unsupported merge size!");
1713
1714 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
1715 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
1716 if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
1717 LHSStart+j+i*UnitSize) ||
1718 !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
1719 RHSStart+j+i*UnitSize))
1720 return false;
1721 }
1722 return true;
1723}
1724
1725/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
1726/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
1727/// The ShuffleKind distinguishes between big-endian merges with two
1728/// different inputs (0), either-endian merges with two identical inputs (1),
1729/// and little-endian merges with two different inputs (2). For the latter,
1730/// the input operands are swapped (see PPCInstrAltivec.td).
1731bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1732 unsigned ShuffleKind, SelectionDAG &DAG) {
1733 if (DAG.getDataLayout().isLittleEndian()) {
1734 if (ShuffleKind == 1) // unary
1735 return isVMerge(N, UnitSize, 0, 0);
1736 else if (ShuffleKind == 2) // swapped
1737 return isVMerge(N, UnitSize, 0, 16);
1738 else
1739 return false;
1740 } else {
1741 if (ShuffleKind == 1) // unary
1742 return isVMerge(N, UnitSize, 8, 8);
1743 else if (ShuffleKind == 0) // normal
1744 return isVMerge(N, UnitSize, 8, 24);
1745 else
1746 return false;
1747 }
1748}
1749
1750/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
1751/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
1752/// The ShuffleKind distinguishes between big-endian merges with two
1753/// different inputs (0), either-endian merges with two identical inputs (1),
1754/// and little-endian merges with two different inputs (2). For the latter,
1755/// the input operands are swapped (see PPCInstrAltivec.td).
1756bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1757 unsigned ShuffleKind, SelectionDAG &DAG) {
1758 if (DAG.getDataLayout().isLittleEndian()) {
1759 if (ShuffleKind == 1) // unary
1760 return isVMerge(N, UnitSize, 8, 8);
1761 else if (ShuffleKind == 2) // swapped
1762 return isVMerge(N, UnitSize, 8, 24);
1763 else
1764 return false;
1765 } else {
1766 if (ShuffleKind == 1) // unary
1767 return isVMerge(N, UnitSize, 0, 0);
1768 else if (ShuffleKind == 0) // normal
1769 return isVMerge(N, UnitSize, 0, 16);
1770 else
1771 return false;
1772 }
1773}
1774
1775/**
1776 * Common function used to match vmrgew and vmrgow shuffles
1777 *
1778 * The indexOffset determines whether to look for even or odd words in
1779 * the shuffle mask. This is based on the endianness of the target
1780 * machine.
1781 * - Little Endian:
1782 * - Use offset of 0 to check for odd elements
1783 * - Use offset of 4 to check for even elements
1784 * - Big Endian:
1785 * - Use offset of 0 to check for even elements
1786 * - Use offset of 4 to check for odd elements
1787 * A detailed description of the vector element ordering for little endian and
1788 * big endian can be found at
1789 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
1790 * Targeting your applications - what little endian and big endian IBM XL C/C++
1791 * compiler differences mean to you
1792 *
1793 * The mask to the shuffle vector instruction specifies the indices of the
1794 * elements from the two input vectors to place in the result. The elements are
1795 * numbered in array-access order, starting with the first vector. These vectors
1796 * are always of type v16i8, thus each vector will contain 16 elements, each
1797 * 8 bits in size. More info on the shuffle vector can be found in the
1798 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
1799 * Language Reference.
1800 *
1801 * The RHSStartValue indicates whether the same input vectors are used (unary)
1802 * or two different input vectors are used, based on the following:
1803 * - If the instruction uses the same vector for both inputs, the range of the
1804 * indices will be 0 to 15. In this case, the RHSStart value passed should
1805 * be 0.
1806 * - If the instruction has two different vectors then the range of the
1807 * indices will be 0 to 31. In this case, the RHSStart value passed should
1808 * be 16 (indices 0-15 specify elements in the first vector while indices 16
1809 * to 31 specify elements in the second vector).
1810 *
1811 * \param[in] N The shuffle vector SD Node to analyze
1812 * \param[in] IndexOffset Specifies whether to look for even or odd elements
1813 * \param[in] RHSStartValue Specifies the starting index for the righthand input
1814 * vector to the shuffle_vector instruction
1815 * \return true iff this shuffle vector represents an even or odd word merge
1816 */
1817static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
1818 unsigned RHSStartValue) {
1819 if (N->getValueType(0) != MVT::v16i8)
1820 return false;
1821
1822 for (unsigned i = 0; i < 2; ++i)
1823 for (unsigned j = 0; j < 4; ++j)
1824 if (!isConstantOrUndef(N->getMaskElt(i*4+j),
1825 i*RHSStartValue+j+IndexOffset) ||
1826 !isConstantOrUndef(N->getMaskElt(i*4+j+8),
1827 i*RHSStartValue+j+IndexOffset+8))
1828 return false;
1829 return true;
1830}
1831
1832/**
1833 * Determine if the specified shuffle mask is suitable for the vmrgew or
1834 * vmrgow instructions.
1835 *
1836 * \param[in] N The shuffle vector SD Node to analyze
1837 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
1838 * \param[in] ShuffleKind Identify the type of merge:
1839 * - 0 = big-endian merge with two different inputs;
1840 * - 1 = either-endian merge with two identical inputs;
1841 * - 2 = little-endian merge with two different inputs (inputs are swapped for
1842 * little-endian merges).
1843 * \param[in] DAG The current SelectionDAG
1844 * \return true iff this shuffle mask is suitable for a vmrgew or vmrgow merge
1845 */
1846bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
1847 unsigned ShuffleKind, SelectionDAG &DAG) {
1848 if (DAG.getDataLayout().isLittleEndian()) {
1849 unsigned indexOffset = CheckEven ? 4 : 0;
1850 if (ShuffleKind == 1) // Unary
1851 return isVMerge(N, indexOffset, 0);
1852 else if (ShuffleKind == 2) // swapped
1853 return isVMerge(N, indexOffset, 16);
1854 else
1855 return false;
1856 }
1857 else {
1858 unsigned indexOffset = CheckEven ? 0 : 4;
1859 if (ShuffleKind == 1) // Unary
1860 return isVMerge(N, indexOffset, 0);
1861 else if (ShuffleKind == 0) // Normal
1862 return isVMerge(N, indexOffset, 16);
1863 else
1864 return false;
1865 }
1866 return false;
1867}
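
To make the even/odd word merge concrete, the following standalone sketch (not backend code) builds the mask that isVMerge accepts for a big-endian, two-input vmrgew, i.e. IndexOffset 0 and RHSStartValue 16; the result interleaves the even words of the two inputs as {A0, B0, A2, B2}.

#include <cstdio>

int main() {
  int Mask[16];
  const int IndexOffset = 0, RHSStart = 16; // big-endian even merge, two inputs
  for (int i = 0; i < 2; ++i)
    for (int j = 0; j < 4; ++j) {
      Mask[i * 4 + j]     = i * RHSStart + j + IndexOffset;
      Mask[i * 4 + j + 8] = i * RHSStart + j + IndexOffset + 8;
    }
  for (int m : Mask)
    std::printf("%d ", m); // 0 1 2 3 16 17 18 19 8 9 10 11 24 25 26 27
  std::printf("\n");
}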
1868
1869/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
1870/// amount, otherwise return -1.
1871/// The ShuffleKind distinguishes between big-endian operations with two
1872/// different inputs (0), either-endian operations with two identical inputs
1873/// (1), and little-endian operations with two different inputs (2). For the
1874/// latter, the input operands are swapped (see PPCInstrAltivec.td).
1875int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
1876 SelectionDAG &DAG) {
1877 if (N->getValueType(0) != MVT::v16i8)
1878 return -1;
1879
1880 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
1881
1882 // Find the first non-undef value in the shuffle mask.
1883 unsigned i;
1884 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
1885 /*search*/;
1886
1887 if (i == 16) return -1; // all undef.
1888
1889 // Otherwise, check to see if the rest of the elements are consecutively
1890 // numbered from this value.
1891 unsigned ShiftAmt = SVOp->getMaskElt(i);
1892 if (ShiftAmt < i) return -1;
1893
1894 ShiftAmt -= i;
1895 bool isLE = DAG.getDataLayout().isLittleEndian();
1896
1897 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
1898 // Check the rest of the elements to see if they are consecutive.
1899 for (++i; i != 16; ++i)
1900 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
1901 return -1;
1902 } else if (ShuffleKind == 1) {
1903 // Check the rest of the elements to see if they are consecutive.
1904 for (++i; i != 16; ++i)
1905 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
1906 return -1;
1907 } else
1908 return -1;
1909
1910 if (isLE)
1911 ShiftAmt = 16 - ShiftAmt;
1912
1913 return ShiftAmt;
1914}
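
A small standalone sketch of the shift-amount rule above (not backend code): a mask of 16 consecutive byte indices starting at S encodes a shift by S bytes when matched on a big-endian target, and the amount is reported as 16-S on little-endian targets.

#include <cstdio>

int main() {
  int Mask[16];
  const int S = 4;
  for (int i = 0; i < 16; ++i)
    Mask[i] = S + i; // {4, 5, ..., 19}: bytes 4..15 of the first input,
                     // then bytes 0..3 of the second input
  std::printf("BE shift = %d, LE-reported shift = %d\n", Mask[0], 16 - Mask[0]);
}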
1915
1916/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
1917/// specifies a splat of a single element that is suitable for input to
1918/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
1919bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
1920 assert(N->getValueType(0) == MVT::v16i8 && isPowerOf2_32(EltSize) &&
1921        EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
1922
1923 // The consecutive indices need to specify an element, not part of two
1924 // different elements. So abandon ship early if this isn't the case.
1925 if (N->getMaskElt(0) % EltSize != 0)
1926 return false;
1927
1928 // This is a splat operation if each element of the permute is the same, and
1929 // if the value doesn't reference the second vector.
1930 unsigned ElementBase = N->getMaskElt(0);
1931
1932 // FIXME: Handle UNDEF elements too!
1933 if (ElementBase >= 16)
1934 return false;
1935
1936 // Check that the indices are consecutive, in the case of a multi-byte element
1937 // splatted with a v16i8 mask.
1938 for (unsigned i = 1; i != EltSize; ++i)
1939 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
1940 return false;
1941
1942 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
1943 if (N->getMaskElt(i) < 0) continue;
1944 for (unsigned j = 0; j != EltSize; ++j)
1945 if (N->getMaskElt(i+j) != N->getMaskElt(j))
1946 return false;
1947 }
1948 return true;
1949}
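
The splat test above can be reproduced on a plain 16-entry mask array. This standalone sketch uses a hypothetical helper name and omits the undef (-1) handling that the real function performs:

#include <array>
#include <cassert>
#include <cstdio>

// Every EltSize-byte group must repeat the same EltSize consecutive byte
// indices, all taken from the first input vector (indices 0..15).
static bool checkSplatMask(const std::array<int, 16> &Mask, unsigned EltSize) {
  if (Mask[0] % (int)EltSize != 0 || Mask[0] >= 16)
    return false;
  for (unsigned i = 1; i != EltSize; ++i)
    if (Mask[i] != Mask[0] + (int)i)
      return false;
  for (unsigned i = EltSize; i != 16; i += EltSize)
    for (unsigned j = 0; j != EltSize; ++j)
      if (Mask[i + j] != Mask[j])
        return false;
  return true;
}

int main() {
  // Splat of word element 2: bytes 8..11 repeated four times.
  std::array<int, 16> M = {8,9,10,11, 8,9,10,11, 8,9,10,11, 8,9,10,11};
  assert(checkSplatMask(M, 4));
  std::printf("v4i32 splat of element 2\n");
}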
1950
1951/// Check that the mask is shuffling N byte elements. Within each N byte
1952/// element of the mask, the indices could be either in increasing or
1953/// decreasing order as long as they are consecutive.
1954/// \param[in] N the shuffle vector SD Node to analyze
1955/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
1956/// Word/DoubleWord/QuadWord).
1957/// \param[in] StepLen the delta indices number among the N byte element, if
1958/// the mask is in increasing/decreasing order then it is 1/-1.
1959/// \return true iff the mask is shuffling N byte elements.
1960static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
1961 int StepLen) {
1962 assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
1963        "Unexpected element width.");
1964 assert((StepLen == 1 || StepLen == -1) && "Unexpected step length.");
1965
1966 unsigned NumOfElem = 16 / Width;
1967 unsigned MaskVal[16]; // Width is never greater than 16
1968 for (unsigned i = 0; i < NumOfElem; ++i) {
1969 MaskVal[0] = N->getMaskElt(i * Width);
1970 if ((StepLen == 1) && (MaskVal[0] % Width)) {
1971 return false;
1972 } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
1973 return false;
1974 }
1975
1976 for (unsigned int j = 1; j < Width; ++j) {
1977 MaskVal[j] = N->getMaskElt(i * Width + j);
1978 if (MaskVal[j] != MaskVal[j-1] + StepLen) {
1979 return false;
1980 }
1981 }
1982 }
1983
1984 return true;
1985}
1986
1987bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
1988 unsigned &InsertAtByte, bool &Swap, bool IsLE) {
1989 if (!isNByteElemShuffleMask(N, 4, 1))
1990 return false;
1991
1992 // Now we look at mask elements 0,4,8,12
1993 unsigned M0 = N->getMaskElt(0) / 4;
1994 unsigned M1 = N->getMaskElt(4) / 4;
1995 unsigned M2 = N->getMaskElt(8) / 4;
1996 unsigned M3 = N->getMaskElt(12) / 4;
1997 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
1998 unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
1999
2000 // Below, let H and L be arbitrary elements of the shuffle mask
2001 // where H is in the range [4,7] and L is in the range [0,3].
2002 // H, 1, 2, 3 or L, 5, 6, 7
2003 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
2004 (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
2005 ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
2006 InsertAtByte = IsLE ? 12 : 0;
2007 Swap = M0 < 4;
2008 return true;
2009 }
2010 // 0, H, 2, 3 or 4, L, 6, 7
2011 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
2012 (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
2013 ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
2014 InsertAtByte = IsLE ? 8 : 4;
2015 Swap = M1 < 4;
2016 return true;
2017 }
2018 // 0, 1, H, 3 or 4, 5, L, 7
2019 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
2020 (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
2021 ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
2022 InsertAtByte = IsLE ? 4 : 8;
2023 Swap = M2 < 4;
2024 return true;
2025 }
2026 // 0, 1, 2, H or 4, 5, 6, L
2027 if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
2028 (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
2029 ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
2030 InsertAtByte = IsLE ? 0 : 12;
2031 Swap = M3 < 4;
2032 return true;
2033 }
2034
2035 // If both vector operands for the shuffle are the same vector, the mask will
2036 // contain only elements from the first one and the second one will be undef.
2037 if (N->getOperand(1).isUndef()) {
2038 ShiftElts = 0;
2039 Swap = true;
2040 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2041 if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
2042 InsertAtByte = IsLE ? 12 : 0;
2043 return true;
2044 }
2045 if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2046 InsertAtByte = IsLE ? 8 : 4;
2047 return true;
2048 }
2049 if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2050 InsertAtByte = IsLE ? 4 : 8;
2051 return true;
2052 }
2053 if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2054 InsertAtByte = IsLE ? 0 : 12;
2055 return true;
2056 }
2057 }
2058
2059 return false;
2060}
2061
2062bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2063 bool &Swap, bool IsLE) {
2064 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2065 // Ensure each byte index of the word is consecutive.
2066 if (!isNByteElemShuffleMask(N, 4, 1))
2067 return false;
2068
2069 // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2070 unsigned M0 = N->getMaskElt(0) / 4;
2071 unsigned M1 = N->getMaskElt(4) / 4;
2072 unsigned M2 = N->getMaskElt(8) / 4;
2073 unsigned M3 = N->getMaskElt(12) / 4;
2074
2075 // If both vector operands for the shuffle are the same vector, the mask will
2076 // contain only elements from the first one and the second one will be undef.
2077 if (N->getOperand(1).isUndef()) {
2078 assert(M0 < 4 && "Indexing into an undef vector?");
2079 if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2080 return false;
2081
2082 ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2083 Swap = false;
2084 return true;
2085 }
2086
2087 // Ensure each word index of the ShuffleVector Mask is consecutive.
2088 if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2089 return false;
2090
2091 if (IsLE) {
2092 if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2093 // Input vectors don't need to be swapped if the leading element
2094 // of the result is one of the 3 left elements of the second vector
2095 // (or if there is no shift to be done at all).
2096 Swap = false;
2097 ShiftElts = (8 - M0) % 8;
2098 } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2099 // Input vectors need to be swapped if the leading element
2100 // of the result is one of the 3 left elements of the first vector
2101 // (or if we're shifting by 4 - thereby simply swapping the vectors).
2102 Swap = true;
2103 ShiftElts = (4 - M0) % 4;
2104 }
2105
2106 return true;
2107 } else { // BE
2108 if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
2109 // Input vectors don't need to be swapped if the leading element
2110 // of the result is one of the 4 elements of the first vector.
2111 Swap = false;
2112 ShiftElts = M0;
2113 } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
2114 // Input vectors need to be swapped if the leading element
2115 // of the result is one of the 4 elements of the right vector.
2116 Swap = true;
2117 ShiftElts = M0 - 4;
2118 }
2119
2120 return true;
2121 }
2122}
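
For the single-operand case above, a short standalone arithmetic check (not backend code): with the second operand undef and a word mask of {M0, M0+1, M0+2, M0+3} mod 4, the shift count is M0 on big-endian targets and (4 - M0) % 4 on little-endian targets.

#include <cstdio>

int main() {
  const unsigned M0 = 1; // word mask {1, 2, 3, 0}
  std::printf("BE ShiftElts = %u, LE ShiftElts = %u\n", M0, (4 - M0) % 4);
}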
2123
2124bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
2125 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2126
2127 if (!isNByteElemShuffleMask(N, Width, -1))
2128 return false;
2129
2130 for (int i = 0; i < 16; i += Width)
2131 if (N->getMaskElt(i) != i + Width - 1)
2132 return false;
2133
2134 return true;
2135}
2136
2137bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
2138 return isXXBRShuffleMaskHelper(N, 2);
2139}
2140
2141bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
2142 return isXXBRShuffleMaskHelper(N, 4);
2143}
2144
2145bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
2146 return isXXBRShuffleMaskHelper(N, 8);
2147}
2148
2149bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
2150 return isXXBRShuffleMaskHelper(N, 16);
2151}
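
As an illustration of what these predicates accept, the following standalone sketch (not backend code) prints the byte-reverse mask for Width = 4, i.e. the XXBRW pattern in which each word's bytes appear in descending order:

#include <cstdio>

int main() {
  const int Width = 4; // XXBRW: reverse bytes within each word
  int Mask[16];
  for (int i = 0; i < 16; i += Width)
    for (int j = 0; j < Width; ++j)
      Mask[i + j] = i + Width - 1 - j; // descending byte indices per element
  for (int m : Mask)
    std::printf("%d ", m); // 3 2 1 0 7 6 5 4 11 10 9 8 15 14 13 12
  std::printf("\n");
}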
2152
2153/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2154/// if the inputs to the instruction should be swapped and set \p DM to the
2155/// value for the immediate.
2156/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2157/// AND element 0 of the result comes from the first input (LE) or second input
2158/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2159/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2160/// mask.
2161bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
2162 bool &Swap, bool IsLE) {
2163 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2164
2165 // Ensure each byte index of the double word is consecutive.
2166 if (!isNByteElemShuffleMask(N, 8, 1))
2167 return false;
2168
2169 unsigned M0 = N->getMaskElt(0) / 8;
2170 unsigned M1 = N->getMaskElt(8) / 8;
2171 assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2172
2173 // If both vector operands for the shuffle are the same vector, the mask will
2174 // contain only elements from the first one and the second one will be undef.
2175 if (N->getOperand(1).isUndef()) {
2176 if ((M0 | M1) < 2) {
2177 DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2178 Swap = false;
2179 return true;
2180 } else
2181 return false;
2182 }
2183
2184 if (IsLE) {
2185 if (M0 > 1 && M1 < 2) {
2186 Swap = false;
2187 } else if (M0 < 2 && M1 > 1) {
2188 M0 = (M0 + 2) % 4;
2189 M1 = (M1 + 2) % 4;
2190 Swap = true;
2191 } else
2192 return false;
2193
2194 // Note: if control flow comes here that means Swap is already set above
2195 DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2196 return true;
2197 } else { // BE
2198 if (M0 < 2 && M1 > 1) {
2199 Swap = false;
2200 } else if (M0 > 1 && M1 < 2) {
2201 M0 = (M0 + 2) % 4;
2202 M1 = (M1 + 2) % 4;
2203 Swap = true;
2204 } else
2205 return false;
2206
2207 // Note: if control flow comes here that means Swap is already set above
2208 DM = (M0 << 1) + (M1 & 1);
2209 return true;
2210 }
2211}
2212
2213
2214/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2215/// appropriate for PPC mnemonics (which have a big endian bias - namely
2216/// elements are counted from the left of the vector register).
2217unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2218 SelectionDAG &DAG) {
2219 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2220 assert(isSplatShuffleMask(SVOp, EltSize));
2221 if (DAG.getDataLayout().isLittleEndian())
2222 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2223 else
2224 return SVOp->getMaskElt(0) / EltSize;
2225}
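
A standalone sketch of the little-endian index flip above (the helper name is illustrative, not a backend API): for a v4i32 splat whose first mask byte is 8, the PPC mnemonic index is 1 on little-endian targets and 2 on big-endian targets.

#include <cstdio>

static unsigned splatIdxForMnemonic(unsigned FirstMaskElt, unsigned EltSize,
                                    bool IsLittleEndian) {
  unsigned ArrayIdx = FirstMaskElt / EltSize; // element index in array order
  unsigned NumElts = 16 / EltSize;
  return IsLittleEndian ? NumElts - 1 - ArrayIdx : ArrayIdx;
}

int main() {
  // A v4i32 splat of array element 2 (mask bytes start at 8) is VSPLTW 1 on LE
  // and VSPLTW 2 on BE.
  std::printf("LE: %u  BE: %u\n", splatIdxForMnemonic(8, 4, true),
              splatIdxForMnemonic(8, 4, false));
}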
2226
2227/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2228/// by using a vspltis[bhw] instruction of the specified element size, return
2229/// the constant being splatted. The ByteSize field indicates the number of
2230/// bytes of each element [124] -> [bhw].
2231SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2232 SDValue OpVal(nullptr, 0);
2233
2234 // If ByteSize of the splat is bigger than the element size of the
2235 // build_vector, then we have a case where we are checking for a splat where
2236 // multiple elements of the buildvector are folded together into a single
2237 // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
2238 unsigned EltSize = 16/N->getNumOperands();
2239 if (EltSize < ByteSize) {
2240 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2241 SDValue UniquedVals[4];
2242 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2243
2244 // See if all of the elements in the buildvector agree across.
2245 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2246 if (N->getOperand(i).isUndef()) continue;
2247 // If the element isn't a constant, bail fully out.
2248 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2249
2250 if (!UniquedVals[i&(Multiple-1)].getNode())
2251 UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2252 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2253 return SDValue(); // no match.
2254 }
2255
2256 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2257 // either constant or undef values that are identical for each chunk. See
2258 // if these chunks can form into a larger vspltis*.
2259
2260 // Check to see if all of the leading entries are either 0 or -1. If
2261 // neither, then this won't fit into the immediate field.
2262 bool LeadingZero = true;
2263 bool LeadingOnes = true;
2264 for (unsigned i = 0; i != Multiple-1; ++i) {
2265 if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2266
2267 LeadingZero &= isNullConstant(UniquedVals[i]);
2268 LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2269 }
2270 // Finally, check the least significant entry.
2271 if (LeadingZero) {
2272 if (!UniquedVals[Multiple-1].getNode())
2273 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
2274 int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
2275 if (Val < 16) // 0,0,0,4 -> vspltisw(4)
2276 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2277 }
2278 if (LeadingOnes) {
2279 if (!UniquedVals[Multiple-1].getNode())
2280 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2281 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2282 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2283 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2284 }
2285
2286 return SDValue();
2287 }
2288
2289 // Check to see if this buildvec has a single non-undef value in its elements.
2290 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2291 if (N->getOperand(i).isUndef()) continue;
2292 if (!OpVal.getNode())
2293 OpVal = N->getOperand(i);
2294 else if (OpVal != N->getOperand(i))
2295 return SDValue();
2296 }
2297
2298 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
2299
2300 unsigned ValSizeInBytes = EltSize;
2301 uint64_t Value = 0;
2302 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2303 Value = CN->getZExtValue();
2304 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2305 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2306 Value = FloatToBits(CN->getValueAPF().convertToFloat());
2307 }
2308
2309 // If the splat value is larger than the element value, then we can never do
2310 // this splat. The only case that we could fit the replicated bits into our
2311 // immediate field for would be zero, and we prefer to use vxor for it.
2312 if (ValSizeInBytes < ByteSize) return SDValue();
2313
2314 // If the element value is larger than the splat value, check if it consists
2315 // of a repeated bit pattern of size ByteSize.
2316 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2317 return SDValue();
2318
2319 // Properly sign extend the value.
2320 int MaskVal = SignExtend32(Value, ByteSize * 8);
2321
2322 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2323 if (MaskVal == 0) return SDValue();
2324
2325 // Finally, if this value fits in a 5 bit sext field, return it
2326 if (SignExtend32<5>(MaskVal) == MaskVal)
2327 return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2328 return SDValue();
2329}
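
The final check above rejects any splat value that does not survive a round trip through a 5-bit signed immediate field, i.e. anything outside [-16, 15]. A standalone sketch; signExtend5 is an illustrative stand-in for SignExtend32<5>, not the backend API.

#include <cstdint>
#include <cstdio>
#include <initializer_list>

static int32_t signExtend5(uint32_t V) {
  return (int32_t)(V << 27) >> 27; // keep the low 5 bits, sign-extend them
}

int main() {
  for (int V : {7, -16, 15, 16, -17})
    std::printf("%3d -> fits=%d\n", V, signExtend5((uint32_t)V) == V);
  // 7, -16 and 15 fit the VSPLTIS* immediate; 16 and -17 do not.
}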
2330
2331/// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift
2332/// amount, otherwise return -1.
2333int PPC::isQVALIGNIShuffleMask(SDNode *N) {
2334 EVT VT = N->getValueType(0);
2335 if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1)
2336 return -1;
2337
2338 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2339
2340 // Find the first non-undef value in the shuffle mask.
2341 unsigned i;
2342 for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i)
2343 /*search*/;
2344
2345 if (i == 4) return -1; // all undef.
2346
2347 // Otherwise, check to see if the rest of the elements are consecutively
2348 // numbered from this value.
2349 unsigned ShiftAmt = SVOp->getMaskElt(i);
2350 if (ShiftAmt < i) return -1;
2351 ShiftAmt -= i;
2352
2353 // Check the rest of the elements to see if they are consecutive.
2354 for (++i; i != 4; ++i)
2355 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2356 return -1;
2357
2358 return ShiftAmt;
2359}
2360
2361//===----------------------------------------------------------------------===//
2362// Addressing Mode Selection
2363//===----------------------------------------------------------------------===//
2364
2365/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2366/// or 64-bit immediate, and if the value can be accurately represented as a
2367/// sign extension from a 16-bit value. If so, this returns true and the
2368/// immediate.
2369bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2370 if (!isa<ConstantSDNode>(N))
2371 return false;
2372
2373 Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue();
2374 if (N->getValueType(0) == MVT::i32)
2375 return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
2376 else
2377 return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2378}
2379bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2380 return isIntS16Immediate(Op.getNode(), Imm);
2381}
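
A standalone sketch of the 16-bit sign-extension test above (the helper name is illustrative): a value fits as a signed 16-bit displacement iff truncating it to int16_t and widening back reproduces it.

#include <cstdint>
#include <cstdio>

static bool fitsInS16(int64_t V) { return V == (int64_t)(int16_t)V; }

int main() {
  std::printf("%d %d %d\n", fitsInS16(32767), fitsInS16(-32768),
              fitsInS16(40000)); // 1 1 0
}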
2382
2383
2384/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2385/// be represented as an indexed [r+r] operation.
2386bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2387 SDValue &Index,
2388 SelectionDAG &DAG) const {
2389 for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
2390 UI != E; ++UI) {
2391 if (MemSDNode *Memop = dyn_cast<MemSDNode>(*UI)) {
2392 if (Memop->getMemoryVT() == MVT::f64) {
2393 Base = N.getOperand(0);
2394 Index = N.getOperand(1);
2395 return true;
2396 }
2397 }
2398 }
2399 return false;
2400}
2401
2402 /// SelectAddressRegReg - Given the specified address, check to see if it
2403/// can be represented as an indexed [r+r] operation. Returns false if it
2404/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2405/// non-zero and N can be represented by a base register plus a signed 16-bit
2406/// displacement, make a more precise judgement by checking (displacement % \p
2407/// EncodingAlignment).
2408bool PPCTargetLowering::SelectAddressRegReg(
2409 SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
2410 MaybeAlign EncodingAlignment) const {
2411 // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2412 // a [pc+imm].
2413 if (SelectAddressPCRel(N, Base))
2414 return false;
2415
2416 int16_t Imm = 0;
2417 if (N.getOpcode() == ISD::ADD) {
2418 // Is there an SPE load/store (f64), which can't handle a 16-bit offset?
2419 // SPE load/store can only handle 8-bit offsets.
2420 if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2421 return true;
2422 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2423 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2424 return false; // r+i
2425 if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2426 return false; // r+i
2427
2428 Base = N.getOperand(0);
2429 Index = N.getOperand(1);
2430 return true;
2431 } else if (N.getOpcode() == ISD::OR) {
2432 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2433 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2434 return false; // Fold as r+i if we can.
2435
2436 // If this is an or of disjoint bitfields, we can codegen this as an add
2437 // (for better address arithmetic) if the LHS and RHS of the OR are provably
2438 // disjoint.
2439 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2440
2441 if (LHSKnown.Zero.getBoolValue()) {
2442 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2443 // If all of the bits are known zero on the LHS or RHS, the add won't
2444 // carry.
2445 if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2446 Base = N.getOperand(0);
2447 Index = N.getOperand(1);
2448 return true;
2449 }
2450 }
2451 }
2452
2453 return false;
2454}
2455
2456// If we happen to be doing an i64 load or store into a stack slot that has
2457// less than a 4-byte alignment, then the frame-index elimination may need to
2458// use an indexed load or store instruction (because the offset may not be a
2459// multiple of 4). The extra register needed to hold the offset comes from the
2460// register scavenger, and it is possible that the scavenger will need to use
2461// an emergency spill slot. As a result, we need to make sure that a spill slot
2462// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2463// stack slot.
2464static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2465 // FIXME: This does not handle the LWA case.
2466 if (VT != MVT::i64)
2467 return;
2468
2469 // NOTE: We'll exclude negative FIs here, which come from argument
2470 // lowering, because there are no known test cases triggering this problem
2471 // using packed structures (or similar). We can remove this exclusion if
2472 // we find such a test case. The reason why this is so test-case driven is
2473 // because this entire 'fixup' is only to prevent crashes (from the
2474 // register scavenger) on not-really-valid inputs. For example, if we have:
2475 // %a = alloca i1
2476 // %b = bitcast i1* %a to i64*
2477 // store i64* a, i64 b
2478 // then the store should really be marked as 'align 1', but is not. If it
2479 // were marked as 'align 1' then the indexed form would have been
2480 // instruction-selected initially, and the problem this 'fixup' is preventing
2481 // won't happen regardless.
2482 if (FrameIdx < 0)
2483 return;
2484
2485 MachineFunction &MF = DAG.getMachineFunction();
2486 MachineFrameInfo &MFI = MF.getFrameInfo();
2487
2488 if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2489 return;
2490
2491 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2492 FuncInfo->setHasNonRISpills();
2493}
2494
2495/// Returns true if the address N can be represented by a base register plus
2496/// a signed 16-bit displacement [r+imm], and if it is not better
2497/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2498/// displacements that are multiples of that value.
2499bool PPCTargetLowering::SelectAddressRegImm(
2500 SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2501 MaybeAlign EncodingAlignment) const {
2502 // FIXME dl should come from parent load or store, not from address
2503 SDLoc dl(N);
2504
2505 // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2506 // a [pc+imm].
2507 if (SelectAddressPCRel(N, Base))
2508 return false;
2509
2510 // If this can be more profitably realized as r+r, fail.
2511 if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2512 return false;
2513
2514 if (N.getOpcode() == ISD::ADD) {
2515 int16_t imm = 0;
2516 if (isIntS16Immediate(N.getOperand(1), imm) &&
2517 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2518 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2519 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2520 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2521 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2522 } else {
2523 Base = N.getOperand(0);
2524 }
2525 return true; // [r+i]
2526 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2527 // Match LOAD (ADD (X, Lo(G))).
2528 assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
2529 && "Cannot handle constant offsets yet!");
2530 Disp = N.getOperand(1).getOperand(0); // The global address.
2531 assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2532 Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2533 Disp.getOpcode() == ISD::TargetConstantPool ||
2534 Disp.getOpcode() == ISD::TargetJumpTable);
2535 Base = N.getOperand(0);
2536 return true; // [&g+r]
2537 }
2538 } else if (N.getOpcode() == ISD::OR) {
2539 int16_t imm = 0;
2540 if (isIntS16Immediate(N.getOperand(1), imm) &&
2541 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2542 // If this is an or of disjoint bitfields, we can codegen this as an add
2543 // (for better address arithmetic) if the LHS and RHS of the OR are
2544 // provably disjoint.
2545 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2546
2547 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2548 // If all of the bits are known zero on the LHS or RHS, the add won't
2549 // carry.
2550 if (FrameIndexSDNode *FI =
2551 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2552 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2553 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2554 } else {
2555 Base = N.getOperand(0);
2556 }
2557 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2558 return true;
2559 }
2560 }
2561 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2562 // Loading from a constant address.
2563
2564 // If this address fits entirely in a 16-bit sext immediate field, codegen
2565 // this as "d, 0"
2566 int16_t Imm;
2567 if (isIntS16Immediate(CN, Imm) &&
2568 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
2569 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2570 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2571 CN->getValueType(0));
2572 return true;
2573 }
2574
2575 // Handle 32-bit sext immediates with LIS + addr mode.
2576 if ((CN->getValueType(0) == MVT::i32 ||
2577 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2578 (!EncodingAlignment ||
2579 isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2580 int Addr = (int)CN->getZExtValue();
2581
2582 // Otherwise, break this down into an LIS + disp.
2583 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2584
2585 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2586 MVT::i32);
2587 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2588 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2589 return true;
2590 }
2591 }
2592
2593 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2594 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2595 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2596 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2597 } else
2598 Base = N;
2599 return true; // [r+0]
2600}
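// Illustrative note (not part of the original source): for (add %reg, 24)
// with a required encoding alignment of 4, SelectAddressRegImm yields
// Disp = 24 and Base = %reg; with a required alignment of 16 the displacement
// is rejected, SelectAddressRegReg matches first, and this routine returns
// false so the [r+r] form is used instead.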
2601
2602/// SelectAddressRegRegOnly - Given the specified address, force it to be
2603/// represented as an indexed [r+r] operation.
2604bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2605 SDValue &Index,
2606 SelectionDAG &DAG) const {
2607 // Check to see if we can easily represent this as an [r+r] address. This
2608 // will fail if it thinks that the address is more profitably represented as
2609 // reg+imm, e.g. where imm = 0.
2610 if (SelectAddressRegReg(N, Base, Index, DAG))
2611 return true;
2612
2613 // If the address is the result of an add, we will utilize the fact that the
2614 // address calculation includes an implicit add. However, we can reduce
2615 // register pressure if we do not materialize a constant just for use as the
2616 // index register. We only absorb the add if it is not an add of a value
2617 // and a 16-bit signed constant where both operands have a single use.
2618 int16_t imm = 0;
2619 if (N.getOpcode() == ISD::ADD &&
2620 (!isIntS16Immediate(N.getOperand(1), imm) ||
2621 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2622 Base = N.getOperand(0);
2623 Index = N.getOperand(1);
2624 return true;
2625 }
2626
2627 // Otherwise, do it the hard way, using R0 as the base register.
2628 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2629 N.getValueType());
2630 Index = N;
2631 return true;
2632}
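// Illustrative note (not part of the original source): when the address is
// not an ADD (for example a lone register or frame index), the routine above
// still forces the indexed form by using PPC::ZERO/PPC::ZERO8 (r0, which
// reads as zero in the base position) as Base and the original address as
// Index.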
2633
2634template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2635 Ty *PCRelCand = dyn_cast<Ty>(N);
2636 return PCRelCand && (PCRelCand->getTargetFlags() & PPCII::MO_PCREL_FLAG);
2637}
2638
2639/// Returns true if this address is a PC Relative address.
2640/// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
2641/// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
2642bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
2643 // This is a materialize PC Relative node. Always select this as PC Relative.
2644 Base = N;
2645 if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
2646 return true;
2647 if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
2648 isValidPCRelNode<GlobalAddressSDNode>(N) ||
2649 isValidPCRelNode<JumpTableSDNode>(N) ||
2650 isValidPCRelNode<BlockAddressSDNode>(N))
2651 return true;
2652 return false;
2653}
2654
2655/// Returns true if we should use a direct load into vector instruction
2656/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
2657static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
2658
2659 // If there are any other uses other than scalar to vector, then we should
2660 // keep it as a scalar load -> direct move pattern to prevent multiple
2661 // loads.
2662 LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
2663 if (!LD)
2664 return false;
2665
2666 EVT MemVT = LD->getMemoryVT();
2667 if (!MemVT.isSimple())
2668 return false;
2669 switch(MemVT.getSimpleVT().SimpleTy) {
2670 case MVT::i64:
2671 break;
2672 case MVT::i32:
2673 if (!ST.hasP8Vector())
2674 return false;
2675 break;
2676 case MVT::i16:
2677 case MVT::i8:
2678 if (!ST.hasP9Vector())
2679 return false;
2680 break;
2681 default:
2682 return false;
2683 }
2684
2685 SDValue LoadedVal(N, 0);
2686 if (!LoadedVal.hasOneUse())
2687 return false;
2688
2689 for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
2690 UI != UE; ++UI)
2691 if (UI.getUse().get().getResNo() == 0 &&
2692 UI->getOpcode() != ISD::SCALAR_TO_VECTOR &&
2693 UI->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
2694 return false;
2695
2696 return true;
2697}
2698
2699/// getPreIndexedAddressParts - returns true, and sets the base pointer,
2700/// offset pointer and addressing mode by reference, if the node's address
2701/// can be legally represented as a pre-indexed load / store address.
2702bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
2703 SDValue &Offset,
2704 ISD::MemIndexedMode &AM,
2705 SelectionDAG &DAG) const {
2706 if (DisablePPCPreinc) return false;
2707
2708 bool isLoad = true;
2709 SDValue Ptr;
2710 EVT VT;
2711 unsigned Alignment;
2712 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2713 Ptr = LD->getBasePtr();
2714 VT = LD->getMemoryVT();
2715 Alignment = LD->getAlignment();
2716 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
2717 Ptr = ST->getBasePtr();
2718 VT = ST->getMemoryVT();
2719 Alignment = ST->getAlignment();
2720 isLoad = false;
2721 } else
2722 return false;
2723
2724 // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
2725 // instructions because we can fold these into a more efficient instruction
2726 // instead (such as LXSD).
2727 if (isLoad && usePartialVectorLoads(N, Subtarget)) {
2728 return false;
2729 }
2730
2731 // PowerPC doesn't have preinc load/store instructions for vectors
2732 if (VT.isVector())
2733 return false;
2734
2735 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
2736 // Common code will reject creating a pre-inc form if the base pointer
2737 // is a frame index, or if N is a store and the base pointer is either
2738 // the same as or a predecessor of the value being stored. Check for
2739 // those situations here, and try with swapped Base/Offset instead.
2740 bool Swap = false;
2741
2742 if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
2743 Swap = true;
2744 else if (!isLoad) {
2745 SDValue Val = cast<StoreSDNode>(N)->getValue();
2746 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
2747 Swap = true;
2748 }
2749
2750 if (Swap)
2751 std::swap(Base, Offset);
2752
2753 AM = ISD::PRE_INC;
2754 return true;
2755 }
2756
2757 // LDU/STU can only handle immediates that are a multiple of 4.
2758 if (VT != MVT::i64) {
2759 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, None))
2760 return false;
2761 } else {
2762 // LDU/STU need an address with at least 4-byte alignment.
2763 if (Alignment < 4)
2764 return false;
2765
2766 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
2767 return false;
2768 }
2769
2770 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2771 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
2772 // sext i32 to i64 when addr mode is r+i.
2773 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
2774 LD->getExtensionType() == ISD::SEXTLOAD &&
2775 isa<ConstantSDNode>(Offset))
2776 return false;
2777 }
2778
2779 AM = ISD::PRE_INC;
2780 return true;
2781}
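// Illustrative note (not part of the original source): a successful match
// here lets common code fold a load/store and its pointer update into a
// single update-form instruction such as lwzu/stwu, or ldu/stu for i64,
// which is why the i64 path above requires a 4-byte-aligned displacement.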
2782
2783//===----------------------------------------------------------------------===//
2784// LowerOperation implementation
2785//===----------------------------------------------------------------------===//
2786
2787/// Set HiOpFlags and LoOpFlags to the target MO flags, adding the PIC flag
2788/// when labels should be referenced through the PIC base.
2789static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
2790 unsigned &HiOpFlags, unsigned &LoOpFlags,
2791 const GlobalValue *GV = nullptr) {
2792 HiOpFlags = PPCII::MO_HA;
2793 LoOpFlags = PPCII::MO_LO;
2794
2795 // Don't use the pic base if not in PIC relocation model.
2796 if (IsPIC) {
2797 HiOpFlags |= PPCII::MO_PIC_FLAG;
2798 LoOpFlags |= PPCII::MO_PIC_FLAG;
2799 }
2800}
2801
2802static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
2803 SelectionDAG &DAG) {
2804 SDLoc DL(HiPart);
2805 EVT PtrVT = HiPart.getValueType();
2806 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
2807
2808 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
2809 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
2810
2811 // With PIC, the first instruction is actually "GR+hi(&G)".
2812 if (isPIC)
2813 Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
2814 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
2815
2816 // Generate non-pic code that has direct accesses to the constant pool.
2817 // The address of the global is just (hi(&g)+lo(&g)).
2818 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
2819}
2820
2821static void setUsesTOCBasePtr(MachineFunction &MF) {
2822 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2823 FuncInfo->setUsesTOCBasePtr();
2824}
2825
2826static void setUsesTOCBasePtr(SelectionDAG &DAG) {
2827 setUsesTOCBasePtr(DAG.getMachineFunction());
2828}
2829
2830SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
2831 SDValue GA) const {
2832 const bool Is64Bit = Subtarget.isPPC64();
2833 EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
2834 SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT)
2835 : Subtarget.isAIXABI()
2836 ? DAG.getRegister(PPC::R2, VT)
2837 : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
2838 SDValue Ops[] = { GA, Reg };
2839 return DAG.getMemIntrinsicNode(
2840 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
2841 MachinePointerInfo::getGOT(DAG.getMachineFunction()), None,
2842 MachineMemOperand::MOLoad);
2843}
2844
2845SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
2846 SelectionDAG &DAG) const {
2847 EVT PtrVT = Op.getValueType();
2848 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2849 const Constant *C = CP->getConstVal();
2850
2851 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
2852 // The actual address of the GlobalValue is stored in the TOC.
2853 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
2854 if (Subtarget.isUsingPCRelativeCalls()) {
2855 SDLoc DL(CP);
2856 EVT Ty = getPointerTy(DAG.getDataLayout());
2857 SDValue ConstPool = DAG.getTargetConstantPool(
2858 C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
2859 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
2860 }
2861 setUsesTOCBasePtr(DAG);
2862 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
2863 return getTOCEntry(DAG, SDLoc(CP), GA);
2864 }
2865
2866 unsigned MOHiFlag, MOLoFlag;
2867 bool IsPIC = isPositionIndependent();
2868 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2869
2870 if (IsPIC && Subtarget.isSVR4ABI()) {
2871 SDValue GA =
2872 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
2873 return getTOCEntry(DAG, SDLoc(CP), GA);
2874 }
2875
2876 SDValue CPIHi =
2877 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
2878 SDValue CPILo =
2879 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
2880 return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
2881}
2882
2883// For 64-bit PowerPC, prefer the more compact relative encodings.
2884// This trades 32 bits per jump table entry for one or two instructions
2885// at the jump site.
2886unsigned PPCTargetLowering::getJumpTableEncoding() const {
2887 if (isJumpTableRelative())
2888 return MachineJumpTableInfo::EK_LabelDifference32;
2889
2890 return TargetLowering::getJumpTableEncoding();
2891}
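// Illustrative note (not part of the original source): EK_LabelDifference32
// emits each jump-table entry as a 32-bit offset from a base label rather
// than a full pointer-sized absolute address, which is the 32-bits-per-entry
// saving the comment above refers to.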
2892
2893bool PPCTargetLowering::isJumpTableRelative() const {
2894 if (UseAbsoluteJumpTables)
2895 return false;
2896 if (Subtarget.isPPC64() || Subtarget.isAIXABI())
2897 return true;
2898 return TargetLowering::isJumpTableRelative();
2899}
2900
2901SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
2902 SelectionDAG &DAG) const {
2903 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
2904 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2905
2906 switch (getTargetMachine().getCodeModel()) {
2907 case CodeModel::Small:
2908 case CodeModel::Medium:
2909 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2910 default:
2911 return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
2912 getPointerTy(DAG.getDataLayout()));
2913 }
2914}
2915
2916const MCExpr *
2917PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
2918 unsigned JTI,
2919 MCContext &Ctx) const {
2920 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
2921 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2922
2923 switch (getTargetMachine().getCodeModel()) {
2924 case CodeModel::Small:
2925 case CodeModel::Medium:
2926 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2927 default:
2928 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2929 }
2930}
2931
2932SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
2933 EVT PtrVT = Op.getValueType();
2934 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2935
2936 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
2937 if (Subtarget.isUsingPCRelativeCalls()) {
2938 SDLoc DL(JT);
2939 EVT Ty = getPointerTy(DAG.getDataLayout());
2940 SDValue GA =
2941 DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
2942 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
2943 return MatAddr;
2944 }
2945
2946 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
2947 // The actual address of the GlobalValue is stored in the TOC.
2948 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
2949 setUsesTOCBasePtr(DAG);
2950 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
2951 return getTOCEntry(DAG, SDLoc(JT), GA);
2952 }
2953
2954 unsigned MOHiFlag, MOLoFlag;
2955 bool IsPIC = isPositionIndependent();
2956 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2957
2958 if (IsPIC && Subtarget.isSVR4ABI()) {
2959 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
2960 PPCII::MO_PIC_FLAG);
2961 return getTOCEntry(DAG, SDLoc(GA), GA);
2962 }
2963
2964 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
2965 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
2966 return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
2967}
2968
2969SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
2970 SelectionDAG &DAG) const {
2971 EVT PtrVT = Op.getValueType();
2972 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
2973 const BlockAddress *BA = BASDN->getBlockAddress();
2974
2975 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
2976 if (Subtarget.isUsingPCRelativeCalls()) {
2977 SDLoc DL(BASDN);
2978 EVT Ty = getPointerTy(DAG.getDataLayout());
2979 SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
2980 PPCII::MO_PCREL_FLAG);
2981 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
2982 return MatAddr;
2983 }
2984
2985 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
2986 // The actual BlockAddress is stored in the TOC.
2987 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
2988 setUsesTOCBasePtr(DAG);
2989 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
2990 return getTOCEntry(DAG, SDLoc(BASDN), GA);
2991 }
2992
2993 // 32-bit position-independent ELF stores the BlockAddress in the .got.
2994 if (Subtarget.is32BitELFABI() && isPositionIndependent())
2995 return getTOCEntry(
2996 DAG, SDLoc(BASDN),
2997 DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
2998
2999 unsigned MOHiFlag, MOLoFlag;
3000 bool IsPIC = isPositionIndependent();
3001 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3002 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
3003 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
3004 return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3005}
3006
3007SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3008 SelectionDAG &DAG) const {
3009 // FIXME: TLS addresses currently use medium model code sequences,
3010 // which is the most useful form. Eventually support for small and
3011 // large models could be added if users need it, at the cost of
3012 // additional complexity.
3013 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3014 if (DAG.getTarget().useEmulatedTLS())
3015 return LowerToTLSEmulatedModel(GA, DAG);
3016
3017 SDLoc dl(GA);
3018 const GlobalValue *GV = GA->getGlobal();
3019 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3020 bool is64bit = Subtarget.isPPC64();
3021 const Module *M = DAG.getMachineFunction().getFunction().getParent();
3022 PICLevel::Level picLevel = M->getPICLevel();
3023
3024 const TargetMachine &TM = getTargetMachine();
3025 TLSModel::Model Model = TM.getTLSModel(GV);
3026
3027 if (Model == TLSModel::LocalExec) {
3028 if (Subtarget.isUsingPCRelativeCalls()) {
3029 SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3030 SDValue TGA = DAG.getTargetGlobalAddress(
3031 GV, dl, PtrVT, 0, (PPCII::MO_PCREL_FLAG | PPCII::MO_TPREL_FLAG));
3032 SDValue MatAddr =
3033 DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
3034 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3035 }
3036
3037 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3038 PPCII::MO_TPREL_HA);
3039 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3040 PPCII::MO_TPREL_LO);
3041 SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3042 : DAG.getRegister(PPC::R2, MVT::i32);
3043
3044 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3045 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3046 }
3047
3048 if (Model == TLSModel::InitialExec) {
3049 bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3050 SDValue TGA = DAG.getTargetGlobalAddress(
3051 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3052 SDValue TGATLS = DAG.getTargetGlobalAddress(
3053 GV, dl, PtrVT, 0,
3054 IsPCRel ? (PPCII::MO_TLS | PPCII::MO_PCREL_FLAG) : PPCII::MO_TLS);
3055 SDValue TPOffset;
3056 if (IsPCRel) {
3057 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
3058 TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3059 MachinePointerInfo());
3060 } else {
3061 SDValue GOTPtr;
3062 if (is64bit) {
3063 setUsesTOCBasePtr(DAG);
3064 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3065 GOTPtr =
3066 DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
3067 } else {
3068 if (!TM.isPositionIndependent())
3069 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3070 else if (picLevel == PICLevel::SmallPIC)
3071 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3072 else
3073 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3074 }
3075 TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3076 }
3077 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3078 }
3079
3080 if (Model == TLSModel::GeneralDynamic) {
3081 if (Subtarget.isUsingPCRelativeCalls()) {
3082 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3083 PPCII::MO_GOT_TLSGD_PCREL_FLAG);
3084 return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3085 }
3086
3087 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3088 SDValue GOTPtr;
3089 if (is64bit) {
3090 setUsesTOCBasePtr(DAG);
3091 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3092 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3093 GOTReg, TGA);
3094 } else {
3095 if (picLevel == PICLevel::SmallPIC)
3096 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3097 else
3098 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3099 }
3100 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3101 GOTPtr, TGA, TGA);
3102 }
3103
3104 if (Model == TLSModel::LocalDynamic) {
3105 if (Subtarget.isUsingPCRelativeCalls()) {
3106 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3107 PPCII::MO_GOT_TLSLD_PCREL_FLAG);
3108 SDValue MatPCRel =
3109 DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3110 return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3111 }
3112
3113 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3114 SDValue GOTPtr;
3115 if (is64bit) {
3116 setUsesTOCBasePtr(DAG);
3117 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3118 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3119 GOTReg, TGA);
3120 } else {
3121 if (picLevel == PICLevel::SmallPIC)
3122 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3123 else
3124 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3125 }
3126 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3127 PtrVT, GOTPtr, TGA, TGA);
3128 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3129 PtrVT, TLSAddr, TGA);
3130 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3131 }
3132
3133 llvm_unreachable("Unknown TLS model!")::llvm::llvm_unreachable_internal("Unknown TLS model!", "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 3133)
;
3134}
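// Illustrative note (not part of the original source): the routine above
// handles the four ELF TLS models in turn: local-exec adds a known offset to
// the thread pointer, initial-exec loads the offset from the GOT, and
// general/local-dynamic build ADDI_TLSGD/TLSLD pseudo sequences that expand
// into calls to __tls_get_addr.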
3135
3136SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3137 SelectionDAG &DAG) const {
3138 EVT PtrVT = Op.getValueType();
3139 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3140 SDLoc DL(GSDN);
3141 const GlobalValue *GV = GSDN->getGlobal();
3142
3143 // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3144 // The actual address of the GlobalValue is stored in the TOC.
3145 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3146 if (Subtarget.isUsingPCRelativeCalls()) {
3147 EVT Ty = getPointerTy(DAG.getDataLayout());
3148 if (isAccessedAsGotIndirect(Op)) {
3149 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3150 PPCII::MO_PCREL_FLAG |
3151 PPCII::MO_GOT_FLAG);
3152 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3153 SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3154 MachinePointerInfo());
3155 return Load;
3156 } else {
3157 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3158 PPCII::MO_PCREL_FLAG);
3159 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3160 }
3161 }
3162 setUsesTOCBasePtr(DAG);
3163 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3164 return getTOCEntry(DAG, DL, GA);
3165 }
3166
3167 unsigned MOHiFlag, MOLoFlag;
3168 bool IsPIC = isPositionIndependent();
3169 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3170
3171 if (IsPIC && Subtarget.isSVR4ABI()) {
3172 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3173 GSDN->getOffset(),
3174 PPCII::MO_PIC_FLAG);
3175 return getTOCEntry(DAG, DL, GA);
3176 }
3177
3178 SDValue GAHi =
3179 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3180 SDValue GALo =
3181 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3182
3183 return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3184}
3185
3186SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3187 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3188 SDLoc dl(Op);
3189
3190 if (Op.getValueType() == MVT::v2i64) {
3191 // When the operands themselves are v2i64 values, we need to do something
3192 // special because VSX has no underlying comparison operations for these.
3193 if (Op.getOperand(0).getValueType() == MVT::v2i64) {
3194 // Equality can be handled by casting to the legal type for Altivec
3195 // comparisons, everything else needs to be expanded.
3196 if (CC == ISD::SETEQ || CC == ISD::SETNE) {
3197 return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
3198 DAG.getSetCC(dl, MVT::v4i32,
3199 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
3200 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
3201 CC));
3202 }
3203
3204 return SDValue();
3205 }
3206
3207 // We handle most of these in the usual way.
3208 return Op;
3209 }
3210
3211 // If we're comparing for equality to zero, expose the fact that this is
3212 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3213 // fold the new nodes.
3214 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3215 return V;
3216
3217 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
3218 // Leave comparisons against 0 and -1 alone for now, since they're usually
3219 // optimized. FIXME: revisit this when we can custom lower all setcc
3220 // optimizations.
3221 if (C->isAllOnesValue() || C->isNullValue())
3222 return SDValue();
3223 }
3224
3225 // If we have an integer seteq/setne, turn it into a compare against zero
3226 // by xor'ing the rhs with the lhs, which is faster than setting a
3227 // condition register, reading it back out, and masking the correct bit. The
3228 // normal approach here uses sub to do this instead of xor. Using xor exposes
3229 // the result to other bit-twiddling opportunities.
3230 EVT LHSVT = Op.getOperand(0).getValueType();
3231 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3232 EVT VT = Op.getValueType();
3233 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
3234 Op.getOperand(1));
3235 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3236 }
3237 return SDValue();
3238}
3239
3240SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3241 SDNode *Node = Op.getNode();
3242 EVT VT = Node->getValueType(0);
3243 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3244 SDValue InChain = Node->getOperand(0);
3245 SDValue VAListPtr = Node->getOperand(1);
3246 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3247 SDLoc dl(Node);
3248
3249 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3250
3251 // gpr_index
3252 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3253 VAListPtr, MachinePointerInfo(SV), MVT::i8);
3254 InChain = GprIndex.getValue(1);
3255
3256 if (VT == MVT::i64) {
3257 // Check if GprIndex is even
3258 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3259 DAG.getConstant(1, dl, MVT::i32));
3260 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3261 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3262 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3263 DAG.getConstant(1, dl, MVT::i32));
3264 // Align GprIndex to be even if it isn't
3265 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3266 GprIndex);
3267 }
3268
3269 // fpr index is 1 byte after gpr
3270 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3271 DAG.getConstant(1, dl, MVT::i32));
3272
3273 // fpr
3274 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3275 FprPtr, MachinePointerInfo(SV), MVT::i8);
3276 InChain = FprIndex.getValue(1);
3277
3278 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3279 DAG.getConstant(8, dl, MVT::i32));
3280
3281 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3282 DAG.getConstant(4, dl, MVT::i32));
3283
3284 // areas
3285 SDValue OverflowArea =
3286 DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3287 InChain = OverflowArea.getValue(1);
3288
3289 SDValue RegSaveArea =
3290 DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3291 InChain = RegSaveArea.getValue(1);
3292
3293 // select overflow_area if index >= 8
3294 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3295 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3296
3297 // adjustment constant gpr_index * 4/8
3298 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3299 VT.isInteger() ? GprIndex : FprIndex,
3300 DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3301 MVT::i32));
3302
3303 // OurReg = RegSaveArea + RegConstant
3304 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3305 RegConstant);
3306
3307 // Floating types are 32 bytes into RegSaveArea
3308 if (VT.isFloatingPoint())
3309 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3310 DAG.getConstant(32, dl, MVT::i32));
3311
3312 // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3313 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3314 VT.isInteger() ? GprIndex : FprIndex,
3315 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3316 MVT::i32));
3317
3318 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3319 VT.isInteger() ? VAListPtr : FprPtr,
3320 MachinePointerInfo(SV), MVT::i8);
3321
3322 // determine if we should load from reg_save_area or overflow_area
3323 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3324
3325 // increase overflow_area by 4/8 if gpr/fpr >= 8
3326 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3327 DAG.getConstant(VT.isInteger() ? 4 : 8,
3328 dl, MVT::i32));
3329
3330 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3331 OverflowAreaPlusN);
3332
3333 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3334 MachinePointerInfo(), MVT::i32);
3335
3336 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3337}
3338
3339SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3340 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3341
3342 // We have to copy the entire va_list struct:
3343 // 2*sizeof(char) + 2-byte alignment + 2*sizeof(char*) = 12 bytes
3344 return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3345 DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3346 false, true, false, MachinePointerInfo(),
3347 MachinePointerInfo());
3348}
3349
3350SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3351 SelectionDAG &DAG) const {
3352 if (Subtarget.isAIXABI())
3353 report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");
3354
3355 return Op.getOperand(0);
3356}
3357
3358SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3359 SelectionDAG &DAG) const {
3360 if (Subtarget.isAIXABI())
3361 report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");
3362
3363 SDValue Chain = Op.getOperand(0);
3364 SDValue Trmp = Op.getOperand(1); // trampoline
3365 SDValue FPtr = Op.getOperand(2); // nested function
3366 SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3367 SDLoc dl(Op);
3368
3369 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3370 bool isPPC64 = (PtrVT == MVT::i64);
3371 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3372
3373 TargetLowering::ArgListTy Args;
3374 TargetLowering::ArgListEntry Entry;
3375
3376 Entry.Ty = IntPtrTy;
3377 Entry.Node = Trmp; Args.push_back(Entry);
3378
3379 // TrampSize == (isPPC64 ? 48 : 40);
3380 Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
3381 isPPC64 ? MVT::i64 : MVT::i32);
3382 Args.push_back(Entry);
3383
3384 Entry.Node = FPtr; Args.push_back(Entry);
3385 Entry.Node = Nest; Args.push_back(Entry);
3386
3387 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
3388 TargetLowering::CallLoweringInfo CLI(DAG);
3389 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3390 CallingConv::C, Type::getVoidTy(*DAG.getContext()),
3391 DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
3392
3393 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3394 return CallResult.second;
3395}
3396
3397SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3398 MachineFunction &MF = DAG.getMachineFunction();
3399 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3400 EVT PtrVT = getPointerTy(MF.getDataLayout());
3401
3402 SDLoc dl(Op);
3403
3404 if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
3405 // vastart just stores the address of the VarArgsFrameIndex slot into the
3406 // memory location argument.
3407 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3408 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3409 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3410 MachinePointerInfo(SV));
3411 }
3412
3413 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
3414 // We suppose the given va_list is already allocated.
3415 //
3416 // typedef struct {
3417 // char gpr; /* index into the array of 8 GPRs
3418 // * stored in the register save area
3419 // * gpr=0 corresponds to r3,
3420 // * gpr=1 to r4, etc.
3421 // */
3422 // char fpr; /* index into the array of 8 FPRs
3423 // * stored in the register save area
3424 // * fpr=0 corresponds to f1,
3425 // * fpr=1 to f2, etc.
3426 // */
3427 // char *overflow_arg_area;
3428 // /* location on stack that holds
3429 // * the next overflow argument
3430 // */
3431 // char *reg_save_area;
3432 // /* where r3:r10 and f1:f8 (if saved)
3433 // * are stored
3434 // */
3435 // } va_list[1];
3436
3437 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
3438 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
3439 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
3440 PtrVT);
3441 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3442 PtrVT);
3443
3444 uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
3445 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
3446
3447 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
3448 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
3449
3450 uint64_t FPROffset = 1;
3451 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
3452
3453 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3454
3455 // Store first byte : number of int regs
3456 SDValue firstStore =
3457 DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
3458 MachinePointerInfo(SV), MVT::i8);
3459 uint64_t nextOffset = FPROffset;
3460 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
3461 ConstFPROffset);
3462
3463 // Store second byte : number of float regs
3464 SDValue secondStore =
3465 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
3466 MachinePointerInfo(SV, nextOffset), MVT::i8);
3467 nextOffset += StackOffset;
3468 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
3469
3470 // Store second word : arguments given on stack
3471 SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
3472 MachinePointerInfo(SV, nextOffset));
3473 nextOffset += FrameOffset;
3474 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
3475
3476 // Store third word : arguments given in registers
3477 return DAG.getStore(thirdStore, dl, FR, nextPtr,
3478 MachinePointerInfo(SV, nextOffset));
3479}
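// Illustrative note (not part of the original source): with 4-byte pointers
// the stores above lay the 32-bit SVR4 va_list out as the gpr count at byte
// 0, the fpr count at byte 1, the overflow_arg_area pointer at byte 4, and
// the reg_save_area pointer at byte 8, matching the struct sketched in the
// comment.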
3480
3481/// FPR - The set of FP registers that should be allocated for arguments
3482/// on Darwin and AIX.
3483static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
3484 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
3485 PPC::F11, PPC::F12, PPC::F13};
3486
3487/// CalculateStackSlotSize - Calculates the size reserved for this argument on
3488/// the stack.
3489static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
3490 unsigned PtrByteSize) {
3491 unsigned ArgSize = ArgVT.getStoreSize();
3492 if (Flags.isByVal())
3493 ArgSize = Flags.getByValSize();
3494
3495 // Round up to multiples of the pointer size, except for array members,
3496 // which are always packed.
3497 if (!Flags.isInConsecutiveRegs())
3498 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3499
3500 return ArgSize;
3501}
3502
3503/// CalculateStackSlotAlignment - Calculates the alignment of this argument
3504/// on the stack.
3505static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
3506 ISD::ArgFlagsTy Flags,
3507 unsigned PtrByteSize) {
3508 Align Alignment(PtrByteSize);
3509
3510 // Altivec parameters are padded to a 16 byte boundary.
3511 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3512 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3513 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3514 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3515 Alignment = Align(16);
3516
3517 // ByVal parameters are aligned as requested.
3518 if (Flags.isByVal()) {
3519 auto BVAlign = Flags.getNonZeroByValAlign();
3520 if (BVAlign > PtrByteSize) {
3521 if (BVAlign.value() % PtrByteSize != 0)
3522 llvm_unreachable(
3523 "ByVal alignment is not a multiple of the pointer size");
3524
3525 Alignment = BVAlign;
3526 }
3527 }
3528
3529 // Array members are always packed to their original alignment.
3530 if (Flags.isInConsecutiveRegs()) {
3531 // If the array member was split into multiple registers, the first
3532 // needs to be aligned to the size of the full type. (Except for
3533 // ppcf128, which is only aligned as its f64 components.)
3534 if (Flags.isSplit() && OrigVT != MVT::ppcf128)
3535 Alignment = Align(OrigVT.getStoreSize());
3536 else
3537 Alignment = Align(ArgVT.getStoreSize());
3538 }
3539
3540 return Alignment;
3541}
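// Illustrative note (not part of the original source): with an 8-byte
// pointer size, a v4i32 argument is aligned to 16 bytes by the routine
// above, while a byval aggregate that requests a 32-byte alignment keeps
// that 32-byte alignment because it is a multiple of the pointer size.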
3542
3543/// CalculateStackSlotUsed - Return whether this argument will use its
3544/// stack slot (instead of being passed in registers). ArgOffset,
3545/// AvailableFPRs, and AvailableVRs must hold the current argument
3546/// position, and will be updated to account for this argument.
3547static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
3548 unsigned PtrByteSize, unsigned LinkageSize,
3549 unsigned ParamAreaSize, unsigned &ArgOffset,
3550 unsigned &AvailableFPRs,
3551 unsigned &AvailableVRs) {
3552 bool UseMemory = false;
3553
3554 // Respect alignment of argument on the stack.
3555 Align Alignment =
3556 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
3557 ArgOffset = alignTo(ArgOffset, Alignment);
3558 // If there's no space left in the argument save area, we must
3559 // use memory (this check also catches zero-sized arguments).
3560 if (ArgOffset >= LinkageSize + ParamAreaSize)
3561 UseMemory = true;
3562
3563 // Allocate argument on the stack.
3564 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
3565 if (Flags.isInConsecutiveRegsLast())
3566 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3567 // If we overran the argument save area, we must use memory
3568 // (this check catches arguments passed partially in memory)
3569 if (ArgOffset > LinkageSize + ParamAreaSize)
3570 UseMemory = true;
3571
3572 // However, if the argument is actually passed in an FPR or a VR,
3573 // we don't use memory after all.
3574 if (!Flags.isByVal()) {
3575 if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
3576 if (AvailableFPRs > 0) {
3577 --AvailableFPRs;
3578 return false;
3579 }
3580 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3581 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3582 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3583 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3584 if (AvailableVRs > 0) {
3585 --AvailableVRs;
3586 return false;
3587 }
3588 }
3589
3590 return UseMemory;
3591}
3592
3593/// EnsureStackAlignment - Round stack frame size up from NumBytes to
3594/// ensure minimum alignment required for target.
3595static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
3596 unsigned NumBytes) {
3597 return alignTo(NumBytes, Lowering->getStackAlign());
3598}
3599
3600SDValue PPCTargetLowering::LowerFormalArguments(
3601 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3602 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3603 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3604 if (Subtarget.isAIXABI())
3605 return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
3606 InVals);
3607 if (Subtarget.is64BitELFABI())
3608 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
3609 InVals);
3610 if (Subtarget.is32BitELFABI())
3611 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
3612 InVals);
3613
3614 return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, dl, DAG,
3615 InVals);
3616}
3617
3618SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
3619 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3620 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3621 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3622
3623 // 32-bit SVR4 ABI Stack Frame Layout:
3624 // +-----------------------------------+
3625 // +--> | Back chain |
3626 // | +-----------------------------------+
3627 // | | Floating-point register save area |
3628 // | +-----------------------------------+
3629 // | | General register save area |
3630 // | +-----------------------------------+
3631 // | | CR save word |
3632 // | +-----------------------------------+
3633 // | | VRSAVE save word |
3634 // | +-----------------------------------+
3635 // | | Alignment padding |
3636 // | +-----------------------------------+
3637 // | | Vector register save area |
3638 // | +-----------------------------------+
3639 // | | Local variable space |
3640 // | +-----------------------------------+
3641 // | | Parameter list area |
3642 // | +-----------------------------------+
3643 // | | LR save word |
3644 // | +-----------------------------------+
3645 // SP--> +--- | Back chain |
3646 // +-----------------------------------+
3647 //
3648 // Specifications:
3649 // System V Application Binary Interface PowerPC Processor Supplement
3650 // AltiVec Technology Programming Interface Manual
3651
3652 MachineFunction &MF = DAG.getMachineFunction();
3653 MachineFrameInfo &MFI = MF.getFrameInfo();
3654 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3655
3656 EVT PtrVT = getPointerTy(MF.getDataLayout());
3657 // Potential tail calls could cause overwriting of argument stack slots.
3658 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3659 (CallConv == CallingConv::Fast));
3660 const Align PtrAlign(4);
3661
3662 // Assign locations to all of the incoming arguments.
3663 SmallVector<CCValAssign, 16> ArgLocs;
3664 PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3665 *DAG.getContext());
3666
3667 // Reserve space for the linkage area on the stack.
3668 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3669 CCInfo.AllocateStack(LinkageSize, PtrAlign);
3670 if (useSoftFloat())
3671 CCInfo.PreAnalyzeFormalArguments(Ins);
3672
3673 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
3674 CCInfo.clearWasPPCF128();
3675
3676 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3677 CCValAssign &VA = ArgLocs[i];
3678
3679 // Arguments stored in registers.
3680 if (VA.isRegLoc()) {
3681 const TargetRegisterClass *RC;
3682 EVT ValVT = VA.getValVT();
3683
3684 switch (ValVT.getSimpleVT().SimpleTy) {
3685 default:
3686 llvm_unreachable("ValVT not supported by formal arguments Lowering")::llvm::llvm_unreachable_internal("ValVT not supported by formal arguments Lowering"
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 3686)
;
3687 case MVT::i1:
3688 case MVT::i32:
3689 RC = &PPC::GPRCRegClass;
3690 break;
3691 case MVT::f32:
3692 if (Subtarget.hasP8Vector())
3693 RC = &PPC::VSSRCRegClass;
3694 else if (Subtarget.hasSPE())
3695 RC = &PPC::GPRCRegClass;
3696 else
3697 RC = &PPC::F4RCRegClass;
3698 break;
3699 case MVT::f64:
3700 if (Subtarget.hasVSX())
3701 RC = &PPC::VSFRCRegClass;
3702 else if (Subtarget.hasSPE())
3703 // SPE passes doubles in GPR pairs.
3704 RC = &PPC::GPRCRegClass;
3705 else
3706 RC = &PPC::F8RCRegClass;
3707 break;
3708 case MVT::v16i8:
3709 case MVT::v8i16:
3710 case MVT::v4i32:
3711 RC = &PPC::VRRCRegClass;
3712 break;
3713 case MVT::v4f32:
3714 RC = &PPC::VRRCRegClass;
3715 break;
3716 case MVT::v2f64:
3717 case MVT::v2i64:
3718 RC = &PPC::VRRCRegClass;
3719 break;
3720 }
3721
3722 SDValue ArgValue;
3723 // Transform the arguments stored in physical registers into
3724 // virtual ones.
3725 if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
3726 assert(i + 1 < e && "No second half of double precision argument");
3727 unsigned RegLo = MF.addLiveIn(VA.getLocReg(), RC);
3728 unsigned RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
3729 SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
3730 SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
3731 if (!Subtarget.isLittleEndian())
3732 std::swap (ArgValueLo, ArgValueHi);
3733 ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
3734 ArgValueHi);
3735 } else {
3736 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3737 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
3738 ValVT == MVT::i1 ? MVT::i32 : ValVT);
3739 if (ValVT == MVT::i1)
3740 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
3741 }
3742
3743 InVals.push_back(ArgValue);
3744 } else {
3745 // Argument stored in memory.
3746 assert(VA.isMemLoc());
3747
3748 // Get the extended size of the argument type in stack
3749 unsigned ArgSize = VA.getLocVT().getStoreSize();
3750 // Get the actual size of the argument type
3751 unsigned ObjSize = VA.getValVT().getStoreSize();
3752 unsigned ArgOffset = VA.getLocMemOffset();
3753 // Stack objects in PPC32 are right justified.
3754 ArgOffset += ArgSize - ObjSize;
3755 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
3756
3757 // Create load nodes to retrieve arguments from the stack.
3758 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3759 InVals.push_back(
3760 DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
3761 }
3762 }
3763
3764 // Assign locations to all of the incoming aggregate by value arguments.
3765 // Aggregates passed by value are stored in the local variable space of the
3766 // caller's stack frame, right above the parameter list area.
3767 SmallVector<CCValAssign, 16> ByValArgLocs;
3768 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
3769 ByValArgLocs, *DAG.getContext());
3770
3771 // Reserve stack space for the allocations in CCInfo.
3772 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);
3773
3774 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
3775
3776 // Area that is at least reserved in the caller of this function.
3777 unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
3778 MinReservedArea = std::max(MinReservedArea, LinkageSize);
3779
3780 // Set the size that is at least reserved in the caller of this function. Tail
3781 // call optimized function's reserved stack space needs to be aligned so that
3782 // taking the difference between two stack areas will result in an aligned
3783 // stack.
3784 MinReservedArea =
3785 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
3786 FuncInfo->setMinReservedArea(MinReservedArea);
3787
3788 SmallVector<SDValue, 8> MemOps;
3789
3790 // If the function takes variable number of arguments, make a frame index for
3791 // the start of the first vararg value... for expansion of llvm.va_start.
3792 if (isVarArg) {
3793 static const MCPhysReg GPArgRegs[] = {
3794 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
3795 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
3796 };
3797 const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
3798
3799 static const MCPhysReg FPArgRegs[] = {
3800 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
3801 PPC::F8
3802 };
3803 unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
3804
3805 if (useSoftFloat() || hasSPE())
3806 NumFPArgRegs = 0;
3807
3808 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
3809 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
3810
3811 // Make room for NumGPArgRegs and NumFPArgRegs.
3812 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
3813 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
3814
3815 FuncInfo->setVarArgsStackOffset(
3816 MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
3817 CCInfo.getNextStackOffset(), true));
3818
3819 FuncInfo->setVarArgsFrameIndex(
3820 MFI.CreateStackObject(Depth, Align(8), false));
3821 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3822
3823 // The fixed integer arguments of a variadic function are stored to the
3824 // VarArgsFrameIndex on the stack so that they may be loaded by
3825 // dereferencing the result of va_next.
3826 for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
3827 // Get an existing live-in vreg, or add a new one.
3828 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
3829 if (!VReg)
3830 VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
3831
3832 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3833 SDValue Store =
3834 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3835 MemOps.push_back(Store);
3836 // Increment the address by four for the next argument to store
3837 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
3838 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3839 }
3840
3841 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
3842 // is set.
3843 // The double arguments are stored to the VarArgsFrameIndex
3844 // on the stack.
3845 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
3846 // Get an existing live-in vreg, or add a new one.
3847 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
3848 if (!VReg)
3849 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
3850
3851 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
3852 SDValue Store =
3853 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3854 MemOps.push_back(Store);
3855 // Increment the address by eight for the next argument to store
3856 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
3857 PtrVT);
3858 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3859 }
3860 }
3861
3862 if (!MemOps.empty())
3863 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3864
3865 return Chain;
3866}
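The GPR/FPR spill loops above exist so that a 32-bit SVR4 variadic callee can re-read register-passed arguments from memory. A minimal sketch of the kind of C source this supports (illustrative only, not part of this file; exact va_list mechanics are ABI-specific):

  #include <stdarg.h>
  int sum(int n, ...) {
    va_list ap;
    va_start(ap, n);              // lowered via llvm.va_start using the
                                  // VarArgsFrameIndex / VarArgsStackOffset set above
    int total = 0;
    for (int i = 0; i < n; ++i)
      total += va_arg(ap, int);   // walks the spilled R3-R10 save area,
                                  // then the caller's overflow area
    va_end(ap);
    return total;
  }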
3867
3868// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3869// value to MVT::i64 and then truncate to the correct register size.
3870SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
3871 EVT ObjectVT, SelectionDAG &DAG,
3872 SDValue ArgVal,
3873 const SDLoc &dl) const {
3874 if (Flags.isSExt())
3875 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
3876 DAG.getValueType(ObjectVT));
3877 else if (Flags.isZExt())
3878 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
3879 DAG.getValueType(ObjectVT));
3880
3881 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
3882}
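For illustration, the node pattern extendArgForPPC64 produces for a sign-extended i32 argument arriving in a 64-bit GPR (a sketch, not output copied from a real compilation):

  // ArgVal : i64, Flags.isSExt(), ObjectVT == i32
  //   t1 = AssertSext ArgVal, ValueType:i32   ; upper 32 bits are known to be
  //                                           ; the sign extension of bit 31
  //   t2 = truncate t1 to i32                 ; value returned to the caller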
3883
3884SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
3885 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3886 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3887 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3888 // TODO: add description of PPC stack frame format, or at least some docs.
3889 //
3890 bool isELFv2ABI = Subtarget.isELFv2ABI();
3891 bool isLittleEndian = Subtarget.isLittleEndian();
3892 MachineFunction &MF = DAG.getMachineFunction();
3893 MachineFrameInfo &MFI = MF.getFrameInfo();
3894 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3895
3896 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
3897 "fastcc not supported on varargs functions");
3898
3899 EVT PtrVT = getPointerTy(MF.getDataLayout());
3900 // Potential tail calls could cause overwriting of argument stack slots.
3901 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3902 (CallConv == CallingConv::Fast));
3903 unsigned PtrByteSize = 8;
3904 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3905
3906 static const MCPhysReg GPR[] = {
3907 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
3908 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
3909 };
3910 static const MCPhysReg VR[] = {
3911 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
3912 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
3913 };
3914
3915 const unsigned Num_GPR_Regs = array_lengthof(GPR);
3916 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
3917 const unsigned Num_VR_Regs = array_lengthof(VR);
3918
3919 // Do a first pass over the arguments to determine whether the ABI
3920 // guarantees that our caller has allocated the parameter save area
3921 // on its stack frame. In the ELFv1 ABI, this is always the case;
3922 // in the ELFv2 ABI, it is true if this is a vararg function or if
3923 // any parameter is located in a stack slot.
3924
3925 bool HasParameterArea = !isELFv2ABI || isVarArg;
3926 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
3927 unsigned NumBytes = LinkageSize;
3928 unsigned AvailableFPRs = Num_FPR_Regs;
3929 unsigned AvailableVRs = Num_VR_Regs;
3930 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
3931 if (Ins[i].Flags.isNest())
3932 continue;
3933
3934 if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
3935 PtrByteSize, LinkageSize, ParamAreaSize,
3936 NumBytes, AvailableFPRs, AvailableVRs))
3937 HasParameterArea = true;
3938 }
3939
3940 // Add DAG nodes to load the arguments or copy them out of registers. On
3941 // entry to a function on PPC, the arguments start after the linkage area,
3942 // although the first ones are often in registers.
3943
3944 unsigned ArgOffset = LinkageSize;
3945 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
3946 SmallVector<SDValue, 8> MemOps;
3947 Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
3948 unsigned CurArgIdx = 0;
3949 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
3950 SDValue ArgVal;
3951 bool needsLoad = false;
3952 EVT ObjectVT = Ins[ArgNo].VT;
3953 EVT OrigVT = Ins[ArgNo].ArgVT;
3954 unsigned ObjSize = ObjectVT.getStoreSize();
3955 unsigned ArgSize = ObjSize;
3956 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
3957 if (Ins[ArgNo].isOrigArg()) {
3958 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
3959 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
3960 }
3961 // We re-align the argument offset for each argument, except when using the
3962 // fast calling convention, when we need to make sure we do that only when
3963 // we'll actually use a stack slot.
3964 unsigned CurArgOffset;
3965 Align Alignment;
3966 auto ComputeArgOffset = [&]() {
3967 /* Respect alignment of argument on the stack. */
3968 Alignment =
3969 CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
3970 ArgOffset = alignTo(ArgOffset, Alignment);
3971 CurArgOffset = ArgOffset;
3972 };
3973
3974 if (CallConv != CallingConv::Fast) {
3975 ComputeArgOffset();
3976
3977 /* Compute GPR index associated with argument offset. */
3978 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
3979 GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
3980 }
3981
3982 // FIXME the codegen can be much improved in some cases.
3983 // We do not have to keep everything in memory.
3984 if (Flags.isByVal()) {
3985 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
3986
3987 if (CallConv == CallingConv::Fast)
3988 ComputeArgOffset();
3989
3990 // ObjSize is the true size, ArgSize rounded up to multiple of registers.
3991 ObjSize = Flags.getByValSize();
3992 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3993 // Empty aggregate parameters do not take up registers. Examples:
3994 // struct { } a;
3995 // union { } b;
3996 // int c[0];
3997 // etc. However, we have to provide a place-holder in InVals, so
3998 // pretend we have an 8-byte item at the current address for that
3999 // purpose.
4000 if (!ObjSize) {
4001 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4002 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4003 InVals.push_back(FIN);
4004 continue;
4005 }
4006
4007 // Create a stack object covering all stack doublewords occupied
4008 // by the argument. If the argument is (fully or partially) on
4009 // the stack, or if the argument is fully in registers but the
4010 // caller has allocated the parameter save anyway, we can refer
4011 // directly to the caller's stack frame. Otherwise, create a
4012 // local copy in our own frame.
4013 int FI;
4014 if (HasParameterArea ||
4015 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4016 FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4017 else
4018 FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4019 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4020
4021 // Handle aggregates smaller than 8 bytes.
4022 if (ObjSize < PtrByteSize) {
4023 // The value of the object is its address, which differs from the
4024 // address of the enclosing doubleword on big-endian systems.
4025 SDValue Arg = FIN;
4026 if (!isLittleEndian) {
4027 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4028 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4029 }
4030 InVals.push_back(Arg);
4031
4032 if (GPR_idx != Num_GPR_Regs) {
4033 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4034 FuncInfo->addLiveInAttr(VReg, Flags);
4035 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4036 SDValue Store;
4037
4038 if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
4039 EVT ObjType = (ObjSize == 1 ? MVT::i8 :
4040 (ObjSize == 2 ? MVT::i16 : MVT::i32));
4041 Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4042 MachinePointerInfo(&*FuncArg), ObjType);
4043 } else {
4044 // For sizes that don't fit a truncating store (3, 5, 6, 7),
4045 // store the whole register as-is to the parameter save area
4046 // slot.
4047 Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4048 MachinePointerInfo(&*FuncArg));
4049 }
4050
4051 MemOps.push_back(Store);
4052 }
4053 // Whether we copied from a register or not, advance the offset
4054 // into the parameter save area by a full doubleword.
4055 ArgOffset += PtrByteSize;
4056 continue;
4057 }
4058
4059 // The value of the object is its address, which is the address of
4060 // its first stack doubleword.
4061 InVals.push_back(FIN);
4062
4063 // Store whatever pieces of the object are in registers to memory.
4064 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4065 if (GPR_idx == Num_GPR_Regs)
4066 break;
4067
4068 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4069 FuncInfo->addLiveInAttr(VReg, Flags);
4070 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4071 SDValue Addr = FIN;
4072 if (j) {
4073 SDValue Off = DAG.getConstant(j, dl, PtrVT);
4074 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4075 }
4076 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
4077 MachinePointerInfo(&*FuncArg, j));
4078 MemOps.push_back(Store);
4079 ++GPR_idx;
4080 }
4081 ArgOffset += ArgSize;
4082 continue;
4083 }
4084
4085 switch (ObjectVT.getSimpleVT().SimpleTy) {
4086 default: llvm_unreachable("Unhandled argument type!");
4087 case MVT::i1:
4088 case MVT::i32:
4089 case MVT::i64:
4090 if (Flags.isNest()) {
4091 // The 'nest' parameter, if any, is passed in R11.
4092 unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4093 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4094
4095 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4096 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4097
4098 break;
4099 }
4100
4101 // These can be scalar arguments or elements of an integer array type
4102 // passed directly. Clang may use those instead of "byval" aggregate
4103 // types to avoid forcing arguments to memory unnecessarily.
4104 if (GPR_idx != Num_GPR_Regs) {
4105 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4106 FuncInfo->addLiveInAttr(VReg, Flags);
4107 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4108
4109 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4110 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4111 // value to MVT::i64 and then truncate to the correct register size.
4112 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4113 } else {
4114 if (CallConv == CallingConv::Fast)
4115 ComputeArgOffset();
4116
4117 needsLoad = true;
4118 ArgSize = PtrByteSize;
4119 }
4120 if (CallConv != CallingConv::Fast || needsLoad)
4121 ArgOffset += 8;
4122 break;
4123
4124 case MVT::f32:
4125 case MVT::f64:
4126 // These can be scalar arguments or elements of a float array type
4127 // passed directly. The latter are used to implement ELFv2 homogeneous
4128 // float aggregates.
4129 if (FPR_idx != Num_FPR_Regs) {
4130 unsigned VReg;
4131
4132 if (ObjectVT == MVT::f32)
4133 VReg = MF.addLiveIn(FPR[FPR_idx],
4134 Subtarget.hasP8Vector()
4135 ? &PPC::VSSRCRegClass
4136 : &PPC::F4RCRegClass);
4137 else
4138 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4139 ? &PPC::VSFRCRegClass
4140 : &PPC::F8RCRegClass);
4141
4142 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4143 ++FPR_idx;
4144 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4145 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4146 // once we support fp <-> gpr moves.
4147
4148 // This can only ever happen in the presence of f32 array types,
4149 // since otherwise we never run out of FPRs before running out
4150 // of GPRs.
4151 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4152 FuncInfo->addLiveInAttr(VReg, Flags);
4153 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4154
4155 if (ObjectVT == MVT::f32) {
4156 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4157 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4158 DAG.getConstant(32, dl, MVT::i32));
4159 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4160 }
4161
4162 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4163 } else {
4164 if (CallConv == CallingConv::Fast)
4165 ComputeArgOffset();
4166
4167 needsLoad = true;
4168 }
4169
4170 // When passing an array of floats, the array occupies consecutive
4171 // space in the argument area; only round up to the next doubleword
4172 // at the end of the array. Otherwise, each float takes 8 bytes.
4173 if (CallConv != CallingConv::Fast || needsLoad) {
4174 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4175 ArgOffset += ArgSize;
4176 if (Flags.isInConsecutiveRegsLast())
4177 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4178 }
4179 break;
4180 case MVT::v4f32:
4181 case MVT::v4i32:
4182 case MVT::v8i16:
4183 case MVT::v16i8:
4184 case MVT::v2f64:
4185 case MVT::v2i64:
4186 case MVT::v1i128:
4187 case MVT::f128:
4188 // These can be scalar arguments or elements of a vector array type
4189 // passed directly. The latter are used to implement ELFv2 homogeneous
4190 // vector aggregates.
4191 if (VR_idx != Num_VR_Regs) {
4192 unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4193 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4194 ++VR_idx;
4195 } else {
4196 if (CallConv == CallingConv::Fast)
4197 ComputeArgOffset();
4198 needsLoad = true;
4199 }
4200 if (CallConv != CallingConv::Fast || needsLoad)
4201 ArgOffset += 16;
4202 break;
4203 }
4204
4205 // We need to load the argument to a virtual register if we determined
4206 // above that we ran out of physical registers of the appropriate type.
4207 if (needsLoad) {
4208 if (ObjSize < ArgSize && !isLittleEndian)
4209 CurArgOffset += ArgSize - ObjSize;
4210 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4211 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4212 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4213 }
4214
4215 InVals.push_back(ArgVal);
4216 }
4217
4218 // Area that is at least reserved in the caller of this function.
4219 unsigned MinReservedArea;
4220 if (HasParameterArea)
4221 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4222 else
4223 MinReservedArea = LinkageSize;
4224
4225 // Set the size that is at least reserved in caller of this function. Tail
4226 // call optimized functions' reserved stack space needs to be aligned so that
4227 // taking the difference between two stack areas will result in an aligned
4228 // stack.
4229 MinReservedArea =
4230 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4231 FuncInfo->setMinReservedArea(MinReservedArea);
4232
4233 // If the function takes variable number of arguments, make a frame index for
4234 // the start of the first vararg value... for expansion of llvm.va_start.
4235 // The ELFv2 ABI spec states:
4236 // C programs that are intended to be *portable* across different compilers
4237 // and architectures must use the header file <stdarg.h> to deal with variable
4238 // argument lists.
4239 if (isVarArg && MFI.hasVAStart()) {
4240 int Depth = ArgOffset;
4241
4242 FuncInfo->setVarArgsFrameIndex(
4243 MFI.CreateFixedObject(PtrByteSize, Depth, true));
4244 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4245
4246 // If this function is vararg, store any remaining integer argument regs
4247 // to their spots on the stack so that they may be loaded by dereferencing
4248 // the result of va_next.
4249 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4250 GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4251 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4252 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4253 SDValue Store =
4254 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4255 MemOps.push_back(Store);
4256 // Increment the address by PtrByteSize for the next argument to store
4257 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4258 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4259 }
4260 }
4261
4262 if (!MemOps.empty())
4263 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4264
4265 return Chain;
4266}
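The f32/f64 and vector cases above implement the ELFv2 treatment of homogeneous aggregates, where each element is passed directly in consecutive FPRs or VRs. A hypothetical C-level example of what that covers (illustrative types, not taken from this file):

  struct Vec4 { float x, y, z, w; };          // homogeneous float aggregate
  double dot(struct Vec4 a, struct Vec4 b);   // a -> F1..F4, b -> F5..F8;
                                              // the parameter-area offset advances
                                              // 4 bytes per float and is rounded up
                                              // to a doubleword only after the array
                                              // (isInConsecutiveRegsLast above)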
4267
4268SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
4269 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4270 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4271 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4272 // TODO: add description of PPC stack frame format, or at least some docs.
4273 //
4274 MachineFunction &MF = DAG.getMachineFunction();
4275 MachineFrameInfo &MFI = MF.getFrameInfo();
4276 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4277
4278 EVT PtrVT = getPointerTy(MF.getDataLayout());
4279 bool isPPC64 = PtrVT == MVT::i64;
4280 // Potential tail calls could cause overwriting of argument stack slots.
4281 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4282 (CallConv == CallingConv::Fast));
4283 unsigned PtrByteSize = isPPC64 ? 8 : 4;
4284 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4285 unsigned ArgOffset = LinkageSize;
4286 // Area that is at least reserved in caller of this function.
4287 unsigned MinReservedArea = ArgOffset;
4288
4289 static const MCPhysReg GPR_32[] = { // 32-bit registers.
4290 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4291 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4292 };
4293 static const MCPhysReg GPR_64[] = { // 64-bit registers.
4294 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4295 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4296 };
4297 static const MCPhysReg VR[] = {
4298 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4299 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4300 };
4301
4302 const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
4303 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4304 const unsigned Num_VR_Regs = array_lengthof(VR);
4305
4306 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4307
4308 const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
4309
4310 // In 32-bit non-varargs functions, the stack space for vectors is after the
4311 // stack space for non-vectors. We do not use this space unless we have
4312 // too many vectors to fit in registers, something that only occurs in
4313 // constructed examples:), but we have to walk the arglist to figure
4314 // that out...for the pathological case, compute VecArgOffset as the
4315 // start of the vector parameter area. Computing VecArgOffset is the
4316 // entire point of the following loop.
4317 unsigned VecArgOffset = ArgOffset;
4318 if (!isVarArg && !isPPC64) {
4319 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
4320 ++ArgNo) {
4321 EVT ObjectVT = Ins[ArgNo].VT;
4322 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4323
4324 if (Flags.isByVal()) {
4325 // ObjSize is the true size, ArgSize rounded up to multiple of regs.
4326 unsigned ObjSize = Flags.getByValSize();
4327 unsigned ArgSize =
4328 ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4329 VecArgOffset += ArgSize;
4330 continue;
4331 }
4332
4333 switch(ObjectVT.getSimpleVT().SimpleTy) {
4334 default: llvm_unreachable("Unhandled argument type!")::llvm::llvm_unreachable_internal("Unhandled argument type!",
"/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 4334)
;
4335 case MVT::i1:
4336 case MVT::i32:
4337 case MVT::f32:
4338 VecArgOffset += 4;
4339 break;
4340 case MVT::i64: // PPC64
4341 case MVT::f64:
4342 // FIXME: We are guaranteed to be !isPPC64 at this point.
4343 // Does MVT::i64 apply?
4344 VecArgOffset += 8;
4345 break;
4346 case MVT::v4f32:
4347 case MVT::v4i32:
4348 case MVT::v8i16:
4349 case MVT::v16i8:
4350 // Nothing to do, we're only looking at Nonvector args here.
4351 break;
4352 }
4353 }
4354 }
4355 // We've found where the vector parameter area in memory is. Skip the
4356 // first 12 parameters; these don't use that memory.
4357 VecArgOffset = ((VecArgOffset+15)/16)*16;
4358 VecArgOffset += 12*16;
4359
4360 // Add DAG nodes to load the arguments or copy them out of registers. On
4361 // entry to a function on PPC, the arguments start after the linkage area,
4362 // although the first ones are often in registers.
4363
4364 SmallVector<SDValue, 8> MemOps;
4365 unsigned nAltivecParamsAtEnd = 0;
4366 Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4367 unsigned CurArgIdx = 0;
4368 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4369 SDValue ArgVal;
4370 bool needsLoad = false;
4371 EVT ObjectVT = Ins[ArgNo].VT;
4372 unsigned ObjSize = ObjectVT.getSizeInBits()/8;
4373 unsigned ArgSize = ObjSize;
4374 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4375 if (Ins[ArgNo].isOrigArg()) {
4376 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4377 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4378 }
4379 unsigned CurArgOffset = ArgOffset;
4380
4381 // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
4382 if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
4383 ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
4384 if (isVarArg || isPPC64) {
4385 MinReservedArea = ((MinReservedArea+15)/16)*16;
4386 MinReservedArea += CalculateStackSlotSize(ObjectVT,
4387 Flags,
4388 PtrByteSize);
4389 } else nAltivecParamsAtEnd++;
4390 } else
4391 // Calculate min reserved area.
4392 MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
4393 Flags,
4394 PtrByteSize);
4395
4396 // FIXME the codegen can be much improved in some cases.
4397 // We do not have to keep everything in memory.
4398 if (Flags.isByVal()) {
4399 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4400
4401 // ObjSize is the true size, ArgSize rounded up to multiple of registers.
4402 ObjSize = Flags.getByValSize();
4403 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4404 // Objects of size 1 and 2 are right justified, everything else is
4405 // left justified. This means the memory address is adjusted forwards.
4406 if (ObjSize==1 || ObjSize==2) {
4407 CurArgOffset = CurArgOffset + (4 - ObjSize);
4408 }
4409 // The value of the object is its address.
4410 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, false, true);
4411 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4412 InVals.push_back(FIN);
4413 if (ObjSize==1 || ObjSize==2) {
4414 if (GPR_idx != Num_GPR_Regs) {
4415 unsigned VReg;
4416 if (isPPC64)
4417 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4418 else
4419 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4420 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4421 EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
4422 SDValue Store =
4423 DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
4424 MachinePointerInfo(&*FuncArg), ObjType);
4425 MemOps.push_back(Store);
4426 ++GPR_idx;
4427 }
4428
4429 ArgOffset += PtrByteSize;
4430
4431 continue;
4432 }
4433 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4434 // Store whatever pieces of the object are in registers
4435 // to memory. ArgOffset will be the address of the beginning
4436 // of the object.
4437 if (GPR_idx != Num_GPR_Regs) {
4438 unsigned VReg;
4439 if (isPPC64)
4440 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4441 else
4442 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4443 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4444 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4445 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4446 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4447 MachinePointerInfo(&*FuncArg, j));
4448 MemOps.push_back(Store);
4449 ++GPR_idx;
4450 ArgOffset += PtrByteSize;
4451 } else {
4452 ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
4453 break;
4454 }
4455 }
4456 continue;
4457 }
4458
4459 switch (ObjectVT.getSimpleVT().SimpleTy) {
4460 default: llvm_unreachable("Unhandled argument type!");
4461 case MVT::i1:
4462 case MVT::i32:
4463 if (!isPPC64) {
4464 if (GPR_idx != Num_GPR_Regs) {
4465 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4466 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
4467
4468 if (ObjectVT == MVT::i1)
4469 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);
4470
4471 ++GPR_idx;
4472 } else {
4473 needsLoad = true;
4474 ArgSize = PtrByteSize;
4475 }
4476 // All int arguments reserve stack space in the Darwin ABI.
4477 ArgOffset += PtrByteSize;
4478 break;
4479 }
4480 LLVM_FALLTHROUGH;
4481 case MVT::i64: // PPC64
4482 if (GPR_idx != Num_GPR_Regs) {
4483 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4484 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4485
4486 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4487 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4488 // value to MVT::i64 and then truncate to the correct register size.
4489 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4490
4491 ++GPR_idx;
4492 } else {
4493 needsLoad = true;
4494 ArgSize = PtrByteSize;
4495 }
4496 // All int arguments reserve stack space in the Darwin ABI.
4497 ArgOffset += 8;
4498 break;
4499
4500 case MVT::f32:
4501 case MVT::f64:
4502 // Every 4 bytes of argument space consumes one of the GPRs available for
4503 // argument passing.
4504 if (GPR_idx != Num_GPR_Regs) {
4505 ++GPR_idx;
4506 if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
4507 ++GPR_idx;
4508 }
4509 if (FPR_idx != Num_FPR_Regs) {
4510 unsigned VReg;
4511
4512 if (ObjectVT == MVT::f32)
4513 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
4514 else
4515 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
4516
4517 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4518 ++FPR_idx;
4519 } else {
4520 needsLoad = true;
4521 }
4522
4523 // All FP arguments reserve stack space in the Darwin ABI.
4524 ArgOffset += isPPC64 ? 8 : ObjSize;
4525 break;
4526 case MVT::v4f32:
4527 case MVT::v4i32:
4528 case MVT::v8i16:
4529 case MVT::v16i8:
4530 // Note that vector arguments in registers don't reserve stack space,
4531 // except in varargs functions.
4532 if (VR_idx != Num_VR_Regs) {
4533 unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4534 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4535 if (isVarArg) {
4536 while ((ArgOffset % 16) != 0) {
4537 ArgOffset += PtrByteSize;
4538 if (GPR_idx != Num_GPR_Regs)
4539 GPR_idx++;
4540 }
4541 ArgOffset += 16;
4542 GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
4543 }
4544 ++VR_idx;
4545 } else {
4546 if (!isVarArg && !isPPC64) {
4547 // Vectors go after all the nonvectors.
4548 CurArgOffset = VecArgOffset;
4549 VecArgOffset += 16;
4550 } else {
4551 // Vectors are aligned.
4552 ArgOffset = ((ArgOffset+15)/16)*16;
4553 CurArgOffset = ArgOffset;
4554 ArgOffset += 16;
4555 }
4556 needsLoad = true;
4557 }
4558 break;
4559 }
4560
4561 // We need to load the argument to a virtual register if we determined above
4562 // that we ran out of physical registers of the appropriate type.
4563 if (needsLoad) {
4564 int FI = MFI.CreateFixedObject(ObjSize,
4565 CurArgOffset + (ArgSize - ObjSize),
4566 isImmutable);
4567 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4568 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4569 }
4570
4571 InVals.push_back(ArgVal);
4572 }
4573
4574 // Allow for Altivec parameters at the end, if needed.
4575 if (nAltivecParamsAtEnd) {
4576 MinReservedArea = ((MinReservedArea+15)/16)*16;
4577 MinReservedArea += 16*nAltivecParamsAtEnd;
4578 }
4579
4580 // Area that is at least reserved in the caller of this function.
4581 MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);
4582
4583 // Set the size that is at least reserved in caller of this function. Tail
4584 // call optimized functions' reserved stack space needs to be aligned so that
4585 // taking the difference between two stack areas will result in an aligned
4586 // stack.
4587 MinReservedArea =
4588 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4589 FuncInfo->setMinReservedArea(MinReservedArea);
4590
4591 // If the function takes variable number of arguments, make a frame index for
4592 // the start of the first vararg value... for expansion of llvm.va_start.
4593 if (isVarArg) {
4594 int Depth = ArgOffset;
4595
4596 FuncInfo->setVarArgsFrameIndex(
4597 MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
4598 Depth, true));
4599 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4600
4601 // If this function is vararg, store any remaining integer argument regs
4602 // to their spots on the stack so that they may be loaded by dereferencing
4603 // the result of va_next.
4604 for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
4605 unsigned VReg;
4606
4607 if (isPPC64)
4608 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4609 else
4610 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4611
4612 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4613 SDValue Store =
4614 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4615 MemOps.push_back(Store);
4616 // Increment the address by the pointer size for the next argument to store
4617 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4618 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4619 }
4620 }
4621
4622 if (!MemOps.empty())
4623 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4624
4625 return Chain;
4626}
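The Darwin rules above differ from the ELF paths in that floating-point arguments both occupy FPRs and consume ("shadow") GPRs for the bytes they take in the parameter area. A hypothetical 32-bit Darwin signature as an illustration (not from this file):

  void f(double d, int i);   // d -> F1 and shadows R3/R4 (8 bytes),
                             // so i lands in R5; ArgOffset advances by 8 for d
                             // and by 4 for i, per the f64 and i32 cases above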
4627
4628/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4629/// adjusted to accommodate the arguments for the tailcall.
4630static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4631 unsigned ParamSize) {
4632
4633 if (!isTailCall) return 0;
4634
4635 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4636 unsigned CallerMinReservedArea = FI->getMinReservedArea();
4637 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4638 // Remember only if the new adjustment is bigger.
4639 if (SPDiff < FI->getTailCallSPDelta())
4640 FI->setTailCallSPDelta(SPDiff);
4641
4642 return SPDiff;
4643}
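A worked example of the computation above, with illustrative numbers only: if the caller's MinReservedArea is 112 bytes and the tail call needs a 144-byte parameter area, then

  SPDiff = (int)112 - (int)144 = -32

so the callee needs 32 more bytes than the caller reserved, and TailCallSPDelta is lowered to -32 (the larger adjustment) if its current value is greater than -32.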
4644
4645static bool isFunctionGlobalAddress(SDValue Callee);
4646
4647static bool callsShareTOCBase(const Function *Caller, SDValue Callee,
4648 const TargetMachine &TM) {
4649 // It does not make sense to call callsShareTOCBase() with a caller that
4650 // is PC Relative since PC Relative callers do not have a TOC.
4651#ifndef NDEBUG
4652 const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4653 assert(!STICaller->isUsingPCRelativeCalls() &&
4654 "PC Relative callers do not have a TOC and cannot share a TOC Base");
4655#endif
4656
4657 // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4658 // don't have enough information to determine if the caller and callee share
4659 // the same TOC base, so we have to pessimistically assume they don't for
4660 // correctness.
4661 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4662 if (!G)
4663 return false;
4664
4665 const GlobalValue *GV = G->getGlobal();
4666
4667 // If the callee is preemptable, then the static linker will use a plt-stub
4668 // which saves the toc to the stack, and needs a nop after the call
4669 // instruction to convert to a toc-restore.
4670 if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
4671 return false;
4672
4673 // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4674 // We may need a TOC restore in the situation where the caller requires a
4675 // valid TOC but the callee is PC Relative and does not.
4676 const Function *F = dyn_cast<Function>(GV);
4677 const GlobalAlias *Alias = dyn_cast<GlobalAlias>(GV);
4678
4679 // If we have an Alias we can try to get the function from there.
4680 if (Alias) {
4681 const GlobalObject *GlobalObj = Alias->getBaseObject();
4682 F = dyn_cast<Function>(GlobalObj);
4683 }
4684
4685 // If we still have no valid function pointer we do not have enough
4686 // information to determine if the callee uses PC Relative calls so we must
4687 // assume that it does.
4688 if (!F)
4689 return false;
4690
4691 // If the callee uses PC Relative we cannot guarantee that the callee won't
4692 // clobber the TOC of the caller and so we must assume that the two
4693 // functions do not share a TOC base.
4694 const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4695 if (STICallee->isUsingPCRelativeCalls())
4696 return false;
4697
4698 // The medium and large code models are expected to provide a sufficiently
4699 // large TOC to provide all data addressing needs of a module with a
4700 // single TOC.
4701 if (CodeModel::Medium == TM.getCodeModel() ||
4702 CodeModel::Large == TM.getCodeModel())
4703 return true;
4704
4705 // Otherwise we need to ensure callee and caller are in the same section,
4706 // since the linker may allocate multiple TOCs, and we don't know which
4707 // sections will belong to the same TOC base.
4708 if (!GV->isStrongDefinitionForLinker())
4709 return false;
4710
4711 // Any explicitly-specified sections and section prefixes must also match.
4712 // Also, if we're using -ffunction-sections, then each function is always in
4713 // a different section (the same is true for COMDAT functions).
4714 if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
4715 GV->getSection() != Caller->getSection())
4716 return false;
4717 if (const auto *F = dyn_cast<Function>(GV)) {
4718 if (F->getSectionPrefix() != Caller->getSectionPrefix())
4719 return false;
4720 }
4721
4722 return true;
4723}
4724
4725static bool
4726needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4727 const SmallVectorImpl<ISD::OutputArg> &Outs) {
4728 assert(Subtarget.is64BitELFABI());
4729
4730 const unsigned PtrByteSize = 8;
4731 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4732
4733 static const MCPhysReg GPR[] = {
4734 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4735 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4736 };
4737 static const MCPhysReg VR[] = {
4738 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4739 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4740 };
4741
4742 const unsigned NumGPRs = array_lengthof(GPR);
4743 const unsigned NumFPRs = 13;
4744 const unsigned NumVRs = array_lengthof(VR);
4745 const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4746
4747 unsigned NumBytes = LinkageSize;
4748 unsigned AvailableFPRs = NumFPRs;
4749 unsigned AvailableVRs = NumVRs;
4750
4751 for (const ISD::OutputArg& Param : Outs) {
4752 if (Param.Flags.isNest()) continue;
4753
4754 if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
4755 LinkageSize, ParamAreaSize, NumBytes,
4756 AvailableFPRs, AvailableVRs))
4757 return true;
4758 }
4759 return false;
4760}
4761
4762static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
4763 if (CB.arg_size() != CallerFn->arg_size())
4764 return false;
4765
4766 auto CalleeArgIter = CB.arg_begin();
4767 auto CalleeArgEnd = CB.arg_end();
4768 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4769
4770 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4771 const Value* CalleeArg = *CalleeArgIter;
4772 const Value* CallerArg = &(*CallerArgIter);
4773 if (CalleeArg == CallerArg)
4774 continue;
4775
4776 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4777 // tail call @callee([4 x i64] undef, [4 x i64] %b)
4778 // }
4779 // 1st argument of callee is undef and has the same type as caller.
4780 if (CalleeArg->getType() == CallerArg->getType() &&
4781 isa<UndefValue>(CalleeArg))
4782 continue;
4783
4784 return false;
4785 }
4786
4787 return true;
4788}
4789
4790// Returns true if TCO is possible between the callers and callees
4791// calling conventions.
4792static bool
4793areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
4794 CallingConv::ID CalleeCC) {
4795 // Tail calls are possible with fastcc and ccc.
4796 auto isTailCallableCC = [] (CallingConv::ID CC){
4797 return CC == CallingConv::C || CC == CallingConv::Fast;
4798 };
4799 if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
4800 return false;
4801
4802 // We can safely tail call both fastcc and ccc callees from a c calling
4803 // convention caller. If the caller is fastcc, we may have less stack space
4804 // than a non-fastcc caller with the same signature so disable tail-calls in
4805 // that case.
4806 return CallerCC == CallingConv::C || CallerCC == CalleeCC;
4807}
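The predicate above reduces to a small truth table (illustrative summary):

  caller C,    callee C    -> eligible
  caller C,    callee Fast -> eligible
  caller Fast, callee Fast -> eligible
  caller Fast, callee C    -> not eligible (a fastcc caller may have reserved
                              less stack than a ccc caller with the same signature)
  any other calling convention on either side -> not eligible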
4808
4809bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
4810 SDValue Callee, CallingConv::ID CalleeCC, const CallBase *CB, bool isVarArg,
4811 const SmallVectorImpl<ISD::OutputArg> &Outs,
4812 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
4813 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
4814
4815 if (DisableSCO && !TailCallOpt) return false;
4816
4817 // Variadic argument functions are not supported.
4818 if (isVarArg) return false;
4819
4820 auto &Caller = DAG.getMachineFunction().getFunction();
4821 // Check that the calling conventions are compatible for tco.
4822 if (!areCallingConvEligibleForTCO_64SVR4(Caller.getCallingConv(), CalleeCC))
4823 return false;
4824
4825 // A caller with any byval parameter is not supported.
4826 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
4827 return false;
4828
4829 // A callee with any byval parameter is not supported either.
4830 // Note: This is a quick workaround, because in some cases, e.g.
4831 // caller's stack size > callee's stack size, we are still able to apply
4832 // sibling call optimization. For example, gcc is able to do SCO for caller1
4833 // in the following example, but not for caller2.
4834 // struct test {
4835 // long int a;
4836 // char ary[56];
4837 // } gTest;
4838 // __attribute__((noinline)) int callee(struct test v, struct test *b) {
4839 // b->a = v.a;
4840 // return 0;
4841 // }
4842 // void caller1(struct test a, struct test c, struct test *b) {
4843 // callee(gTest, b); }
4844 // void caller2(struct test *b) { callee(gTest, b); }
4845 if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
4846 return false;
4847
4848 // If callee and caller use different calling conventions, we cannot pass
4849 // parameters on stack since offsets for the parameter area may be different.
4850 if (Caller.getCallingConv() != CalleeCC &&
4851 needStackSlotPassParameters(Subtarget, Outs))
4852 return false;
4853
4854 // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
4855 // the caller and callee share the same TOC for TCO/SCO. If the caller and
4856 // callee potentially have different TOC bases then we cannot tail call since
4857 // we need to restore the TOC pointer after the call.
4858 // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
4859 // We cannot guarantee this for indirect calls or calls to external functions.
4860 // When PC-Relative addressing is used, the concept of the TOC is no longer
4861 // applicable so this check is not required.
4862 // Check first for indirect calls.
4863 if (!Subtarget.isUsingPCRelativeCalls() &&
4864 !isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee))
4865 return false;
4866
4867 // Check if we share the TOC base.
4868 if (!Subtarget.isUsingPCRelativeCalls() &&
4869 !callsShareTOCBase(&Caller, Callee, getTargetMachine()))
4870 return false;
4871
4872 // TCO allows altering callee ABI, so we don't have to check further.
4873 if (CalleeCC == CallingConv::Fast && TailCallOpt)
4874 return true;
4875
4876 if (DisableSCO) return false;
4877
4878 // If the callee uses the same argument list as the caller, then we can
4879 // apply SCO in this case. If not, we need to check whether the callee needs
4880 // stack for passing arguments.
4881 // PC Relative tail calls may not have a CallBase.
4882 // If there is no CallBase we cannot verify if we have the same argument
4883 // list so assume that we don't have the same argument list.
4884 if (CB && !hasSameArgumentList(&Caller, *CB) &&
4885 needStackSlotPassParameters(Subtarget, Outs))
4886 return false;
4887 else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
4888 return false;
4889
4890 return true;
4891}
4892
4893/// IsEligibleForTailCallOptimization - Check whether the call is eligible
4894/// for tail call optimization. Targets which want to do tail call
4895/// optimization should implement this function.
4896bool
4897PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
4898 CallingConv::ID CalleeCC,
4899 bool isVarArg,
4900 const SmallVectorImpl<ISD::InputArg> &Ins,
4901 SelectionDAG& DAG) const {
4902 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
4903 return false;
4904
4905 // Variable argument functions are not supported.
4906 if (isVarArg)
4907 return false;
4908
4909 MachineFunction &MF = DAG.getMachineFunction();
4910 CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
4911 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
4912 // Functions containing by val parameters are not supported.
4913 for (unsigned i = 0; i != Ins.size(); i++) {
4914 ISD::ArgFlagsTy Flags = Ins[i].Flags;
4915 if (Flags.isByVal()) return false;
4916 }
4917
4918 // Non-PIC/GOT tail calls are supported.
4919 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
4920 return true;
4921
4922 // At the moment we can only do local tail calls (in same module, hidden
4923 // or protected) if we are generating PIC.
4924 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
4925 return G->getGlobal()->hasHiddenVisibility()
4926 || G->getGlobal()->hasProtectedVisibility();
4927 }
4928
4929 return false;
4930}
4931
4932 /// isBLACompatibleAddress - Return the immediate to use if the specified
4933/// 32-bit value is representable in the immediate field of a BxA instruction.
4934static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
4935 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
4936 if (!C) return nullptr;
4937
4938 int Addr = C->getZExtValue();
4939 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
4940 SignExtend32<26>(Addr) != Addr)
4941 return nullptr; // Top 6 bits have to be sext of immediate.
4942
4943 return DAG
4944 .getConstant(
4945 (int)C->getZExtValue() >> 2, SDLoc(Op),
4946 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
4947 .getNode();
4948}
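A sketch of the encodability check above, with illustrative addresses (not from any real module): BLA takes an absolute target whose low 2 bits are implicitly zero and whose value must survive 26-bit sign extension, and the returned node holds the address shifted right by 2:

  Addr = 0x00002000  ->  (Addr & 3) == 0 and SignExtend32<26>(Addr) == Addr,
                         encodable; the node constant is 0x00000800
  Addr = 0x00002002  ->  low bits nonzero, not encodable
  Addr = 0x04000000  ->  changed by 26-bit sign extension, not encodable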
4949
4950namespace {
4951
4952struct TailCallArgumentInfo {
4953 SDValue Arg;
4954 SDValue FrameIdxOp;
4955 int FrameIdx = 0;
4956
4957 TailCallArgumentInfo() = default;
4958};
4959
4960} // end anonymous namespace
4961
4962/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
4963static void StoreTailCallArgumentsToStackSlot(
4964 SelectionDAG &DAG, SDValue Chain,
4965 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
4966 SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
4967 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
4968 SDValue Arg = TailCallArgs[i].Arg;
4969 SDValue FIN = TailCallArgs[i].FrameIdxOp;
4970 int FI = TailCallArgs[i].FrameIdx;
4971 // Store relative to framepointer.
4972 MemOpChains.push_back(DAG.getStore(
4973 Chain, dl, Arg, FIN,
4974 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
4975 }
4976}
4977
4978/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
4979/// the appropriate stack slot for the tail call optimized function call.
4980static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
4981 SDValue OldRetAddr, SDValue OldFP,
4982 int SPDiff, const SDLoc &dl) {
4983 if (SPDiff) {
4984 // Calculate the new stack slot for the return address.
4985 MachineFunction &MF = DAG.getMachineFunction();
4986 const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
4987 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
4988 bool isPPC64 = Subtarget.isPPC64();
4989 int SlotSize = isPPC64 ? 8 : 4;
4990 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
4991 int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
4992 NewRetAddrLoc, true);
4993 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4994 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
4995 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
4996 MachinePointerInfo::getFixedStack(MF, NewRetAddr));
4997 }
4998 return Chain;
4999}
5000
5001/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
5002/// the position of the argument.
5003static void
5004CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
5005 SDValue Arg, int SPDiff, unsigned ArgOffset,
5006 SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
5007 int Offset = ArgOffset + SPDiff;
5008 uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
5009 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
5010 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5011 SDValue FIN = DAG.getFrameIndex(FI, VT);
5012 TailCallArgumentInfo Info;
5013 Info.Arg = Arg;
5014 Info.FrameIdxOp = FIN;
5015 Info.FrameIdx = FI;
5016 TailCallArguments.push_back(Info);
5017}
5018
5019 /// EmitTailCallLoadFPAndRetAddr - Emit load from the frame pointer and return
5020 /// address stack slot. Returns the chain as result and the loaded values in
5021 /// LROpOut/FPOpOut. Used when tail calling.
5022SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5023 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5024 SDValue &FPOpOut, const SDLoc &dl) const {
5025 if (SPDiff) {
5026 // Load the LR and FP stack slot for later adjusting.
5027 EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5028 LROpOut = getReturnAddrFrameIndex(DAG);
5029 LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
5030 Chain = SDValue(LROpOut.getNode(), 1);
5031 }
5032 return Chain;
5033}
5034
5035/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
5036/// by "Src" to address "Dst" of size "Size". Alignment information is
5037/// specified by the specific parameter attribute. The copy will be passed as
5038/// a byval function parameter.
5039/// Sometimes what we are copying is the end of a larger object, the part that
5040/// does not fit in registers.
5041static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
5042 SDValue Chain, ISD::ArgFlagsTy Flags,
5043 SelectionDAG &DAG, const SDLoc &dl) {
5044 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
5045 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
5046 Flags.getNonZeroByValAlign(), false, false, false,
5047 MachinePointerInfo(), MachinePointerInfo());
5048}
5049
5050/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5051/// tail calls.
5052static void LowerMemOpCallTo(
5053 SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5054 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5055 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5056 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5057 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5058 if (!isTailCall) {
5059 if (isVector) {
5060 SDValue StackPtr;
5061 if (isPPC64)
5062 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5063 else
5064 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5065 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5066 DAG.getConstant(ArgOffset, dl, PtrVT));
5067 }
5068 MemOpChains.push_back(
5069 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5070 // Calculate and remember argument location.
5071 } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
5072 TailCallArguments);
5073}
5074
5075static void
5076PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
5077 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
5078 SDValue FPOp,
5079 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5080 // Emit a sequence of copyto/copyfrom virtual registers for arguments that
5081 // might overwrite each other in case of tail call optimization.
5082 SmallVector<SDValue, 8> MemOpChains2;
5083 // Do not flag preceding copytoreg stuff together with the following stuff.
5084 InFlag = SDValue();
5085 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
5086 MemOpChains2, dl);
5087 if (!MemOpChains2.empty())
5088 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
5089
5090 // Store the return address to the appropriate stack slot.
5091 Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
5092
5093 // Emit callseq_end just before tailcall node.
5094 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
5095 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
5096 InFlag = Chain.getValue(1);
5097}
5098
5099// Is this global address that of a function that can be called by name? (as
5100// opposed to something that must hold a descriptor for an indirect call).
5101static bool isFunctionGlobalAddress(SDValue Callee) {
5102 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
5103 if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
5104 Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
5105 return false;
5106
5107 return G->getGlobal()->getValueType()->isFunctionTy();
5108 }
5109
5110 return false;
5111}
5112
5113SDValue PPCTargetLowering::LowerCallResult(
5114 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
5115 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5116 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5117 SmallVector<CCValAssign, 16> RVLocs;
5118 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5119 *DAG.getContext());
5120
5121 CCRetInfo.AnalyzeCallResult(
5122 Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5123 ? RetCC_PPC_Cold
5124 : RetCC_PPC);
5125
5126 // Copy all of the result registers out of their specified physreg.
5127 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5128 CCValAssign &VA = RVLocs[i];
5129 assert(VA.isRegLoc() && "Can only return in registers!");
5130
5131 SDValue Val;
5132
5133 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5134 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5135 InFlag);
5136 Chain = Lo.getValue(1);
5137 InFlag = Lo.getValue(2);
5138 VA = RVLocs[++i]; // skip ahead to next loc
5139 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5140 InFlag);
5141 Chain = Hi.getValue(1);
5142 InFlag = Hi.getValue(2);
5143 if (!Subtarget.isLittleEndian())
5144 std::swap (Lo, Hi);
5145 Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
5146 } else {
5147 Val = DAG.getCopyFromReg(Chain, dl,
5148 VA.getLocReg(), VA.getLocVT(), InFlag);
5149 Chain = Val.getValue(1);
5150 InFlag = Val.getValue(2);
5151 }
5152
5153 switch (VA.getLocInfo()) {
5154    default: llvm_unreachable("Unknown loc info!");
5155 case CCValAssign::Full: break;
5156 case CCValAssign::AExt:
5157 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5158 break;
5159 case CCValAssign::ZExt:
5160 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5161 DAG.getValueType(VA.getValVT()));
5162 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5163 break;
5164 case CCValAssign::SExt:
5165 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5166 DAG.getValueType(VA.getValVT()));
5167 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5168 break;
5169 }
5170
5171 InVals.push_back(Val);
5172 }
5173
5174 return Chain;
5175}
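// Illustrative sketch (not part of the original source; names below are
// hypothetical): the SPE branch in LowerCallResult above receives an f64
// split across two 32-bit GPRs and rejoins them with PPCISD::BUILD_SPE64,
// swapping the halves first on big-endian subtargets. At the value level the
// recombination corresponds roughly to:
//
//   #include <cstdint>
//   #include <cstring>
//   #include <utility>
//
//   double rebuildSPEDouble(uint32_t First, uint32_t Second, bool IsLE) {
//     if (!IsLE)
//       std::swap(First, Second);          // mirrors std::swap(Lo, Hi) above
//     uint64_t Bits = (uint64_t(Second) << 32) | First;
//     double D;
//     std::memcpy(&D, &Bits, sizeof(D));   // reinterpret the raw 64 bits
//     return D;
//   }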
5176
5177static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5178 const PPCSubtarget &Subtarget, bool isPatchPoint) {
5179 // PatchPoint calls are not indirect.
5180 if (isPatchPoint)
5181 return false;
5182
5183 if (isFunctionGlobalAddress(Callee) || dyn_cast<ExternalSymbolSDNode>(Callee))
5184 return false;
5185
5186  // Darwin and 32-bit ELF can use a BLA. The descriptor-based ABIs cannot
5187  // because the immediate function pointer points to a descriptor instead of
5188  // a function entry point. The ELFv2 ABI cannot use a BLA because the function
5189  // pointer immediate points to the global entry point, while the BLA would
5190  // need to jump to the local entry point (see rL211174).
5191 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5192 isBLACompatibleAddress(Callee, DAG))
5193 return false;
5194
5195 return true;
5196}
5197
5198// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5199static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5200 return Subtarget.isAIXABI() ||
5201 (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5202}
5203
5204static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
5205 const Function &Caller,
5206 const SDValue &Callee,
5207 const PPCSubtarget &Subtarget,
5208 const TargetMachine &TM) {
5209 if (CFlags.IsTailCall)
5210 return PPCISD::TC_RETURN;
5211
5212 // This is a call through a function pointer.
5213 if (CFlags.IsIndirect) {
5214    // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
5215    // indirect calls. The save of the caller's TOC pointer to the stack will be
5216    // inserted into the DAG as part of call lowering. The restore of the TOC
5217    // pointer is modeled by using a pseudo instruction for the call opcode that
5218    // represents the two-instruction sequence of an indirect branch and link,
5219    // immediately followed by a load of the TOC pointer from the stack save
5220    // slot into gpr2. For the 64-bit ELFv2 ABI with PCRel, do not restore the TOC
5221    // as it is not saved or used.
5222 return isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
5223 : PPCISD::BCTRL;
5224 }
5225
5226 if (Subtarget.isUsingPCRelativeCalls()) {
5227    assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5228 return PPCISD::CALL_NOTOC;
5229 }
5230
5231  // The ABIs that maintain a TOC pointer across calls need to have a nop
5232  // immediately following the call instruction if the caller and callee may
5233  // have different TOC bases. At link time, if the linker determines the calls
5234  // may not share a TOC base, the call is redirected to a trampoline inserted
5235  // by the linker. The trampoline will (among other things) save the caller's
5236  // TOC pointer at an ABI-designated offset in the linkage area, and the linker
5237  // will rewrite the nop to be a load of the TOC pointer from the linkage area
5238  // into gpr2.
5239 if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())
5240 return callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL
5241 : PPCISD::CALL_NOP;
5242
5243 return PPCISD::CALL;
5244}
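// Illustrative sketch (not part of the original source): on the TOC-based
// ABIs, the PPCISD::CALL_NOP opcode chosen above is emitted as a
// branch-and-link followed by a nop; if the linker later finds that caller
// and callee do not share a TOC base, it redirects the call through a
// trampoline and rewrites the nop into a TOC restore. Roughly (ELFv2 offsets
// assumed; the authoritative offset comes from
// PPCFrameLowering::getTOCSaveOffset()):
//
//   bl callee        // PPCISD::CALL_NOP
//   nop              // may be rewritten to:  ld r2, 24(r1)  (reload caller's TOC)
//
// PPCISD::CALL (without the trailing nop) is used only when
// callsShareTOCBase() can prove the caller and callee share a TOC base.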
5245
5246static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
5247 const SDLoc &dl, const PPCSubtarget &Subtarget) {
5248 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
5249 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
5250 return SDValue(Dest, 0);
5251
5252 // Returns true if the callee is local, and false otherwise.
5253 auto isLocalCallee = [&]() {
5254 const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
5255 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5256 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5257
5258 return DAG.getTarget().shouldAssumeDSOLocal(*Mod, GV) &&
5259 !dyn_cast_or_null<GlobalIFunc>(GV);
5260 };
5261
5262 // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
5263 // a static relocation model causes some versions of GNU LD (2.17.50, at
5264 // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5265 // built with secure-PLT.
5266 bool UsePlt =
5267 Subtarget.is32BitELFABI() && !isLocalCallee() &&
5268 Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;
5269
5270 const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5271 const TargetMachine &TM = Subtarget.getTargetMachine();
5272 const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
5273 MCSymbolXCOFF *S =
5274 cast<MCSymbolXCOFF>(TLOF->getFunctionEntryPointSymbol(GV, TM));
5275
5276 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5277 return DAG.getMCSymbol(S, PtrVT);
5278 };
5279
5280 if (isFunctionGlobalAddress(Callee)) {
5281 const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5282
5283 if (Subtarget.isAIXABI()) {
5284      assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
5285 return getAIXFuncEntryPointSymbolSDNode(GV);
5286 }
5287 return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
5288 UsePlt ? PPCII::MO_PLT : 0);
5289 }
5290
5291 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5292 const char *SymName = S->getSymbol();
5293 if (Subtarget.isAIXABI()) {
5294 // If there exists a user-declared function whose name is the same as the
5295 // ExternalSymbol's, then we pick up the user-declared version.
5296 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5297 if (const Function *F =
5298 dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
5299 return getAIXFuncEntryPointSymbolSDNode(F);
5300
5301 // On AIX, direct function calls reference the symbol for the function's
5302 // entry point, which is named by prepending a "." before the function's
5303 // C-linkage name. A Qualname is returned here because an external
5304 // function entry point is a csect with XTY_ER property.
5305 const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5306 auto &Context = DAG.getMachineFunction().getMMI().getContext();
5307 MCSectionXCOFF *Sec = Context.getXCOFFSection(
5308 (Twine(".") + Twine(SymName)).str(), XCOFF::XMC_PR, XCOFF::XTY_ER,
5309 SectionKind::getMetadata());
5310 return Sec->getQualNameSymbol();
5311 };
5312
5313 SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
5314 }
5315 return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
5316 UsePlt ? PPCII::MO_PLT : 0);
5317 }
5318
5319 // No transformation needed.
5320  assert(Callee.getNode() && "What no callee?");
5321 return Callee;
5322}
5323
5324static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
5325  assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5326         "Expected a CALLSEQ_STARTSDNode.");
5327
5328 // The last operand is the chain, except when the node has glue. If the node
5329 // has glue, then the last operand is the glue, and the chain is the second
5330 // last operand.
5331 SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5332 if (LastValue.getValueType() != MVT::Glue)
5333 return LastValue;
5334
5335 return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5336}
5337
5338// Creates the node that moves a function's address into the count register
5339// to prepare for an indirect call instruction.
5340static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5341 SDValue &Glue, SDValue &Chain,
5342 const SDLoc &dl) {
5343 SDValue MTCTROps[] = {Chain, Callee, Glue};
5344 EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5345 Chain = DAG.getNode(PPCISD::MTCTR, dl, makeArrayRef(ReturnTypes, 2),
5346 makeArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5347 // The glue is the second value produced.
5348 Glue = Chain.getValue(1);
5349}
5350
5351static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5352 SDValue &Glue, SDValue &Chain,
5353 SDValue CallSeqStart,
5354 const CallBase *CB, const SDLoc &dl,
5355 bool hasNest,
5356 const PPCSubtarget &Subtarget) {
5357 // Function pointers in the 64-bit SVR4 ABI do not point to the function
5358 // entry point, but to the function descriptor (the function entry point
5359 // address is part of the function descriptor though).
5360 // The function descriptor is a three doubleword structure with the
5361 // following fields: function entry point, TOC base address and
5362 // environment pointer.
5363 // Thus for a call through a function pointer, the following actions need
5364 // to be performed:
5365 // 1. Save the TOC of the caller in the TOC save area of its stack
5366 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
5367 // 2. Load the address of the function entry point from the function
5368 // descriptor.
5369 // 3. Load the TOC of the callee from the function descriptor into r2.
5370 // 4. Load the environment pointer from the function descriptor into
5371 // r11.
5372 // 5. Branch to the function entry point address.
5373 // 6. On return of the callee, the TOC of the caller needs to be
5374 // restored (this is done in FinishCall()).
5375 //
5376 // The loads are scheduled at the beginning of the call sequence, and the
5377 // register copies are flagged together to ensure that no other
5378 // operations can be scheduled in between. E.g. without flagging the
5379 // copies together, a TOC access in the caller could be scheduled between
5380 // the assignment of the callee TOC and the branch to the callee, which leads
5381 // to incorrect code.
5382
5383 // Start by loading the function address from the descriptor.
5384 SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
5385 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5386 ? (MachineMemOperand::MODereferenceable |
5387 MachineMemOperand::MOInvariant)
5388 : MachineMemOperand::MONone;
5389
5390 MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5391
5392 // Registers used in building the DAG.
5393 const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5394 const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5395
5396 // Offsets of descriptor members.
5397 const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5398 const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5399
5400 const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5401 const unsigned Alignment = Subtarget.isPPC64() ? 8 : 4;
5402
5403  // One load for the function's entry point address.
5404 SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
5405 Alignment, MMOFlags);
5406
5407 // One for loading the TOC anchor for the module that contains the called
5408 // function.
5409 SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
5410 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
5411 SDValue TOCPtr =
5412 DAG.getLoad(RegVT, dl, LDChain, AddTOC,
5413 MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
5414
5415 // One for loading the environment pointer.
5416 SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
5417 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
5418 SDValue LoadEnvPtr =
5419 DAG.getLoad(RegVT, dl, LDChain, AddPtr,
5420 MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
5421
5422
5423 // Then copy the newly loaded TOC anchor to the TOC pointer.
5424 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
5425 Chain = TOCVal.getValue(0);
5426 Glue = TOCVal.getValue(1);
5427
5428 // If the function call has an explicit 'nest' parameter, it takes the
5429 // place of the environment pointer.
5430  assert((!hasNest || !Subtarget.isAIXABI()) &&
5431         "Nest parameter is not supported on AIX.");
5432 if (!hasNest) {
5433 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
5434 Chain = EnvVal.getValue(0);
5435 Glue = EnvVal.getValue(1);
5436 }
5437
5438 // The rest of the indirect call sequence is the same as the non-descriptor
5439 // DAG.
5440 prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
5441}
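// Illustrative sketch (not part of the original source; the struct is
// hypothetical and the real offsets come from descriptorTOCAnchorOffset()
// and descriptorEnvironmentPointerOffset()): the three loads issued in
// prepareDescriptorIndirectCall above read the 64-bit ELFv1 function
// descriptor, a three-doubleword structure laid out roughly as:
//
//   struct FunctionDescriptor {
//     void *EntryPoint;          // offset 0:  moved into CTR and branched to
//     void *TOCBase;             // offset 8:  copied into r2 (TOCReg)
//     void *EnvironmentPointer;  // offset 16: copied into r11 (EnvPtrReg)
//   };
//
// so an indirect call dereferences the descriptor rather than branching to
// the function-pointer value itself.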
5442
5443static void
5444buildCallOperands(SmallVectorImpl<SDValue> &Ops,
5445 PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5446 SelectionDAG &DAG,
5447 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5448 SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5449 const PPCSubtarget &Subtarget) {
5450 const bool IsPPC64 = Subtarget.isPPC64();
5451 // MVT for a general purpose register.
5452 const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
5453
5454 // First operand is always the chain.
5455 Ops.push_back(Chain);
5456
5457  // If it's a direct call, pass the callee as the second operand.
5458 if (!CFlags.IsIndirect)
5459 Ops.push_back(Callee);
5460 else {
5461    assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5462
5463 // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5464 // on the stack (this would have been done in `LowerCall_64SVR4` or
5465 // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5466 // represents both the indirect branch and a load that restores the TOC
5467 // pointer from the linkage area. The operand for the TOC restore is an add
5468 // of the TOC save offset to the stack pointer. This must be the second
5469 // operand: after the chain input but before any other variadic arguments.
5470 // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5471 // saved or used.
5472 if (isTOCSaveRestoreRequired(Subtarget)) {
5473 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5474
5475 SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
5476 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5477 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5478 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
5479 Ops.push_back(AddTOC);
5480 }
5481
5482 // Add the register used for the environment pointer.
5483 if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5484 Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
5485 RegVT));
5486
5487
5488 // Add CTR register as callee so a bctr can be emitted later.
5489 if (CFlags.IsTailCall)
5490 Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5491 }
5492
5493  // If this is a tail call, add the stack pointer delta.
5494 if (CFlags.IsTailCall)
5495 Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5496
5497 // Add argument registers to the end of the list so that they are known live
5498 // into the call.
5499 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
5500 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
5501 RegsToPass[i].second.getValueType()));
5502
5503 // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5504 // no way to mark dependencies as implicit here.
5505 // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5506 if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5507 !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5508 Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
5509
5510 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5511 if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5512 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5513
5514 // Add a register mask operand representing the call-preserved registers.
5515 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5516 const uint32_t *Mask =
5517 TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
5518  assert(Mask && "Missing call preserved mask for calling convention");
5519 Ops.push_back(DAG.getRegisterMask(Mask));
5520
5521 // If the glue is valid, it is the last operand.
5522 if (Glue.getNode())
5523 Ops.push_back(Glue);
5524}
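// Illustrative summary (not part of the original source): for a non-tail
// indirect call on a TOC-based ABI, the operand vector assembled by
// buildCallOperands above ends up ordered roughly as:
//
//   Ops = { Chain,
//           AddTOC,            // only if isTOCSaveRestoreRequired()
//           EnvPtrReg,         // only for descriptor ABIs without 'nest'
//           ArgReg0..ArgRegN,  // argument registers, marked live into the call
//           TOCReg,            // R2/X2 dependency (not for patchpoint/PCRel)
//           CR1EQ,             // only for 32-bit SVR4 vararg calls
//           RegMask,           // call-preserved register mask
//           Glue };            // only if the incoming glue is valid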
5525
5526SDValue PPCTargetLowering::FinishCall(
5527 CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5528 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5529 SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5530 unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5531 SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5532
5533 if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5534 Subtarget.isAIXABI())
5535 setUsesTOCBasePtr(DAG);
5536
5537 unsigned CallOpc =
5538 getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
5539 Subtarget, DAG.getTarget());
5540
5541 if (!CFlags.IsIndirect)
5542 Callee = transformCallee(Callee, DAG, dl, Subtarget);
5543 else if (Subtarget.usesFunctionDescriptors())
5544 prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5545 dl, CFlags.HasNest, Subtarget);
5546 else
5547 prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5548
5549 // Build the operand list for the call instruction.
5550 SmallVector<SDValue, 8> Ops;
5551 buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5552 SPDiff, Subtarget);
5553
5554 // Emit tail call.
5555 if (CFlags.IsTailCall) {
5556    // Indirect tail calls when using PC Relative calls do not have the same
5557 // constraints.
5558    assert(((Callee.getOpcode() == ISD::Register &&
5559             cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5560            Callee.getOpcode() == ISD::TargetExternalSymbol ||
5561            Callee.getOpcode() == ISD::TargetGlobalAddress ||
5562            isa<ConstantSDNode>(Callee) ||
5563            (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5564           "Expecting a global address, external symbol, absolute value, "
5565           "register or an indirect tail call when PC Relative calls are "
5566           "used.");
5567 // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5568    assert(CallOpc == PPCISD::TC_RETURN &&
5569           "Unexpected call opcode for a tail call.");
5570 DAG.getMachineFunction().getFrameInfo().setHasTailCall();
5571 return DAG.getNode(CallOpc, dl, MVT::Other, Ops);
5572 }
5573
5574 std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5575 Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
5576 DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
5577 Glue = Chain.getValue(1);
5578
5579 // When performing tail call optimization the callee pops its arguments off
5580 // the stack. Account for this here so these bytes can be pushed back on in
5581 // PPCFrameLowering::eliminateCallFramePseudoInstr.
5582 int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5583 getTargetMachine().Options.GuaranteedTailCallOpt)
5584 ? NumBytes
5585 : 0;
5586
5587 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
5588 DAG.getIntPtrConstant(BytesCalleePops, dl, true),
5589 Glue, dl);
5590 Glue = Chain.getValue(1);
5591
5592 return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
5593 DAG, InVals);
5594}
5595
5596SDValue
5597PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5598 SmallVectorImpl<SDValue> &InVals) const {
5599 SelectionDAG &DAG = CLI.DAG;
5600 SDLoc &dl = CLI.DL;
5601 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5602 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5603 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
5604 SDValue Chain = CLI.Chain;
5605 SDValue Callee = CLI.Callee;
5606 bool &isTailCall = CLI.IsTailCall;
5607 CallingConv::ID CallConv = CLI.CallConv;
5608 bool isVarArg = CLI.IsVarArg;
5609 bool isPatchPoint = CLI.IsPatchPoint;
5610 const CallBase *CB = CLI.CB;
5611
5612 if (isTailCall) {
5613 if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5614 isTailCall = false;
5615 else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5616 isTailCall = IsEligibleForTailCallOptimization_64SVR4(
5617 Callee, CallConv, CB, isVarArg, Outs, Ins, DAG);
5618 else
5619 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
5620 Ins, DAG);
5621 if (isTailCall) {
5622 ++NumTailCalls;
5623 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5624 ++NumSiblingCalls;
5625
5626      // PC Relative calls no longer guarantee that the callee is a Global
5627      // Address Node. The callee could be an indirect tail call, in which
5628      // case the SDValue for the callee could be a load (to load the address
5629      // of a function pointer) or a register copy (to move the
5630      // address of the callee from a function parameter into a virtual
5631      // register). It may also be an ExternalSymbolSDNode (e.g. memcpy).
5632      assert((Subtarget.isUsingPCRelativeCalls() ||
5633              isa<GlobalAddressSDNode>(Callee)) &&
5634             "Callee should be an llvm::Function object.");
5635
5636      LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5637                        << "\nTCO callee: ");
5638      LLVM_DEBUG(Callee.dump());
5639 }
5640 }
5641
5642 if (!isTailCall && CB && CB->isMustTailCall())
5643 report_fatal_error("failed to perform tail call elimination on a call "
5644 "site marked musttail");
5645
5646 // When long calls (i.e. indirect calls) are always used, calls are always
5647 // made via function pointer. If we have a function name, first translate it
5648 // into a pointer.
5649 if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5650 !isTailCall)
5651 Callee = LowerGlobalAddress(Callee, DAG);
5652
5653 CallFlags CFlags(
5654 CallConv, isTailCall, isVarArg, isPatchPoint,
5655 isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
5656 // hasNest
5657 Subtarget.is64BitELFABI() &&
5658 any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
5659 CLI.NoMerge);
5660
5661 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5662 return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5663 InVals, CB);
5664
5665 if (Subtarget.isSVR4ABI())
5666 return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5667 InVals, CB);
5668
5669 if (Subtarget.isAIXABI())
5670 return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5671 InVals, CB);
5672
5673 return LowerCall_Darwin(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5674 InVals, CB);
5675}
5676
5677SDValue PPCTargetLowering::LowerCall_32SVR4(
5678 SDValue Chain, SDValue Callee, CallFlags CFlags,
5679 const SmallVectorImpl<ISD::OutputArg> &Outs,
5680 const SmallVectorImpl<SDValue> &OutVals,
5681 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5682 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5683 const CallBase *CB) const {
5684 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5685 // of the 32-bit SVR4 ABI stack frame layout.
5686
5687 const CallingConv::ID CallConv = CFlags.CallConv;
5688 const bool IsVarArg = CFlags.IsVarArg;
5689 const bool IsTailCall = CFlags.IsTailCall;
5690
5691  assert((CallConv == CallingConv::C ||
5692          CallConv == CallingConv::Cold ||
5693          CallConv == CallingConv::Fast) && "Unknown calling convention!");
5694
5695 const Align PtrAlign(4);
5696
5697 MachineFunction &MF = DAG.getMachineFunction();
5698
5699  // Mark this function as potentially containing a tail call. As a
5700  // consequence, the frame pointer will be used for dynamic allocation
5701  // and for restoring the caller's stack pointer in this function's epilog.
5702  // This is done because, by tail calling, the called function might overwrite
5703  // the value in this function's (MF) stack pointer stack slot 0(SP).
5704 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5705 CallConv == CallingConv::Fast)
5706 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5707
5708 // Count how many bytes are to be pushed on the stack, including the linkage
5709 // area, parameter list area and the part of the local variable space which
5710 // contains copies of aggregates which are passed by value.
5711
5712 // Assign locations to all of the outgoing arguments.
5713 SmallVector<CCValAssign, 16> ArgLocs;
5714 PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5715
5716 // Reserve space for the linkage area on the stack.
5717 CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
5718 PtrAlign);
5719 if (useSoftFloat())
5720 CCInfo.PreAnalyzeCallOperands(Outs);
5721
5722 if (IsVarArg) {
5723 // Handle fixed and variable vector arguments differently.
5724 // Fixed vector arguments go into registers as long as registers are
5725 // available. Variable vector arguments always go into memory.
5726 unsigned NumArgs = Outs.size();
5727
5728 for (unsigned i = 0; i != NumArgs; ++i) {
5729 MVT ArgVT = Outs[i].VT;
5730 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5731 bool Result;
5732
5733 if (Outs[i].IsFixed) {
5734 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
5735 CCInfo);
5736 } else {
5737 Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
5738 ArgFlags, CCInfo);
5739 }
5740
5741 if (Result) {
5742#ifndef NDEBUG
5743 errs() << "Call operand #" << i << " has unhandled type "
5744 << EVT(ArgVT).getEVTString() << "\n";
5745#endif
5746        llvm_unreachable(nullptr);
5747 }
5748 }
5749 } else {
5750 // All arguments are treated the same.
5751 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
5752 }
5753 CCInfo.clearWasPPCF128();
5754
5755 // Assign locations to all of the outgoing aggregate by value arguments.
5756 SmallVector<CCValAssign, 16> ByValArgLocs;
5757 CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
5758
5759 // Reserve stack space for the allocations in CCInfo.
5760 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);
5761
5762 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
5763
5764  // Size of the linkage area, parameter list area and the part of the local
5765  // variable space where copies of aggregates which are passed by value are
5766  // stored.
5767 unsigned NumBytes = CCByValInfo.getNextStackOffset();
5768
5769 // Calculate by how many bytes the stack has to be adjusted in case of tail
5770 // call optimization.
5771 int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
5772
5773 // Adjust the stack pointer for the new arguments...
5774 // These operations are automatically eliminated by the prolog/epilog pass
5775 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5776 SDValue CallSeqStart = Chain;
5777
5778  // Load the return address and frame pointer so they can be moved somewhere else
5779 // later.
5780 SDValue LROp, FPOp;
5781 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5782
5783 // Set up a copy of the stack pointer for use loading and storing any
5784 // arguments that may not fit in the registers available for argument
5785 // passing.
5786 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5787
5788 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5789 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5790 SmallVector<SDValue, 8> MemOpChains;
5791
5792 bool seenFloatArg = false;
5793 // Walk the register/memloc assignments, inserting copies/loads.
5794 // i - Tracks the index into the list of registers allocated for the call
5795 // RealArgIdx - Tracks the index into the list of actual function arguments
5796 // j - Tracks the index into the list of byval arguments
5797 for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
5798 i != e;
5799 ++i, ++RealArgIdx) {
5800 CCValAssign &VA = ArgLocs[i];
5801 SDValue Arg = OutVals[RealArgIdx];
5802 ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
5803
5804 if (Flags.isByVal()) {
5805 // Argument is an aggregate which is passed by value, thus we need to
5806 // create a copy of it in the local variable space of the current stack
5807 // frame (which is the stack frame of the caller) and pass the address of
5808 // this copy to the callee.
5809      assert((j < ByValArgLocs.size()) && "Index out of bounds!");
5810 CCValAssign &ByValVA = ByValArgLocs[j++];
5811      assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
5812
5813      // Memory reserved in the local variable space of the caller's stack frame.
5814 unsigned LocMemOffset = ByValVA.getLocMemOffset();
5815
5816 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5817 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5818 StackPtr, PtrOff);
5819
5820 // Create a copy of the argument in the local area of the current
5821 // stack frame.
5822 SDValue MemcpyCall =
5823 CreateCopyOfByValArgument(Arg, PtrOff,
5824 CallSeqStart.getNode()->getOperand(0),
5825 Flags, DAG, dl);
5826
5827 // This must go outside the CALLSEQ_START..END.
5828 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
5829 SDLoc(MemcpyCall));
5830 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5831 NewCallSeqStart.getNode());
5832 Chain = CallSeqStart = NewCallSeqStart;
5833
5834 // Pass the address of the aggregate copy on the stack either in a
5835 // physical register or in the parameter list area of the current stack
5836 // frame to the callee.
5837 Arg = PtrOff;
5838 }
5839
5840 // When useCRBits() is true, there can be i1 arguments.
5841 // It is because getRegisterType(MVT::i1) => MVT::i1,
5842 // and for other integer types getRegisterType() => MVT::i32.
5843 // Extend i1 and ensure callee will get i32.
5844 if (Arg.getValueType() == MVT::i1)
5845 Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
5846 dl, MVT::i32, Arg);
5847
5848 if (VA.isRegLoc()) {
5849 seenFloatArg |= VA.getLocVT().isFloatingPoint();
5850 // Put argument in a physical register.
5851 if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
5852 bool IsLE = Subtarget.isLittleEndian();
5853 SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5854 DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
5855 RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
5856 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5857 DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
5858 RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
5859 SVal.getValue(0)));
5860 } else
5861 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
5862 } else {
5863 // Put argument in the parameter list area of the current stack frame.
5864      assert(VA.isMemLoc());
5865 unsigned LocMemOffset = VA.getLocMemOffset();
5866
5867 if (!IsTailCall) {
5868 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5869 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5870 StackPtr, PtrOff);
5871
5872 MemOpChains.push_back(
5873 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5874 } else {
5875 // Calculate and remember argument location.
5876 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
5877 TailCallArguments);
5878 }
5879 }
5880 }
5881
5882 if (!MemOpChains.empty())
5883 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
5884
5885 // Build a sequence of copy-to-reg nodes chained together with token chain
5886 // and flag operands which copy the outgoing args into the appropriate regs.
5887 SDValue InFlag;
5888 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5889 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5890 RegsToPass[i].second, InFlag);
5891 InFlag = Chain.getValue(1);
5892 }
5893
5894 // Set CR bit 6 to true if this is a vararg call with floating args passed in
5895 // registers.
5896 if (IsVarArg) {
5897 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
5898 SDValue Ops[] = { Chain, InFlag };
5899
5900 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
5901 dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));
5902
5903 InFlag = Chain.getValue(1);
5904 }
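  // Illustrative note (not part of the original source): in the 32-bit SVR4
  // ABI, CR bit 6 tells a vararg callee whether any floating-point arguments
  // were passed in registers, so the callee can skip spilling the FPR
  // argument registers when the bit is clear. The two nodes above correspond
  // roughly to the extended mnemonics (encoding assumed):
  //
  //   crset 6    // PPCISD::CR6SET   - FP args were passed in registers
  //   crclr 6    // PPCISD::CR6UNSET - no FP args in registers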
5905
5906 if (IsTailCall)
5907 PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
5908 TailCallArguments);
5909
5910 return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
5911 Callee, SPDiff, NumBytes, Ins, InVals, CB);
5912}
5913
5914// Copy an argument into memory, being careful to do this outside the
5915// call sequence for the call to which the argument belongs.
5916SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
5917 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
5918 SelectionDAG &DAG, const SDLoc &dl) const {
5919 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
5920 CallSeqStart.getNode()->getOperand(0),
5921 Flags, DAG, dl);
5922 // The MEMCPY must go outside the CALLSEQ_START..END.
5923 int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
5924 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
5925 SDLoc(MemcpyCall));
5926 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5927 NewCallSeqStart.getNode());
5928 return NewCallSeqStart;
5929}
5930
5931SDValue PPCTargetLowering::LowerCall_64SVR4(
5932 SDValue Chain, SDValue Callee, CallFlags CFlags,
5933 const SmallVectorImpl<ISD::OutputArg> &Outs,
5934 const SmallVectorImpl<SDValue> &OutVals,
5935 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5936 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5937 const CallBase *CB) const {
5938 bool isELFv2ABI = Subtarget.isELFv2ABI();
5939 bool isLittleEndian = Subtarget.isLittleEndian();
5940 unsigned NumOps = Outs.size();
5941 bool IsSibCall = false;
5942 bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
5943
5944 EVT PtrVT = getPointerTy(DAG.getDataLayout());
5945 unsigned PtrByteSize = 8;
5946
5947 MachineFunction &MF = DAG.getMachineFunction();
5948
5949 if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
5950 IsSibCall = true;
5951
5952  // Mark this function as potentially containing a tail call. As a
5953  // consequence, the frame pointer will be used for dynamic allocation
5954  // and for restoring the caller's stack pointer in this function's epilog.
5955  // This is done because, by tail calling, the called function might overwrite
5956  // the value in this function's (MF) stack pointer stack slot 0(SP).
5957 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
5958 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5959
5960  assert(!(IsFastCall && CFlags.IsVarArg) &&
5961         "fastcc not supported on varargs functions");
5962
5963 // Count how many bytes are to be pushed on the stack, including the linkage
5964 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
5965 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
5966 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
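  // Illustrative layout (not part of the original source; offsets are the
  // conventional ones and the authoritative value comes from getLinkageSize()):
  //
  //   ELFv1 linkage area (48 bytes)      ELFv2 linkage area (32 bytes)
  //    0: back chain (SP)                 0: back chain (SP)
  //    8: CR save word                    8: CR save word
  //   16: LR save doubleword             16: LR save doubleword
  //   24: reserved (compiler)            24: TOC pointer save
  //   32: reserved (linker)
  //   40: TOC pointer save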
5967 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
5968 unsigned NumBytes = LinkageSize;
5969 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
5970
5971 static const MCPhysReg GPR[] = {
5972 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
5973 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
5974 };
5975 static const MCPhysReg VR[] = {
5976 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
5977 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
5978 };
5979
5980 const unsigned NumGPRs = array_lengthof(GPR);
5981 const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
5982 const unsigned NumVRs = array_lengthof(VR);
5983
5984 // On ELFv2, we can avoid allocating the parameter area if all the arguments
5985 // can be passed to the callee in registers.
5986 // For the fast calling convention, there is another check below.
5987 // Note: We should keep consistent with LowerFormalArguments_64SVR4()
5988 bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
5989 if (!HasParameterArea) {
5990 unsigned ParamAreaSize = NumGPRs * PtrByteSize;
5991 unsigned AvailableFPRs = NumFPRs;
5992 unsigned AvailableVRs = NumVRs;
5993 unsigned NumBytesTmp = NumBytes;
5994 for (unsigned i = 0; i != NumOps; ++i) {
5995 if (Outs[i].Flags.isNest()) continue;
5996 if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
5997 PtrByteSize, LinkageSize, ParamAreaSize,
5998 NumBytesTmp, AvailableFPRs, AvailableVRs))
5999 HasParameterArea = true;
6000 }
6001 }
6002
6003 // When using the fast calling convention, we don't provide backing for
6004 // arguments that will be in registers.
6005 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6006
6007 // Avoid allocating parameter area for fastcc functions if all the arguments
6008 // can be passed in the registers.
6009 if (IsFastCall)
6010 HasParameterArea = false;
6011
6012 // Add up all the space actually used.
6013 for (unsigned i = 0; i != NumOps; ++i) {
6014 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6015 EVT ArgVT = Outs[i].VT;
6016 EVT OrigVT = Outs[i].ArgVT;
6017
6018 if (Flags.isNest())
6019 continue;
6020
6021 if (IsFastCall) {
6022 if (Flags.isByVal()) {
6023 NumGPRsUsed += (Flags.getByValSize()+7)/8;
6024 if (NumGPRsUsed > NumGPRs)
6025 HasParameterArea = true;
6026 } else {
6027 switch (ArgVT.getSimpleVT().SimpleTy) {
6028        default: llvm_unreachable("Unexpected ValueType for argument!");
6029 case MVT::i1:
6030 case MVT::i32:
6031 case MVT::i64:
6032 if (++NumGPRsUsed <= NumGPRs)
6033 continue;
6034 break;
6035 case MVT::v4i32:
6036 case MVT::v8i16:
6037 case MVT::v16i8:
6038 case MVT::v2f64:
6039 case MVT::v2i64:
6040 case MVT::v1i128:
6041 case MVT::f128:
6042 if (++NumVRsUsed <= NumVRs)
6043 continue;
6044 break;
6045 case MVT::v4f32:
6046 if (++NumVRsUsed <= NumVRs)
6047 continue;
6048 break;
6049 case MVT::f32:
6050 case MVT::f64:
6051 if (++NumFPRsUsed <= NumFPRs)
6052 continue;
6053 break;
6054 }
6055 HasParameterArea = true;
6056 }
6057 }
6058
6059 /* Respect alignment of argument on the stack. */
6060    auto Alignment =
6061        CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6062    NumBytes = alignTo(NumBytes, Alignment);
6063
6064 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6065 if (Flags.isInConsecutiveRegsLast())
6066 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6067 }
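// Worked example of the sizing loop above (ELFv2, LinkageSize = 32): an i64
// argument raises NumBytes to 40; a following v4i32 is first aligned up to 48
// (16-byte slot alignment) and then extends NumBytes to 64.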
6068
6069 unsigned NumBytesActuallyUsed = NumBytes;
6070
6071 // In the old ELFv1 ABI,
6072 // the prolog code of the callee may store up to 8 GPR argument registers to
6073 // the stack, allowing va_start to index over them in memory if it is varargs.
6074 // Because we cannot tell if this is needed on the caller side, we have to
6075 // conservatively assume that it is needed. As such, make sure we have at
6076 // least enough stack space for the caller to store the 8 GPRs.
6077 // In the ELFv2 ABI, we allocate the parameter area iff a callee
6078 // really requires memory operands, e.g. a vararg function.
6079 if (HasParameterArea)
6080 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6081 else
6082 NumBytes = LinkageSize;
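// E.g. on ELFv1 (LinkageSize = 48, PtrByteSize = 8), any call that needs a
// parameter area reserves at least 48 + 8 * 8 = 112 bytes, even if the
// arguments themselves occupy less space.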
6083
6084 // Tail call needs the stack to be aligned.
6085 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6086 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6087
6088 int SPDiff = 0;
6089
6090 // Calculate by how many bytes the stack has to be adjusted in case of tail
6091 // call optimization.
6092 if (!IsSibCall)
6093 SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6094
6095 // To protect arguments on the stack from being clobbered in a tail call,
6096 // force all the loads to happen before doing any other lowering.
6097 if (CFlags.IsTailCall)
6098 Chain = DAG.getStackArgumentTokenFactor(Chain);
6099
6100 // Adjust the stack pointer for the new arguments...
6101 // These operations are automatically eliminated by the prolog/epilog pass
6102 if (!IsSibCall)
6103 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6104 SDValue CallSeqStart = Chain;
6105
6106 // Load the return address and frame pointer so they can be moved somewhere
6107 // else later.
6108 SDValue LROp, FPOp;
6109 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6110
6111 // Set up a copy of the stack pointer for use loading and storing any
6112 // arguments that may not fit in the registers available for argument
6113 // passing.
6114 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6115
6116 // Figure out which arguments are going to go in registers, and which in
6117 // memory. Also, if this is a vararg function, floating point operations
6118 // must be stored to our stack, and loaded into integer regs as well, if
6119 // any integer regs are available for argument passing.
6120 unsigned ArgOffset = LinkageSize;
6121
6122 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6123 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6124
6125 SmallVector<SDValue, 8> MemOpChains;
6126 for (unsigned i = 0; i != NumOps; ++i) {
6127 SDValue Arg = OutVals[i];
6128 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6129 EVT ArgVT = Outs[i].VT;
6130 EVT OrigVT = Outs[i].ArgVT;
6131
6132 // PtrOff will be used to store the current argument to the stack if a
6133 // register cannot be found for it.
6134 SDValue PtrOff;
6135
6136 // We re-align the argument offset for each argument, except under the fast
6137 // calling convention, where we re-align only when the argument will actually
6138 // use a stack slot.
6139 auto ComputePtrOff = [&]() {
6140 /* Respect alignment of argument on the stack. */
6141 auto Alignment =
6142 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6143 ArgOffset = alignTo(ArgOffset, Alignment);
6144
6145 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6146
6147 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6148 };
6149
6150 if (!IsFastCall) {
6151 ComputePtrOff();
6152
6153 /* Compute GPR index associated with argument offset. */
6154 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6155 GPR_idx = std::min(GPR_idx, NumGPRs);
6156 }
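// E.g. with LinkageSize = 32 and ArgOffset = 56, GPR_idx becomes
// (56 - 32) / 8 = 3, i.e. the argument would be passed in GPR[3] (X6) if a
// register is still available.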
6157
6158 // Promote integers to 64-bit values.
6159 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6160 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6161 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6162 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6163 }
6164
6165 // FIXME memcpy is used way more than necessary. Correctness first.
6166 // Note: "by value" is code for passing a structure by value, not
6167 // basic types.
6168 if (Flags.isByVal()) {
6169 // Note: Size includes alignment padding, so
6170 // struct x { short a; char b; }
6171 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
6172 // These are the proper values we need for right-justifying the
6173 // aggregate in a parameter register.
6174 unsigned Size = Flags.getByValSize();
6175
6176 // An empty aggregate parameter takes up no storage and no
6177 // registers.
6178 if (Size == 0)
6179 continue;
6180
6181 if (IsFastCall)
6182 ComputePtrOff();
6183
6184 // All aggregates smaller than 8 bytes must be passed right-justified.
6185 if (Size==1 || Size==2 || Size==4) {
6186 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6187 if (GPR_idx != NumGPRs) {
6188 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6189 MachinePointerInfo(), VT);
6190 MemOpChains.push_back(Load.getValue(1));
6191 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6192
6193 ArgOffset += PtrByteSize;
6194 continue;
6195 }
6196 }
6197
6198 if (GPR_idx == NumGPRs && Size < 8) {
6199 SDValue AddPtr = PtrOff;
6200 if (!isLittleEndian) {
6201 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6202 PtrOff.getValueType());
6203 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6204 }
6205 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6206 CallSeqStart,
6207 Flags, DAG, dl);
6208 ArgOffset += PtrByteSize;
6209 continue;
6210 }
6211 // Copy entire object into memory. There are cases where gcc-generated
6212 // code assumes it is there, even if it could be put entirely into
6213 // registers. (This is not what the doc says.)
6214
6215 // FIXME: The above statement is likely due to a misunderstanding of the
6216 // documents. All arguments must be copied into the parameter area BY
6217 // THE CALLEE in the event that the callee takes the address of any
6218 // formal argument. That has not yet been implemented. However, it is
6219 // reasonable to use the stack area as a staging area for the register
6220 // load.
6221
6222 // Skip this for small aggregates, as we will use the same slot for a
6223 // right-justified copy, below.
6224 if (Size >= 8)
6225 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6226 CallSeqStart,
6227 Flags, DAG, dl);
6228
6229 // When a register is available, pass a small aggregate right-justified.
6230 if (Size < 8 && GPR_idx != NumGPRs) {
6231 // The easiest way to get this right-justified in a register
6232 // is to copy the structure into the rightmost portion of a
6233 // local variable slot, then load the whole slot into the
6234 // register.
6235 // FIXME: The memcpy seems to produce pretty awful code for
6236 // small aggregates, particularly for packed ones.
6237 // FIXME: It would be preferable to use the slot in the
6238 // parameter save area instead of a new local variable.
6239 SDValue AddPtr = PtrOff;
6240 if (!isLittleEndian) {
6241 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6242 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6243 }
6244 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6245 CallSeqStart,
6246 Flags, DAG, dl);
6247
6248 // Load the slot into the register.
6249 SDValue Load =
6250 DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6251 MemOpChains.push_back(Load.getValue(1));
6252 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6253
6254 // Done with this argument.
6255 ArgOffset += PtrByteSize;
6256 continue;
6257 }
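// E.g. on big-endian, a 3-byte aggregate is copied to PtrOff + (8 - 3), so the
// 8-byte load above leaves it right-justified in the low-order bytes of the
// GPR, as required for small aggregates.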
6258
6259 // For aggregates larger than PtrByteSize, copy the pieces of the
6260 // object that fit into registers from the parameter save area.
6261 for (unsigned j=0; j<Size; j+=PtrByteSize) {
6262 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6263 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6264 if (GPR_idx != NumGPRs) {
6265 SDValue Load =
6266 DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
6267 MemOpChains.push_back(Load.getValue(1));
6268 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6269 ArgOffset += PtrByteSize;
6270 } else {
6271 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6272 break;
6273 }
6274 }
6275 continue;
6276 }
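// E.g. a 20-byte aggregate is split at offsets 0, 8 and 16 by the loop above;
// each piece with a free GPR is loaded into the next register, and any
// remainder is left in the parameter save area copy made earlier.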
6277
6278 switch (Arg.getSimpleValueType().SimpleTy) {
6279 default: llvm_unreachable("Unexpected ValueType for argument!");
6280 case MVT::i1:
6281 case MVT::i32:
6282 case MVT::i64:
6283 if (Flags.isNest()) {
6284 // The 'nest' parameter, if any, is passed in R11.
6285 RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6286 break;
6287 }
6288
6289 // These can be scalar arguments or elements of an integer array type
6290 // passed directly. Clang may use those instead of "byval" aggregate
6291 // types to avoid forcing arguments to memory unnecessarily.
6292 if (GPR_idx != NumGPRs) {
6293 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6294 } else {
6295 if (IsFastCall)
6296 ComputePtrOff();
6297
6298 assert(HasParameterArea &&
6299        "Parameter area must exist to pass an argument in memory.");
6300 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6301 true, CFlags.IsTailCall, false, MemOpChains,
6302 TailCallArguments, dl);
6303 if (IsFastCall)
6304 ArgOffset += PtrByteSize;
6305 }
6306 if (!IsFastCall)
6307 ArgOffset += PtrByteSize;
6308 break;
6309 case MVT::f32:
6310 case MVT::f64: {
6311 // These can be scalar arguments or elements of a float array type
6312 // passed directly. The latter are used to implement ELFv2 homogeneous
6313 // float aggregates.
6314
6315 // Named arguments go into FPRs first, and once they overflow, the
6316 // remaining arguments go into GPRs and then the parameter save area.
6317 // Unnamed arguments for vararg functions always go to GPRs and
6318 // then the parameter save area. For now, put all arguments to vararg
6319 // routines always in both locations (FPR *and* GPR or stack slot).
6320 bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6321 bool NeededLoad = false;
6322
6323 // First load the argument into the next available FPR.
6324 if (FPR_idx != NumFPRs)
6325 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6326
6327 // Next, load the argument into GPR or stack slot if needed.
6328 if (!NeedGPROrStack)
6329 ;
6330 else if (GPR_idx != NumGPRs && !IsFastCall) {
6331 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6332 // once we support fp <-> gpr moves.
6333
6334 // In the non-vararg case, this can only ever happen in the
6335 // presence of f32 array types, since otherwise we never run
6336 // out of FPRs before running out of GPRs.
6337 SDValue ArgVal;
6338
6339 // Double values are always passed in a single GPR.
6340 if (Arg.getValueType() != MVT::f32) {
6341 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6342
6343 // Non-array float values are extended and passed in a GPR.
6344 } else if (!Flags.isInConsecutiveRegs()) {
6345 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6346 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6347
6348 // If we have an array of floats, we collect every odd element
6349 // together with its predecessor into one GPR.
6350 } else if (ArgOffset % PtrByteSize != 0) {
6351 SDValue Lo, Hi;
6352 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6353 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6354 if (!isLittleEndian)
6355 std::swap(Lo, Hi);
6356 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6357
6358 // The final element, if even, goes into the first half of a GPR.
6359 } else if (Flags.isInConsecutiveRegsLast()) {
6360 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6361 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6362 if (!isLittleEndian)
6363 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6364 DAG.getConstant(32, dl, MVT::i32));
6365
6366 // Non-final even elements are skipped; they will be handled
6367 // together with the subsequent argument on the next go-around.
6368 } else
6369 ArgVal = SDValue();
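// E.g. for a homogeneous f32 array on big-endian, elements 0 and 1 share one
// GPR with element 0 in the high word (the std::swap above); even, non-final
// elements produce no ArgVal and are folded into the next iteration.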
6370
6371 if (ArgVal.getNode())
6372 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6373 } else {
6374 if (IsFastCall)
6375 ComputePtrOff();
6376
6377 // Single-precision floating-point values are mapped to the
6378 // second (rightmost) word of the stack doubleword.
6379 if (Arg.getValueType() == MVT::f32 &&
6380 !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6381 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6382 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6383 }
6384
6385 assert(HasParameterArea &&
6386        "Parameter area must exist to pass an argument in memory.");
6387 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6388 true, CFlags.IsTailCall, false, MemOpChains,
6389 TailCallArguments, dl);
6390
6391 NeededLoad = true;
6392 }
6393 // When passing an array of floats, the array occupies consecutive
6394 // space in the argument area; only round up to the next doubleword
6395 // at the end of the array. Otherwise, each float takes 8 bytes.
6396 if (!IsFastCall || NeededLoad) {
6397 ArgOffset += (Arg.getValueType() == MVT::f32 &&
6398 Flags.isInConsecutiveRegs()) ? 4 : 8;
6399 if (Flags.isInConsecutiveRegsLast())
6400 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6401 }
6402 break;
6403 }
6404 case MVT::v4f32:
6405 case MVT::v4i32:
6406 case MVT::v8i16:
6407 case MVT::v16i8:
6408 case MVT::v2f64:
6409 case MVT::v2i64:
6410 case MVT::v1i128:
6411 case MVT::f128:
6412 // These can be scalar arguments or elements of a vector array type
6413 // passed directly. The latter are used to implement ELFv2 homogeneous
6414 // vector aggregates.
6415
6416 // For a varargs call, named arguments go into VRs or on the stack as
6417 // usual; unnamed arguments always go to the stack or the corresponding
6418 // GPRs when within range. For now, we always put the value in both
6419 // locations (or even all three).
6420 if (CFlags.IsVarArg) {
6421 assert(HasParameterArea &&
6422        "Parameter area must exist if we have a varargs call.");
6423 // We could elide this store in the case where the object fits
6424 // entirely in R registers. Maybe later.
6425 SDValue Store =
6426 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6427 MemOpChains.push_back(Store);
6428 if (VR_idx != NumVRs) {
6429 SDValue Load =
6430 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6431 MemOpChains.push_back(Load.getValue(1));
6432 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6433 }
6434 ArgOffset += 16;
6435 for (unsigned i=0; i<16; i+=PtrByteSize) {
6436 if (GPR_idx == NumGPRs)
6437 break;
6438 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6439 DAG.getConstant(i, dl, PtrVT));
6440 SDValue Load =
6441 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6442 MemOpChains.push_back(Load.getValue(1));
6443 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6444 }
6445 break;
6446 }
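// To summarize the vararg vector path above: the value is stored once to its
// 16-byte stack slot, reloaded into a VR if one is free, and reloaded again in
// pointer-sized pieces into any remaining GPRs ("all three locations").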
6447
6448 // Non-varargs Altivec params go into VRs or on the stack.
6449 if (VR_idx != NumVRs) {
6450 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6451 } else {
6452 if (IsFastCall)
6453 ComputePtrOff();
6454
6455 assert(HasParameterArea &&
6456        "Parameter area must exist to pass an argument in memory.");
6457 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6458 true, CFlags.IsTailCall, true, MemOpChains,
6459 TailCallArguments, dl);
6460 if (IsFastCall)
6461 ArgOffset += 16;
6462 }
6463
6464 if (!IsFastCall)
6465 ArgOffset += 16;
6466 break;
6467 }
6468 }
6469
6470 assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6471        "mismatch in size of parameter area");
6472 (void)NumBytesActuallyUsed;
6473
6474 if (!MemOpChains.empty())
6475 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6476
6477 // Check if this is an indirect call (MTCTR/BCTRL).
6478 // See prepareDescriptorIndirectCall and buildCallOperands for more
6479 // information about calls through function pointers in the 64-bit SVR4 ABI.
6480 if (CFlags.IsIndirect) {
6481 // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6482 // caller in the TOC save area.
6483 if (isTOCSaveRestoreRequired(Subtarget)) {
6484 assert(!CFlags.IsTailCall && "Indirect tail calls not supported");
6485 // Load r2 into a virtual register and store it to the TOC save area.
6486 setUsesTOCBasePtr(DAG);
6487 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6488 // TOC save area offset.
6489 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6490 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6491 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6492 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6493 MachinePointerInfo::getStack(
6494 DAG.getMachineFunction(), TOCSaveOffset));
6495 }
6496 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6497 // This does not mean the MTCTR instruction must use R12; it's easier
6498 // to model this as an extra parameter, so do that.
6499 if (isELFv2ABI && !CFlags.IsPatchPoint)
6500 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6501 }
6502
6503 // Build a sequence of copy-to-reg nodes chained together with token chain
6504 // and flag operands which copy the outgoing args into the appropriate regs.
6505 SDValue InFlag;
6506 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6507 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6508 RegsToPass[i].second, InFlag);
6509 InFlag = Chain.getValue(1);
6510 }
6511
6512 if (CFlags.IsTailCall && !IsSibCall)
6513 PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6514 TailCallArguments);
6515
6516 return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
6517 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6518}
6519
6520SDValue PPCTargetLowering::LowerCall_Darwin(
6521 SDValue Chain, SDValue Callee, CallFlags CFlags,
6522 const SmallVectorImpl<ISD::OutputArg> &Outs,
6523 const SmallVectorImpl<SDValue> &OutVals,
6524 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6525 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6526 const CallBase *CB) const {
6527 unsigned NumOps = Outs.size();
6528
6529 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6530 bool isPPC64 = PtrVT == MVT::i64;
6531 unsigned PtrByteSize = isPPC64 ? 8 : 4;
6532
6533 MachineFunction &MF = DAG.getMachineFunction();
6534
6535 // Mark this function as potentially containing a tail call. As a
6536 // consequence, the frame pointer will be used for dynamic allocation and for
6537 // restoring the caller's stack pointer in this function's epilog. This is
6538 // done because, by tail calling, the called function might overwrite the
6539 // value in this function's (MF) stack pointer stack slot 0(SP).
6540 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
6541 CFlags.CallConv == CallingConv::Fast)
6542 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6543
6544 // Count how many bytes are to be pushed on the stack, including the linkage
6545 // area, and parameter passing area. We start with 24/48 bytes, which is
6546 // prereserved space for [SP][CR][LR][3 x unused].
6547 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6548 unsigned NumBytes = LinkageSize;
6549
6550 // Add up all the space actually used.
6551 // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
6552 // they all go in registers, but we must reserve stack space for them for
6553 // possible use by the caller. In varargs or 64-bit calls, parameters are
6554 // assigned stack space in order, with padding so Altivec parameters are
6555 // 16-byte aligned.
6556 unsigned nAltivecParamsAtEnd = 0;
6557 for (unsigned i = 0; i != NumOps; ++i) {
6558 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6559 EVT ArgVT = Outs[i].VT;
6560 // Varargs Altivec parameters are padded to a 16 byte boundary.
6561 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
6562 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
6563 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
6564 if (!CFlags.IsVarArg && !isPPC64) {
6565 // Non-varargs Altivec parameters go after all the non-Altivec
6566 // parameters; handle those later so we know how much padding we need.
6567 nAltivecParamsAtEnd++;
6568 continue;
6569 }
6570 // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
6571 NumBytes = ((NumBytes+15)/16)*16;
6572 }
6573 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6574 }
6575
6576 // Allow for Altivec parameters at the end, if needed.
6577 if (nAltivecParamsAtEnd) {
6578 NumBytes = ((NumBytes+15)/16)*16;
6579 NumBytes += 16*nAltivecParamsAtEnd;
6580 }
6581
6582 // The prolog code of the callee may store up to 8 GPR argument registers to
6583 // the stack, allowing va_start to index over them in memory if it is varargs.
6584 // Because we cannot tell if this is needed on the caller side, we have to
6585 // conservatively assume that it is needed. As such, make sure we have at
6586 // least enough stack space for the caller to store the 8 GPRs.
6587 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6588
6589 // Tail call needs the stack to be aligned.
6590 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
6591 CFlags.CallConv == CallingConv::Fast)
6592 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6593
6594 // Calculate by how many bytes the stack has to be adjusted in case of tail
6595 // call optimization.
6596 int SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6597
6598 // To protect arguments on the stack from being clobbered in a tail call,
6599 // force all the loads to happen before doing any other lowering.
6600 if (CFlags.IsTailCall)
6601 Chain = DAG.getStackArgumentTokenFactor(Chain);
6602
6603 // Adjust the stack pointer for the new arguments...
6604 // These operations are automatically eliminated by the prolog/epilog pass
6605 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6606 SDValue CallSeqStart = Chain;
6607
6608 // Load the return address and frame pointer so they can be moved somewhere
6609 // else later.
6610 SDValue LROp, FPOp;
6611 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6612
6613 // Set up a copy of the stack pointer for use loading and storing any
6614 // arguments that may not fit in the registers available for argument
6615 // passing.
6616 SDValue StackPtr;
6617 if (isPPC64)
6618 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6619 else
6620 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
6621
6622 // Figure out which arguments are going to go in registers, and which in
6623 // memory. Also, if this is a vararg function, floating point operations
6624 // must be stored to our stack, and loaded into integer regs as well, if
6625 // any integer regs are available for argument passing.
6626 unsigned ArgOffset = LinkageSize;
6627 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6628
6629 static const MCPhysReg GPR_32[] = { // 32-bit registers.
6630 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6631 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
6632 };
6633 static const MCPhysReg GPR_64[] = { // 64-bit registers.
6634 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6635 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6636 };
6637 static const MCPhysReg VR[] = {
6638 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6639 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6640 };
6641 const unsigned NumGPRs = array_lengthof(GPR_32);
6642 const unsigned NumFPRs = 13;
6643 const unsigned NumVRs = array_lengthof(VR);
6644
6645 const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
6646
6647 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6648 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6649
6650 SmallVector<SDValue, 8> MemOpChains;
6651 for (unsigned i = 0; i != NumOps; ++i) {
6652 SDValue Arg = OutVals[i];
6653 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6654
6655 // PtrOff will be used to store the current argument to the stack if a
6656 // register cannot be found for it.
6657 SDValue PtrOff;
6658
6659 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6660
6661 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6662
6663 // On PPC64, promote integers to 64-bit values.
6664 if (isPPC64 && Arg.getValueType() == MVT::i32) {
6665 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6666 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6667 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6668 }
6669
6670 // FIXME memcpy is used way more than necessary. Correctness first.
6671 // Note: "by value" is code for passing a structure by value, not
6672 // basic types.
6673 if (Flags.isByVal()) {
6674 unsigned Size = Flags.getByValSize();
6675 // Very small objects are passed right-justified. Everything else is
6676 // passed left-justified.
6677 if (Size==1 || Size==2) {
6678 EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
6679 if (GPR_idx != NumGPRs) {
6680 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6681 MachinePointerInfo(), VT);
6682 MemOpChains.push_back(Load.getValue(1));
6683 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6684
6685 ArgOffset += PtrByteSize;
6686 } else {
6687 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6688 PtrOff.getValueType());
6689 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6690 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6691 CallSeqStart,
6692 Flags, DAG, dl);
6693 ArgOffset += PtrByteSize;
6694 }
6695 continue;
6696 }
6697 // Copy entire object into memory. There are cases where gcc-generated
6698 // code assumes it is there, even if it could be put entirely into
6699 // registers. (This is not what the doc says.)
6700 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6701 CallSeqStart,
6702 Flags, DAG, dl);
6703
6704 // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
6705 // copy the pieces of the object that fit into registers from the
6706 // parameter save area.
6707 for (unsigned j=0; j<Size; j+=PtrByteSize) {
6708 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6709 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6710 if (GPR_idx != NumGPRs) {
6711 SDValue Load =
6712 DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
6713 MemOpChains.push_back(Load.getValue(1));
6714 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6715 ArgOffset += PtrByteSize;
6716 } else {
6717 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6718 break;
6719 }
6720 }
6721 continue;
6722 }
6723
6724 switch (Arg.getSimpleValueType().SimpleTy) {
6725 default: llvm_unreachable("Unexpected ValueType for argument!");
6726 case MVT::i1:
6727 case MVT::i32:
6728 case MVT::i64:
6729 if (GPR_idx != NumGPRs) {
6730 if (Arg.getValueType() == MVT::i1)
6731 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);
6732
6733 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6734 } else {
6735 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6736 isPPC64, CFlags.IsTailCall, false, MemOpChains,
6737 TailCallArguments, dl);
6738 }
6739 ArgOffset += PtrByteSize;
6740 break;
6741 case MVT::f32:
6742 case MVT::f64:
6743 if (FPR_idx != NumFPRs) {
6744 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6745
6746 if (CFlags.IsVarArg) {
6747 SDValue Store =
6748 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6749 MemOpChains.push_back(Store);
6750
6751 // Float varargs are always shadowed in available integer registers
6752 if (GPR_idx != NumGPRs) {
6753 SDValue Load =
6754 DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
6755 MemOpChains.push_back(Load.getValue(1));
6756 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6757 }
6758 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
6759 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6760 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6761 SDValue Load =
6762 DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
6763 MemOpChains.push_back(Load.getValue(1));
6764 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6765 }
6766 } else {
6767 // If we have any FPRs remaining, we may also have GPRs remaining.
6768 // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
6769 // GPRs.
6770 if (GPR_idx != NumGPRs)
6771 ++GPR_idx;
6772 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
6773 !isPPC64) // PPC64 has 64-bit GPR's obviously :)
6774 ++GPR_idx;
6775 }
6776 } else
6777 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6778 isPPC64, CFlags.IsTailCall, false, MemOpChains,
6779 TailCallArguments, dl);
6780 if (isPPC64)
6781 ArgOffset += 8;
6782 else
6783 ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
6784 break;
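// E.g. a non-vararg f64 on 32-bit Darwin lands in the next FPR and also
// shadows two GPRs (an f32 shadows one), so later integer arguments keep the
// register slots the ABI assigns them.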
6785 case MVT::v4f32:
6786 case MVT::v4i32:
6787 case MVT::v8i16:
6788 case MVT::v16i8:
6789 if (CFlags.IsVarArg) {
6790 // These go aligned on the stack, or in the corresponding R registers
6791 // when within range. The Darwin PPC ABI doc claims they also go in
6792 // V registers; in fact gcc does this only for arguments that are
6793 // prototyped, not for those that match the ... We do it for all
6794 // arguments, seems to work.
6795 while (ArgOffset % 16 !=0) {
6796 ArgOffset += PtrByteSize;
6797 if (GPR_idx != NumGPRs)
6798 GPR_idx++;
6799 }
6800 // We could elide this store in the case where the object fits
6801 // entirely in R registers. Maybe later.
6802 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
6803 DAG.getConstant(ArgOffset, dl, PtrVT));
6804 SDValue Store =
6805 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6806 MemOpChains.push_back(Store);
6807 if (VR_idx != NumVRs) {
6808 SDValue Load =
6809 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6810 MemOpChains.push_back(Load.getValue(1));
6811 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6812 }
6813 ArgOffset += 16;
6814 for (unsigned i=0; i<16; i+=PtrByteSize) {
6815 if (GPR_idx == NumGPRs)
6816 break;
6817 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6818 DAG.getConstant(i, dl, PtrVT));
6819 SDValue Load =
6820 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6821 MemOpChains.push_back(Load.getValue(1));
6822 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6823 }
6824 break;
6825 }
6826
6827 // Non-varargs Altivec params generally go in registers, but have
6828 // stack space allocated at the end.
6829 if (VR_idx != NumVRs) {
6830 // Doesn't have GPR space allocated.
6831 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6832 } else if (nAltivecParamsAtEnd==0) {
6833 // We are emitting Altivec params in order.
6834 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6835 isPPC64, CFlags.IsTailCall, true, MemOpChains,
6836 TailCallArguments, dl);
6837 ArgOffset += 16;
6838 }
6839 break;
6840 }
6841 }
6842 // If all Altivec parameters fit in registers, as they usually do,
6843 // they get stack space following the non-Altivec parameters. We
6844 // don't track this here because nobody below needs it.
6845 // If there are more Altivec parameters than fit in registers emit
6846 // the stores here.
6847 if (!CFlags.IsVarArg && nAltivecParamsAtEnd > NumVRs) {
6848 unsigned j = 0;
6849 // Offset is aligned; skip 1st 12 params which go in V registers.
6850 ArgOffset = ((ArgOffset+15)/16)*16;
6851 ArgOffset += 12*16;
6852 for (unsigned i = 0; i != NumOps; ++i) {
6853 SDValue Arg = OutVals[i];
6854 EVT ArgType = Outs[i].VT;
6855 if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
6856 ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
6857 if (++j > NumVRs) {
6858 SDValue PtrOff;
6859 // We are emitting Altivec params in order.
6860 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6861 isPPC64, CFlags.IsTailCall, true, MemOpChains,
6862 TailCallArguments, dl);
6863 ArgOffset += 16;
6864 }
6865 }
6866 }
6867 }
6868
6869 if (!MemOpChains.empty())
6870 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6871
6872 // On Darwin, R12 must contain the address of an indirect callee. This does
6873 // not mean the MTCTR instruction must use R12; it's easier to model this as
6874 // an extra parameter, so do that.
6875 if (CFlags.IsIndirect) {
6876 assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
6877 RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
6878 PPC::R12), Callee));
6879 }
6880
6881 // Build a sequence of copy-to-reg nodes chained together with token chain
6882 // and flag operands which copy the outgoing args into the appropriate regs.
6883 SDValue InFlag;
6884 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6885 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6886 RegsToPass[i].second, InFlag);
6887 InFlag = Chain.getValue(1);
6888 }
6889
6890 if (CFlags.IsTailCall)
6891 PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6892 TailCallArguments);
6893
6894 return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
6895 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6896}
6897
6898static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6899 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6900 CCState &State) {
6901
6902 const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6903 State.getMachineFunction().getSubtarget());
6904 const bool IsPPC64 = Subtarget.isPPC64();
6905 const Align PtrAlign = IsPPC64 ? Align(8) : Align(4);
6906 const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
6907
6908 assert((!ValVT.isInteger() ||
6909         (ValVT.getFixedSizeInBits() <= RegVT.getFixedSizeInBits())) &&
6910        "Integer argument exceeds register size: should have been legalized");
6911
6912 if (ValVT == MVT::f128)
6913 report_fatal_error("f128 is unimplemented on AIX.");
6914
6915 if (ArgFlags.isNest())
6916 report_fatal_error("Nest arguments are unimplemented.");
6917
6918 if (ValVT.isVector() || LocVT.isVector())
6919 report_fatal_error("Vector arguments are unimplemented on AIX.");
6920
6921 static const MCPhysReg GPR_32[] = {// 32-bit registers.
6922 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6923 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6924 static const MCPhysReg GPR_64[] = {// 64-bit registers.
6925 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6926 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6927
6928 if (ArgFlags.isByVal()) {
6929 if (ArgFlags.getNonZeroByValAlign() > PtrAlign)
6930 report_fatal_error("Pass-by-value arguments with alignment greater than "
6931 "register width are not supported.");
6932
6933 const unsigned ByValSize = ArgFlags.getByValSize();
6934
6935 // An empty aggregate parameter takes up no storage and no registers,
6936 // but needs a MemLoc for a stack slot for the formal arguments side.
6937 if (ByValSize == 0) {
6938 State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6939 State.getNextStackOffset(), RegVT,
6940 LocInfo));
6941 return false;
6942 }
6943
6944 const unsigned StackSize = alignTo(ByValSize, PtrAlign);
6945 unsigned Offset = State.AllocateStack(StackSize, PtrAlign);
6946 for (const unsigned E = Offset + StackSize; Offset < E;
6947 Offset += PtrAlign.value()) {
6948 if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
6949 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6950 else {
6951 State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6952 Offset, MVT::INVALID_SIMPLE_VALUE_TYPE,
6953 LocInfo));
6954 break;
6955 }
6956 }
6957 return false;
6958 }
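// Illustrative example for the byval handling above: a 12-byte byval on
// 64-bit AIX rounds up to a 16-byte stack allocation, so the loop tries to
// reserve two GPRs and falls back to a single MemLoc once registers run out.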
6959
6960 // Arguments always reserve parameter save area.
6961 switch (ValVT.SimpleTy) {
6962 default:
6963 report_fatal_error("Unhandled value type for argument.");
6964 case MVT::i64:
6965 // i64 arguments should have been split to i32 for PPC32.
6966 assert(IsPPC64 && "PPC32 should have split i64 values.");
6967 LLVM_FALLTHROUGH;
6968 case MVT::i1:
6969 case MVT::i32: {
6970 const unsigned Offset = State.AllocateStack(PtrAlign.value(), PtrAlign);
6971 // AIX integer arguments are always passed in register width.
6972 if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
6973 LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
6974 : CCValAssign::LocInfo::ZExt;
6975 if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
6976 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6977 else
6978 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
6979
6980 return false;
6981 }
6982 case MVT::f32:
6983 case MVT::f64: {
6984 // Parameter save area (PSA) is reserved even if the float passes in fpr.
6985 const unsigned StoreSize = LocVT.getStoreSize();
6986 // Floats are always 4-byte aligned in the PSA on AIX.
6987 // This includes f64 in 64-bit mode for ABI compatibility.
6988 const unsigned Offset =
6989 State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
6990 unsigned FReg = State.AllocateReg(FPR);
6991 if (FReg)
6992 State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
6993
6994 // Reserve and initialize GPRs or initialize the PSA as required.
6995 for (unsigned I = 0; I < StoreSize; I += PtrAlign.value()) {
6996 if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
6997 assert(FReg && "An FPR should be available when a GPR is reserved.");
6998 if (State.isVarArg()) {
6999 // Successfully reserved GPRs are only initialized for vararg calls.
7000 // Custom handling is required for:
7001 // f64 in PPC32 needs to be split into 2 GPRs.
7002 // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
7003 State.addLoc(
7004 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
7005 }
7006 } else {
7007 // If there are insufficient GPRs, the PSA needs to be initialized.
7008 // Initialization occurs even if an FPR was initialized for
7009 // compatibility with the AIX XL compiler. The full memory for the
7010 // argument will be initialized even if a prior word is saved in GPR.
7011 // A custom memLoc is used when the argument also passes in FPR so
7012 // that the callee handling can skip over it easily.
7013 State.addLoc(
7014 FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
7015 LocInfo)
7016 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7017 break;
7018 }
7019 }
7020
7021 return false;
7022 }
7023 }
7024 return true;
7025}
7026
7027static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
7028 bool IsPPC64) {
7029 assert((IsPPC64 || SVT != MVT::i64) &&
7030        "i64 should have been split for 32-bit codegen.");
7031
7032 switch (SVT) {
7033 default:
7034 report_fatal_error("Unexpected value type for formal argument");
7035 case MVT::i1:
7036 case MVT::i32:
7037 case MVT::i64:
7038 return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7039 case MVT::f32:
7040 return &PPC::F4RCRegClass;
7041 case MVT::f64:
7042 return &PPC::F8RCRegClass;
7043 }
7044}
7045
7046static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
7047 SelectionDAG &DAG, SDValue ArgValue,
7048 MVT LocVT, const SDLoc &dl) {
7049 assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
7050 assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
7051
7052 if (Flags.isSExt())
7053 ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
7054 DAG.getValueType(ValVT));
7055 else if (Flags.isZExt())
7056 ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
7057 DAG.getValueType(ValVT));
7058
7059 return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
7060}
7061
7062static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
7063 const unsigned LASize = FL->getLinkageSize();
7064
7065 if (PPC::GPRCRegClass.contains(Reg)) {
7066 assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
7067        "Reg must be a valid argument register!");
7068 return LASize + 4 * (Reg - PPC::R3);
7069 }
7070
7071 if (PPC::G8RCRegClass.contains(Reg)) {
7072 assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
7073        "Reg must be a valid argument register!");
7074 return LASize + 8 * (Reg - PPC::X3);
7075 }
7076
7077 llvm_unreachable("Only general purpose registers expected.");
7078}
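// Sketch of the mapping above, assuming the 32-bit AIX linkage area of 24
// bytes: R3 -> 24, R4 -> 28, R5 -> 32, ..., i.e. each argument GPR maps to its
// word in the parameter save area that immediately follows the linkage area.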
7079
7080// AIX ABI Stack Frame Layout:
7081//
7082// Low Memory +--------------------------------------------+
7083// SP +---> | Back chain | ---+
7084// | +--------------------------------------------+ |
7085// | | Saved Condition Register | |
7086// | +--------------------------------------------+ |
7087// | | Saved Linkage Register | |
7088// | +--------------------------------------------+ | Linkage Area
7089// | | Reserved for compilers | |
7090// | +--------------------------------------------+ |
7091// | | Reserved for binders | |
7092// | +--------------------------------------------+ |
7093// | | Saved TOC pointer | ---+
7094// | +--------------------------------------------+
7095// | | Parameter save area |
7096// | +--------------------------------------------+
7097// | | Alloca space |
7098// | +--------------------------------------------+
7099// | | Local variable space |
7100// | +--------------------------------------------+
7101// | | Float/int conversion temporary |
7102// | +--------------------------------------------+
7103// | | Save area for AltiVec registers |
7104// | +--------------------------------------------+
7105// | | AltiVec alignment padding |
7106// | +--------------------------------------------+
7107// | | Save area for VRSAVE register |
7108// | +--------------------------------------------+
7109// | | Save area for General Purpose registers |
7110// | +--------------------------------------------+
7111// | | Save area for Floating Point registers |
7112// | +--------------------------------------------+
7113// +---- | Back chain |
7114// High Memory +--------------------------------------------+
7115//
7116// Specifications:
7117// AIX 7.2 Assembler Language Reference
7118// Subroutine linkage convention
7119
7120SDValue PPCTargetLowering::LowerFormalArguments_AIX(
7121 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
7122 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7123 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7124
7125 assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
7126         CallConv == CallingConv::Fast) &&
7127        "Unexpected calling convention!");
7128
7129 if (getTargetMachine().Options.GuaranteedTailCallOpt)
7130 report_fatal_error("Tail call support is unimplemented on AIX.");
7131
7132 if (useSoftFloat())
7133 report_fatal_error("Soft float support is unimplemented on AIX.");
7134
7135 const PPCSubtarget &Subtarget =
7136 static_cast<const PPCSubtarget &>(DAG.getSubtarget());
7137
7138 const bool IsPPC64 = Subtarget.isPPC64();
7139 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7140
7141 // Assign locations to all of the incoming arguments.
7142 SmallVector<CCValAssign, 16> ArgLocs;
7143 MachineFunction &MF = DAG.getMachineFunction();
7144 MachineFrameInfo &MFI = MF.getFrameInfo();
7145 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
7146
7147 const EVT PtrVT = getPointerTy(MF.getDataLayout());
7148 // Reserve space for the linkage area on the stack.
7149 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7150 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7151 CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
7152
7153 SmallVector<SDValue, 8> MemOps;
7154
7155 for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
7156 CCValAssign &VA = ArgLocs[I++];
7157 MVT LocVT = VA.getLocVT();
7158 ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
7159
7160 // For compatibility with the AIX XL compiler, the float args in the
7161 // parameter save area are initialized even if the argument is available
7162 // in a register. The caller is required to initialize both the register
7163 // and memory; the callee, however, can choose to expect it in either.
7164 // The MemLoc is dismissed here because the argument is retrieved from
7165 // the register.
7166 if (VA.isMemLoc() && VA.needsCustom())
7167 continue;
7168
7169 if (Flags.isByVal() && VA.isMemLoc()) {
7170 const unsigned Size =
7171 alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
7172 PtrByteSize);
7173 const int FI = MF.getFrameInfo().CreateFixedObject(
7174 Size, VA.getLocMemOffset(), /* IsImmutable */ false,
7175 /* IsAliased */ true);
7176 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7177 InVals.push_back(FIN);
7178
7179 continue;
7180 }
7181
7182 if (Flags.isByVal()) {
7183 assert(VA.isRegLoc() && "MemLocs should already be handled.");
7184
7185 const MCPhysReg ArgReg = VA.getLocReg();
7186 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
7187
7188 if (Flags.getNonZeroByValAlign() > PtrByteSize)
7189 report_fatal_error("Over aligned byvals not supported yet.");
7190
7191 const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
7192 const int FI = MF.getFrameInfo().CreateFixedObject(
7193 StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
7194 /* IsAliased */ true);
7195 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7196 InVals.push_back(FIN);
7197
7198 // Add live ins for all the RegLocs for the same ByVal.
7199 const TargetRegisterClass *RegClass =
7200 IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7201
7202 auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
7203 unsigned Offset) {
7204 const unsigned VReg = MF.addLiveIn(PhysReg, RegClass);
7205         // Since the caller's side has left-justified the aggregate in the
7206         // register, we can simply store the entire register into the stack
7207         // slot.
7208 SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7209         // The store to the fixed stack object is needed because accessing a
7210         // field of the ByVal will use a GEP and load. Ideally we will optimize
7211         // to extracting the value from the register directly, and elide the
7212         // stores when the argument's address is not taken, but that will need
7213         // to be future work.
7214 SDValue Store = DAG.getStore(
7215 CopyFrom.getValue(1), dl, CopyFrom,
7216 DAG.getObjectPtrOffset(dl, FIN, TypeSize::Fixed(Offset)),
7217 MachinePointerInfo::getFixedStack(MF, FI, Offset));
7218
7219 MemOps.push_back(Store);
7220 };
7221
7222 unsigned Offset = 0;
7223 HandleRegLoc(VA.getLocReg(), Offset);
7224 Offset += PtrByteSize;
7225 for (; Offset != StackSize && ArgLocs[I].isRegLoc();
7226 Offset += PtrByteSize) {
7227       assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7228              "RegLocs should be for ByVal argument.");
7229
7230 const CCValAssign RL = ArgLocs[I++];
7231 HandleRegLoc(RL.getLocReg(), Offset);
7232 }
7233
7234 if (Offset != StackSize) {
7235         assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7236                "Expected MemLoc for remaining bytes.");
7237         assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
7238       // Consume the MemLoc. The InVal has already been emitted, so nothing
7239 // more needs to be done.
7240 ++I;
7241 }
7242
7243 continue;
7244 }
7245
7246 EVT ValVT = VA.getValVT();
7247 if (VA.isRegLoc() && !VA.needsCustom()) {
7248 MVT::SimpleValueType SVT = ValVT.getSimpleVT().SimpleTy;
7249 unsigned VReg =
7250 MF.addLiveIn(VA.getLocReg(), getRegClassForSVT(SVT, IsPPC64));
7251 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7252 if (ValVT.isScalarInteger() &&
7253 (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
7254 ArgValue =
7255 truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
7256 }
7257 InVals.push_back(ArgValue);
7258 continue;
7259 }
7260 if (VA.isMemLoc()) {
7261 const unsigned LocSize = LocVT.getStoreSize();
7262 const unsigned ValSize = ValVT.getStoreSize();
7263       assert((ValSize <= LocSize) &&
7264              "Object size is larger than size of MemLoc");
7265 int CurArgOffset = VA.getLocMemOffset();
7266 // Objects are right-justified because AIX is big-endian.
7267 if (LocSize > ValSize)
7268 CurArgOffset += LocSize - ValSize;
7269 // Potential tail calls could cause overwriting of argument stack slots.
7270 const bool IsImmutable =
7271 !(getTargetMachine().Options.GuaranteedTailCallOpt &&
7272 (CallConv == CallingConv::Fast));
7273 int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
7274 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7275 SDValue ArgValue =
7276 DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
7277 InVals.push_back(ArgValue);
7278 continue;
7279 }
7280 }
7281
7282 // On AIX a minimum of 8 words is saved to the parameter save area.
7283 const unsigned MinParameterSaveArea = 8 * PtrByteSize;
7284 // Area that is at least reserved in the caller of this function.
7285 unsigned CallerReservedArea =
7286 std::max(CCInfo.getNextStackOffset(), LinkageSize + MinParameterSaveArea);
7287
7288   // Set the size that is at least reserved in the caller of this function. Tail
7289 // call optimized function's reserved stack space needs to be aligned so
7290 // that taking the difference between two stack areas will result in an
7291 // aligned stack.
7292 CallerReservedArea =
7293 EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
7294 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
7295 FuncInfo->setMinReservedArea(CallerReservedArea);
7296
7297 if (isVarArg) {
7298 FuncInfo->setVarArgsFrameIndex(
7299 MFI.CreateFixedObject(PtrByteSize, CCInfo.getNextStackOffset(), true));
7300 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
7301
7302 static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7303 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7304
7305 static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7306 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7307 const unsigned NumGPArgRegs = array_lengthof(IsPPC64 ? GPR_64 : GPR_32);
7308
7309 // The fixed integer arguments of a variadic function are stored to the
7310 // VarArgsFrameIndex on the stack so that they may be loaded by
7311 // dereferencing the result of va_next.
7312 for (unsigned GPRIndex =
7313 (CCInfo.getNextStackOffset() - LinkageSize) / PtrByteSize;
7314 GPRIndex < NumGPArgRegs; ++GPRIndex) {
7315
7316 const unsigned VReg =
7317 IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
7318 : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
7319
7320 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
7321 SDValue Store =
7322 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
7323 MemOps.push_back(Store);
7324 // Increment the address for the next argument to store.
7325 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
7326 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
7327 }
7328 }
7329
7330 if (!MemOps.empty())
7331 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
7332
7333 return Chain;
7334}
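The right-justification adjustment in the MemLoc path above (source lines 7265-7268) is easiest to see with concrete numbers. The standalone sketch below is not part of PPCISelLowering.cpp; the helper name is invented purely for illustration of how a value smaller than its stack slot gets its load offset bumped on big-endian AIX.

#include <cassert>

// Hypothetical helper mirroring the MemLoc handling: on big-endian AIX a
// value is right-justified in its slot, so the load starts LocSize - ValSize
// bytes past the slot's base offset.
static unsigned adjustedArgOffset(unsigned LocMemOffset, unsigned LocSize,
                                  unsigned ValSize) {
  assert(ValSize <= LocSize && "Object size is larger than size of MemLoc");
  return LocMemOffset + (LocSize - ValSize);
}

int main() {
  // A 4-byte value in an 8-byte slot at offset 112 is loaded from offset 116.
  assert(adjustedArgOffset(112, 8, 4) == 116);
  // A value that exactly fills its slot keeps its original offset.
  assert(adjustedArgOffset(112, 8, 8) == 112);
  return 0;
}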
7335
7336SDValue PPCTargetLowering::LowerCall_AIX(
7337 SDValue Chain, SDValue Callee, CallFlags CFlags,
7338 const SmallVectorImpl<ISD::OutputArg> &Outs,
7339 const SmallVectorImpl<SDValue> &OutVals,
7340 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7341 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
7342 const CallBase *CB) const {
7343 // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
7344 // AIX ABI stack frame layout.
7345
7346   assert((CFlags.CallConv == CallingConv::C ||
7347           CFlags.CallConv == CallingConv::Cold ||
7348           CFlags.CallConv == CallingConv::Fast) &&
7349          "Unexpected calling convention!");
7350
7351 if (CFlags.IsPatchPoint)
7352 report_fatal_error("This call type is unimplemented on AIX.");
7353
7354 const PPCSubtarget& Subtarget =
7355 static_cast<const PPCSubtarget&>(DAG.getSubtarget());
7356 if (Subtarget.hasAltivec())
7357 report_fatal_error("Altivec support is unimplemented on AIX.");
7358
7359 MachineFunction &MF = DAG.getMachineFunction();
7360 SmallVector<CCValAssign, 16> ArgLocs;
7361 CCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
7362 *DAG.getContext());
7363
7364 // Reserve space for the linkage save area (LSA) on the stack.
7365 // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7366 // [SP][CR][LR][2 x reserved][TOC].
7367 // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7368 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7369 const bool IsPPC64 = Subtarget.isPPC64();
7370 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
7371 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7372 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7373 CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
7374
7375 // The prolog code of the callee may store up to 8 GPR argument registers to
7376 // the stack, allowing va_start to index over them in memory if the callee
7377 // is variadic.
7378 // Because we cannot tell if this is needed on the caller side, we have to
7379 // conservatively assume that it is needed. As such, make sure we have at
7380 // least enough stack space for the caller to store the 8 GPRs.
7381 const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
7382 const unsigned NumBytes = std::max(LinkageSize + MinParameterSaveAreaSize,
7383 CCInfo.getNextStackOffset());
7384
7385 // Adjust the stack pointer for the new arguments...
7386 // These operations are automatically eliminated by the prolog/epilog pass.
7387 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
7388 SDValue CallSeqStart = Chain;
7389
7390 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
7391 SmallVector<SDValue, 8> MemOpChains;
7392
7393 // Set up a copy of the stack pointer for loading and storing any
7394 // arguments that may not fit in the registers available for argument
7395 // passing.
7396 const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
7397 : DAG.getRegister(PPC::R1, MVT::i32);
7398
7399 for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
7400 const unsigned ValNo = ArgLocs[I].getValNo();
7401 SDValue Arg = OutVals[ValNo];
7402 ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
7403
7404 if (Flags.isByVal()) {
7405 const unsigned ByValSize = Flags.getByValSize();
7406
7407 // Nothing to do for zero-sized ByVals on the caller side.
7408 if (!ByValSize) {
7409 ++I;
7410 continue;
7411 }
7412
7413 auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
7414 return DAG.getExtLoad(
7415 ISD::ZEXTLOAD, dl, PtrVT, Chain,
7416 (LoadOffset != 0)
7417 ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
7418 : Arg,
7419 MachinePointerInfo(), VT);
7420 };
7421
7422 unsigned LoadOffset = 0;
7423
7424 // Initialize registers, which are fully occupied by the by-val argument.
7425 while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
7426 SDValue Load = GetLoad(PtrVT, LoadOffset);
7427 MemOpChains.push_back(Load.getValue(1));
7428 LoadOffset += PtrByteSize;
7429 const CCValAssign &ByValVA = ArgLocs[I++];
7430         assert(ByValVA.getValNo() == ValNo &&
7431                "Unexpected location for pass-by-value argument.");
7432 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
7433 }
7434
7435 if (LoadOffset == ByValSize)
7436 continue;
7437
7438 // There must be one more loc to handle the remainder.
7439       assert(ArgLocs[I].getValNo() == ValNo &&
7440              "Expected additional location for by-value argument.");
7441
7442 if (ArgLocs[I].isMemLoc()) {
7443         assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
7444 const CCValAssign &ByValVA = ArgLocs[I++];
7445 ISD::ArgFlagsTy MemcpyFlags = Flags;
7446 // Only memcpy the bytes that don't pass in register.
7447 MemcpyFlags.setByValSize(ByValSize - LoadOffset);
7448 Chain = CallSeqStart = createMemcpyOutsideCallSeq(
7449 (LoadOffset != 0)
7450 ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
7451 : Arg,
7452 DAG.getObjectPtrOffset(dl, StackPtr,
7453 TypeSize::Fixed(ByValVA.getLocMemOffset())),
7454 CallSeqStart, MemcpyFlags, DAG, dl);
7455 continue;
7456 }
7457
7458 // Initialize the final register residue.
7459 // Any residue that occupies the final by-val arg register must be
7460 // left-justified on AIX. Loads must be a power-of-2 size and cannot be
7461 // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
7462 // 2 and 1 byte loads.
7463 const unsigned ResidueBytes = ByValSize % PtrByteSize;
7464       assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
7465              "Unexpected register residue for by-value argument.");
7466 SDValue ResidueVal;
7467 for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
7468 const unsigned N = PowerOf2Floor(ResidueBytes - Bytes);
7469 const MVT VT =
7470 N == 1 ? MVT::i8
7471 : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
7472 SDValue Load = GetLoad(VT, LoadOffset);
7473 MemOpChains.push_back(Load.getValue(1));
7474 LoadOffset += N;
7475 Bytes += N;
7476
7477         // By-val arguments are passed left-justified in registers.
7478 // Every load here needs to be shifted, otherwise a full register load
7479 // should have been used.
7480         assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
7481                "Unexpected load emitted during handling of pass-by-value "
7482                "argument.");
7483 unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
7484 EVT ShiftAmountTy =
7485 getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
7486 SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
7487 SDValue ShiftedLoad =
7488 DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
7489 ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
7490 ShiftedLoad)
7491 : ShiftedLoad;
7492 }
7493
7494 const CCValAssign &ByValVA = ArgLocs[I++];
7495 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
7496 continue;
7497 }
7498
7499 CCValAssign &VA = ArgLocs[I++];
7500 const MVT LocVT = VA.getLocVT();
7501 const MVT ValVT = VA.getValVT();
7502
7503 switch (VA.getLocInfo()) {
7504 default:
7505 report_fatal_error("Unexpected argument extension type.");
7506 case CCValAssign::Full:
7507 break;
7508 case CCValAssign::ZExt:
7509 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7510 break;
7511 case CCValAssign::SExt:
7512 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7513 break;
7514 }
7515
7516 if (VA.isRegLoc() && !VA.needsCustom()) {
7517 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
7518 continue;
7519 }
7520
7521 if (VA.isMemLoc()) {
7522 SDValue PtrOff =
7523 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7524 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7525 MemOpChains.push_back(
7526 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
7527
7528 continue;
7529 }
7530
7531 // Custom handling is used for GPR initializations for vararg float
7532 // arguments.
7533     assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
7534            ValVT.isFloatingPoint() && LocVT.isInteger() &&
7535            "Unexpected register handling for calling convention.");
7536
7537 SDValue ArgAsInt =
7538 DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);
7539
7540 if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7541 // f32 in 32-bit GPR
7542 // f64 in 64-bit GPR
7543 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
7544 else if (Arg.getValueType().getFixedSizeInBits() <
7545 LocVT.getFixedSizeInBits())
7546 // f32 in 64-bit GPR.
7547 RegsToPass.push_back(std::make_pair(
7548 VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
7549 else {
7550 // f64 in two 32-bit GPRs
7551 // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7552       assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7553              "Unexpected custom register for argument!");
7554 CCValAssign &GPR1 = VA;
7555 SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
7556 DAG.getConstant(32, dl, MVT::i8));
7557 RegsToPass.push_back(std::make_pair(
7558 GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
7559
7560 if (I != E) {
7561         // If only one GPR was available, there will only be one custom GPR
7562         // and the rest of the argument will also be passed in memory.
7563         CCValAssign &PeekArg = ArgLocs[I];
7564         if (PeekArg.isRegLoc() && PeekArg.getValNo() == ValNo) {
7565           assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7566 CCValAssign &GPR2 = ArgLocs[I++];
7567 RegsToPass.push_back(std::make_pair(
7568 GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
7569 }
7570 }
7571 }
7572 }
7573
7574 if (!MemOpChains.empty())
7575 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
7576
7577 // For indirect calls, we need to save the TOC base to the stack for
7578 // restoration after the call.
7579 if (CFlags.IsIndirect) {
7580     assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7581 const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7582 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7583 const MVT PtrVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
7584 const unsigned TOCSaveOffset =
7585 Subtarget.getFrameLowering()->getTOCSaveOffset();
7586
7587 setUsesTOCBasePtr(DAG);
7588 SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
7589 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
7590 SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
7591 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7592 Chain = DAG.getStore(
7593 Val.getValue(1), dl, Val, AddPtr,
7594 MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7595 }
7596
7597 // Build a sequence of copy-to-reg nodes chained together with token chain
7598 // and flag operands which copy the outgoing args into the appropriate regs.
7599 SDValue InFlag;
7600 for (auto Reg : RegsToPass) {
7601 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InFlag);
7602 InFlag = Chain.getValue(1);
7603 }
7604
7605 const int SPDiff = 0;
7606 return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
7607 Callee, SPDiff, NumBytes, Ins, InVals, CB);
7608}
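The residue handling in LowerCall_AIX (source lines 7463-7492) decomposes the final, partially occupied by-val register into descending power-of-2 loads, each shifted into a left-justified position. The sketch below is illustrative only; the function name is invented, and it reproduces the PowerOf2Floor walk with plain arithmetic rather than SDNodes.

#include <cstdio>

// Illustrative only: emulate the residue walk for a by-val argument whose
// size is not a multiple of the register width. Each chunk is the largest
// power of two that fits, and its left-justification shift is whatever is
// left of the register after the bytes consumed so far.
static void printResidueLoads(unsigned ByValSize, unsigned PtrByteSize) {
  const unsigned ResidueBytes = ByValSize % PtrByteSize;
  unsigned Bytes = 0;
  while (Bytes != ResidueBytes) {
    // Largest power of two not exceeding the remaining residue
    // (mirrors PowerOf2Floor(ResidueBytes - Bytes)).
    unsigned Remaining = ResidueBytes - Bytes;
    unsigned N = 1;
    while (N * 2 <= Remaining)
      N *= 2;
    Bytes += N;
    const unsigned ShiftBits = PtrByteSize * 8 - Bytes * 8;
    std::printf("load %u byte(s), shift left by %u bits\n", N, ShiftBits);
  }
}

int main() {
  // A 7-byte by-val argument on PPC64: 4-, 2- and 1-byte loads, shifted by
  // 32, 16 and 8 bits respectively before being OR'ed together.
  printResidueLoads(7, 8);
  return 0;
}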
7609
7610bool
7611PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7612 MachineFunction &MF, bool isVarArg,
7613 const SmallVectorImpl<ISD::OutputArg> &Outs,
7614 LLVMContext &Context) const {
7615 SmallVector<CCValAssign, 16> RVLocs;
7616 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7617 return CCInfo.CheckReturn(
7618 Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7619 ? RetCC_PPC_Cold
7620 : RetCC_PPC);
7621}
7622
7623SDValue
7624PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7625 bool isVarArg,
7626 const SmallVectorImpl<ISD::OutputArg> &Outs,
7627 const SmallVectorImpl<SDValue> &OutVals,
7628 const SDLoc &dl, SelectionDAG &DAG) const {
7629 SmallVector<CCValAssign, 16> RVLocs;
7630 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7631 *DAG.getContext());
7632 CCInfo.AnalyzeReturn(Outs,
7633 (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7634 ? RetCC_PPC_Cold
7635 : RetCC_PPC);
7636
7637 SDValue Flag;
7638 SmallVector<SDValue, 4> RetOps(1, Chain);
7639
7640 // Copy the result values into the output registers.
7641 for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
7642 CCValAssign &VA = RVLocs[i];
7643     assert(VA.isRegLoc() && "Can only return in registers!");
7644
7645 SDValue Arg = OutVals[RealResIdx];
7646
7647 if (Subtarget.isAIXABI() &&
7648 (VA.getLocVT().isVector() || VA.getValVT().isVector()))
7649 report_fatal_error("Returning vector types not yet supported on AIX.");
7650
7651 switch (VA.getLocInfo()) {
7652     default: llvm_unreachable("Unknown loc info!");
7653 case CCValAssign::Full: break;
7654 case CCValAssign::AExt:
7655 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
7656 break;
7657 case CCValAssign::ZExt:
7658 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7659 break;
7660 case CCValAssign::SExt:
7661 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7662 break;
7663 }
7664 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7665 bool isLittleEndian = Subtarget.isLittleEndian();
7666 // Legalize ret f64 -> ret 2 x i32.
7667 SDValue SVal =
7668 DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7669 DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
7670 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
7671 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7672 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7673 DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
7674 Flag = Chain.getValue(1);
7675 VA = RVLocs[++i]; // skip ahead to next loc
7676 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
7677 } else
7678 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
7679 Flag = Chain.getValue(1);
7680 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7681 }
7682
7683 RetOps[0] = Chain; // Update chain.
7684
7685 // Add the flag if we have it.
7686 if (Flag.getNode())
7687 RetOps.push_back(Flag);
7688
7689 return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
7690}
7691
7692SDValue
7693PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7694 SelectionDAG &DAG) const {
7695 SDLoc dl(Op);
7696
7697 // Get the correct type for integers.
7698 EVT IntVT = Op.getValueType();
7699
7700 // Get the inputs.
7701 SDValue Chain = Op.getOperand(0);
7702 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7703 // Build a DYNAREAOFFSET node.
7704 SDValue Ops[2] = {Chain, FPSIdx};
7705 SDVTList VTs = DAG.getVTList(IntVT);
7706 return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
7707}
7708
7709SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7710 SelectionDAG &DAG) const {
7711 // When we pop the dynamic allocation we need to restore the SP link.
7712 SDLoc dl(Op);
7713
7714 // Get the correct type for pointers.
7715 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7716
7717 // Construct the stack pointer operand.
7718 bool isPPC64 = Subtarget.isPPC64();
7719 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7720 SDValue StackPtr = DAG.getRegister(SP, PtrVT);
7721
7722 // Get the operands for the STACKRESTORE.
7723 SDValue Chain = Op.getOperand(0);
7724 SDValue SaveSP = Op.getOperand(1);
7725
7726 // Load the old link SP.
7727 SDValue LoadLinkSP =
7728 DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
7729
7730 // Restore the stack pointer.
7731 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
7732
7733 // Store the old link SP.
7734 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
7735}
7736
7737SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7738 MachineFunction &MF = DAG.getMachineFunction();
7739 bool isPPC64 = Subtarget.isPPC64();
7740 EVT PtrVT = getPointerTy(MF.getDataLayout());
7741
7742   // Get the current return address save index. The users of this index are
7743   // primarily the RETURNADDR lowering.
7744 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7745 int RASI = FI->getReturnAddrSaveIndex();
7746
7747   // If the return address save index hasn't been defined yet.
7748   if (!RASI) {
7749     // Find out the fixed offset of the return address save area.
7750     int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
7751     // Allocate the frame index for the return address save area.
7752 RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
7753 // Save the result.
7754 FI->setReturnAddrSaveIndex(RASI);
7755 }
7756 return DAG.getFrameIndex(RASI, PtrVT);
7757}
7758
7759SDValue
7760PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
7761 MachineFunction &MF = DAG.getMachineFunction();
7762 bool isPPC64 = Subtarget.isPPC64();
7763 EVT PtrVT = getPointerTy(MF.getDataLayout());
7764
7765 // Get current frame pointer save index. The users of this index will be
7766 // primarily DYNALLOC instructions.
7767 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7768 int FPSI = FI->getFramePointerSaveIndex();
7769
7770 // If the frame pointer save index hasn't been defined yet.
7771 if (!FPSI) {
7772     // Find out the fixed offset of the frame pointer save area.
7773     int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
7774     // Allocate the frame index for the frame pointer save area.
7775 FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
7776 // Save the result.
7777 FI->setFramePointerSaveIndex(FPSI);
7778 }
7779 return DAG.getFrameIndex(FPSI, PtrVT);
7780}
7781
7782SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
7783 SelectionDAG &DAG) const {
7784 MachineFunction &MF = DAG.getMachineFunction();
7785 // Get the inputs.
7786 SDValue Chain = Op.getOperand(0);
7787 SDValue Size = Op.getOperand(1);
7788 SDLoc dl(Op);
7789
7790 // Get the correct type for pointers.
7791 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7792 // Negate the size.
7793 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
7794 DAG.getConstant(0, dl, PtrVT), Size);
7795 // Construct a node for the frame pointer save index.
7796 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7797 SDValue Ops[3] = { Chain, NegSize, FPSIdx };
7798 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
7799 if (hasInlineStackProbe(MF))
7800 return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
7801 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
7802}
7803
7804SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
7805 SelectionDAG &DAG) const {
7806 MachineFunction &MF = DAG.getMachineFunction();
7807
7808 bool isPPC64 = Subtarget.isPPC64();
7809 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7810
7811 int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
7812 return DAG.getFrameIndex(FI, PtrVT);
7813}
7814
7815SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
7816 SelectionDAG &DAG) const {
7817 SDLoc DL(Op);
7818 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
7819 DAG.getVTList(MVT::i32, MVT::Other),
7820 Op.getOperand(0), Op.getOperand(1));
7821}
7822
7823SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
7824 SelectionDAG &DAG) const {
7825 SDLoc DL(Op);
7826 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
7827 Op.getOperand(0), Op.getOperand(1));
7828}
7829
7830SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
7831 if (Op.getValueType().isVector())
7832 return LowerVectorLoad(Op, DAG);
7833
7834   assert(Op.getValueType() == MVT::i1 &&
7835          "Custom lowering only for i1 loads");
7836
7837 // First, load 8 bits into 32 bits, then truncate to 1 bit.
7838
7839 SDLoc dl(Op);
7840 LoadSDNode *LD = cast<LoadSDNode>(Op);
7841
7842 SDValue Chain = LD->getChain();
7843 SDValue BasePtr = LD->getBasePtr();
7844 MachineMemOperand *MMO = LD->getMemOperand();
7845
7846 SDValue NewLD =
7847 DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
7848 BasePtr, MVT::i8, MMO);
7849 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
7850
7851 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
7852 return DAG.getMergeValues(Ops, dl);
7853}
7854
7855SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
7856 if (Op.getOperand(1).getValueType().isVector())
7857 return LowerVectorStore(Op, DAG);
7858
7859   assert(Op.getOperand(1).getValueType() == MVT::i1 &&
7860          "Custom lowering only for i1 stores");
7861
7862 // First, zero extend to 32 bits, then use a truncating store to 8 bits.
7863
7864 SDLoc dl(Op);
7865 StoreSDNode *ST = cast<StoreSDNode>(Op);
7866
7867 SDValue Chain = ST->getChain();
7868 SDValue BasePtr = ST->getBasePtr();
7869 SDValue Value = ST->getValue();
7870 MachineMemOperand *MMO = ST->getMemOperand();
7871
7872 Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
7873 Value);
7874 return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
7875}
7876
7877// FIXME: Remove this once the ANDI glue bug is fixed:
7878SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
7879   assert(Op.getValueType() == MVT::i1 &&
7880          "Custom lowering only for i1 results");
7881
7882 SDLoc DL(Op);
7883 return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
7884}
7885
7886SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
7887 SelectionDAG &DAG) const {
7888
7889 // Implements a vector truncate that fits in a vector register as a shuffle.
7890 // We want to legalize vector truncates down to where the source fits in
7891 // a vector register (and target is therefore smaller than vector register
7892 // size). At that point legalization will try to custom lower the sub-legal
7893 // result and get here - where we can contain the truncate as a single target
7894 // operation.
7895
7896 // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
7897 // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
7898 //
7899   // We will implement it for big-endian ordering as this (where u denotes
7900   // an undefined element):
7901 // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
7902 // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
7903 //
7904 // The same operation in little-endian ordering will be:
7905 // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
7906 // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
7907
7908 EVT TrgVT = Op.getValueType();
7909   assert(TrgVT.isVector() && "Vector type expected.");
7910 unsigned TrgNumElts = TrgVT.getVectorNumElements();
7911 EVT EltVT = TrgVT.getVectorElementType();
7912 if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
7913 TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
7914 !isPowerOf2_32(EltVT.getSizeInBits()))
7915 return SDValue();
7916
7917 SDValue N1 = Op.getOperand(0);
7918 EVT SrcVT = N1.getValueType();
7919 unsigned SrcSize = SrcVT.getSizeInBits();
7920 if (SrcSize > 256 ||
7921 !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
7922 !isPowerOf2_32(SrcVT.getVectorElementType().getSizeInBits()))
7923 return SDValue();
7924 if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
7925 return SDValue();
7926
7927 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
7928 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
7929
7930 SDLoc DL(Op);
7931 SDValue Op1, Op2;
7932 if (SrcSize == 256) {
7933 EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
7934 EVT SplitVT =
7935 N1.getValueType().getHalfNumVectorElementsVT(*DAG.getContext());
7936 unsigned SplitNumElts = SplitVT.getVectorNumElements();
7937 Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
7938 DAG.getConstant(0, DL, VecIdxTy));
7939 Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
7940 DAG.getConstant(SplitNumElts, DL, VecIdxTy));
7941 }
7942 else {
7943 Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
7944 Op2 = DAG.getUNDEF(WideVT);
7945 }
7946
7947 // First list the elements we want to keep.
7948 unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
7949 SmallVector<int, 16> ShuffV;
7950 if (Subtarget.isLittleEndian())
7951 for (unsigned i = 0; i < TrgNumElts; ++i)
7952 ShuffV.push_back(i * SizeMult);
7953 else
7954 for (unsigned i = 1; i <= TrgNumElts; ++i)
7955 ShuffV.push_back(i * SizeMult - 1);
7956
7957 // Populate the remaining elements with undefs.
7958 for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
7959 // ShuffV.push_back(i + WideNumElts);
7960 ShuffV.push_back(WideNumElts + 1);
7961
7962 Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
7963 Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
7964 return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
7965}
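The shuffle-mask construction above (source lines 7947-7960) can be replayed outside the SelectionDAG. The helper below is only a sketch with an invented name; it rebuilds the same index list and prints it for the <2 x i16> to <2 x i8> case used in the comment at the top of LowerTRUNCATEVector.

#include <cstdio>
#include <vector>

// Illustrative only: rebuild the shuffle mask from LowerTRUNCATEVector.
// Little-endian keeps elements 0, SizeMult, 2*SizeMult, ...; big-endian keeps
// the last subelement of each source element, i.e. SizeMult-1, 2*SizeMult-1,
// ... The remaining lanes are padded with an out-of-range index and are
// effectively "don't care" lanes.
static std::vector<int> truncShuffleMask(unsigned TrgNumElts,
                                         unsigned WideNumElts,
                                         unsigned SizeMult,
                                         bool IsLittleEndian) {
  std::vector<int> ShuffV;
  if (IsLittleEndian)
    for (unsigned i = 0; i < TrgNumElts; ++i)
      ShuffV.push_back(i * SizeMult);
  else
    for (unsigned i = 1; i <= TrgNumElts; ++i)
      ShuffV.push_back(i * SizeMult - 1);
  for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
    ShuffV.push_back(WideNumElts + 1);
  return ShuffV;
}

int main() {
  // trunc <2 x i16> to <2 x i8>, widened to a 16 x i8 register:
  // SizeMult = 32 / 16 = 2, so big-endian keeps lanes {1, 3} and
  // little-endian keeps lanes {0, 2}.
  for (int Lane : truncShuffleMask(2, 16, 2, /*IsLittleEndian=*/false))
    std::printf("%d ", Lane);
  std::printf("\n");
  return 0;
}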
7966
7967 /// LowerSELECT_CC - Lower floating-point select_cc's into the fsel
7968 /// instruction when possible.
7969SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
7970 // Not FP, or using SPE? Not a fsel.
7971 if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
7972 !Op.getOperand(2).getValueType().isFloatingPoint() || Subtarget.hasSPE())
7973 return Op;
7974
7975 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
7976
7977 EVT ResVT = Op.getValueType();
7978 EVT CmpVT = Op.getOperand(0).getValueType();
7979 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
7980 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
7981 SDLoc dl(Op);
7982 SDNodeFlags Flags = Op.getNode()->getFlags();
7983
7984 // We have xsmaxcdp/xsmincdp which are OK to emit even in the
7985 // presence of infinities.
7986 if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
7987 switch (CC) {
7988 default:
7989 break;
7990 case ISD::SETOGT:
7991 case ISD::SETGT:
7992 return DAG.getNode(PPCISD::XSMAXCDP, dl, Op.getValueType(), LHS, RHS);
7993 case ISD::SETOLT:
7994 case ISD::SETLT:
7995 return DAG.getNode(PPCISD::XSMINCDP, dl, Op.getValueType(), LHS, RHS);
7996 }
7997 }
7998
7999 // We might be able to do better than this under some circumstances, but in
8000 // general, fsel-based lowering of select is a finite-math-only optimization.
8001 // For more information, see section F.3 of the 2.06 ISA specification.
8002 // With ISA 3.0
8003 if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
8004 (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()))
8005 return Op;
8006
8007 // If the RHS of the comparison is a 0.0, we don't need to do the
8008 // subtraction at all.
8009 SDValue Sel1;
8010 if (isFloatingPointZero(RHS))
8011 switch (CC) {
8012 default: break; // SETUO etc aren't handled by fsel.
8013 case ISD::SETNE:
8014 std::swap(TV, FV);
8015       LLVM_FALLTHROUGH;
8016 case ISD::SETEQ:
8017 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8018 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8019 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8020 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8021 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8022 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8023 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
8024 case ISD::SETULT:
8025 case ISD::SETLT:
8026 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
8027       LLVM_FALLTHROUGH;
8028 case ISD::SETOGE:
8029 case ISD::SETGE:
8030 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8031 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8032 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8033 case ISD::SETUGT:
8034 case ISD::SETGT:
8035 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
8036       LLVM_FALLTHROUGH;
8037 case ISD::SETOLE:
8038 case ISD::SETLE:
8039 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8040 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8041 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8042 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
8043 }
8044
8045 SDValue Cmp;
8046 switch (CC) {
8047 default: break; // SETUO etc aren't handled by fsel.
8048 case ISD::SETNE:
8049 std::swap(TV, FV);
8050     LLVM_FALLTHROUGH;
8051 case ISD::SETEQ:
8052 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8053 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8054 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8055 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8056 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8057 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8058 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8059 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
8060 case ISD::SETULT:
8061 case ISD::SETLT:
8062 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8063 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8064 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8065 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8066 case ISD::SETOGE:
8067 case ISD::SETGE:
8068 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8069 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8070 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8071 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8072 case ISD::SETUGT:
8073 case ISD::SETGT:
8074 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8075 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8076 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8077 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8078 case ISD::SETOLE:
8079 case ISD::SETLE:
8080 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8081 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8082 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8083 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8084 }
8085 return Op;
8086}
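The comparison-by-subtraction pattern used throughout LowerSELECT_CC relies on fsel selecting its second operand when the first operand is greater than or equal to 0.0. The scalar model below is a sketch only (the function name is invented), and, like the real lowering, it is meaningful only under finite-math assumptions since it ignores NaNs and the sign of zero.

#include <cassert>

// Scalar model of the PPC fsel semantics: pick T when C >= 0.0, else F.
// This is only an illustration of the idea; the real lowering builds SDNodes
// and is restricted to no-NaNs/no-infinities fast-math conditions.
static double fselModel(double C, double T, double F) {
  return C >= 0.0 ? T : F;
}

int main() {
  double A = 3.0, B = 5.0, T = 1.0, F = 2.0;
  // select_cc(A, B, T, F, SETGE)  ->  fsel(A - B, T, F)
  assert(fselModel(A - B, T, F) == (A >= B ? T : F));
  // select_cc(A, B, T, F, SETLT)  ->  fsel(A - B, F, T)  (operands swapped)
  assert(fselModel(A - B, F, T) == (A < B ? T : F));
  return 0;
}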
8087
8088static unsigned getPPCStrictOpcode(unsigned Opc) {
8089 switch (Opc) {
8090 default:
8091     llvm_unreachable("No strict version of this opcode!");
8092 case PPCISD::FCTIDZ:
8093 return PPCISD::STRICT_FCTIDZ;
8094 case PPCISD::FCTIWZ:
8095 return PPCISD::STRICT_FCTIWZ;
8096 case PPCISD::FCTIDUZ:
8097 return PPCISD::STRICT_FCTIDUZ;
8098 case PPCISD::FCTIWUZ:
8099 return PPCISD::STRICT_FCTIWUZ;
8100 case PPCISD::FCFID:
8101 return PPCISD::STRICT_FCFID;
8102 case PPCISD::FCFIDU:
8103 return PPCISD::STRICT_FCFIDU;
8104 case PPCISD::FCFIDS:
8105 return PPCISD::STRICT_FCFIDS;
8106 case PPCISD::FCFIDUS:
8107 return PPCISD::STRICT_FCFIDUS;
8108 }
8109}
8110
8111static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
8112 const PPCSubtarget &Subtarget) {
8113 SDLoc dl(Op);
8114 bool IsStrict = Op->isStrictFPOpcode();
8115 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8116 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8117
8118 // TODO: Any other flags to propagate?
8119 SDNodeFlags Flags;
8120 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8121
8122 // For strict nodes, source is the second operand.
8123 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8124 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8125   assert(Src.getValueType().isFloatingPoint());
8126 if (Src.getValueType() == MVT::f32) {
8127 if (IsStrict) {
8128 Src =
8129 DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
8130 DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
8131 Chain = Src.getValue(1);
8132 } else
8133 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
8134 }
8135 SDValue Conv;
8136 unsigned Opc = ISD::DELETED_NODE;
8137 switch (Op.getSimpleValueType().SimpleTy) {
8138   default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
8139 case MVT::i32:
8140 Opc = IsSigned ? PPCISD::FCTIWZ
8141 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
8142 break;
8143 case MVT::i64:
8144     assert((IsSigned || Subtarget.hasFPCVT()) &&
8145            "i64 FP_TO_UINT is supported only with FPCVT");
8146 Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
8147 }
8148 if (IsStrict) {
8149 Opc = getPPCStrictOpcode(Opc);
8150 Conv = DAG.getNode(Opc, dl, DAG.getVTList(MVT::f64, MVT::Other),
8151 {Chain, Src}, Flags);
8152 } else {
8153 Conv = DAG.getNode(Opc, dl, MVT::f64, Src);
8154 }
8155 return Conv;
8156}
8157
8158void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
8159 SelectionDAG &DAG,
8160 const SDLoc &dl) const {
8161 SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
8162 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8163 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8164 bool IsStrict = Op->isStrictFPOpcode();
8165
8166 // Convert the FP value to an int value through memory.
8167 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
8168 (IsSigned || Subtarget.hasFPCVT());
8169 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
8170 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
8171 MachinePointerInfo MPI =
8172 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
8173
8174 // Emit a store to the stack slot.
8175 SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
8176 Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
8177 if (i32Stack) {
8178 MachineFunction &MF = DAG.getMachineFunction();
8179 Alignment = Align(4);
8180 MachineMemOperand *MMO =
8181 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
8182 SDValue Ops[] = { Chain, Tmp, FIPtr };
8183 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
8184 DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
8185 } else
8186 Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
8187
8188 // Result is a load from the stack slot. If loading 4 bytes, make sure to
8189 // add in a bias on big endian.
8190 if (Op.getValueType() == MVT::i32 && !i32Stack) {
8191 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
8192 DAG.getConstant(4, dl, FIPtr.getValueType()));
8193 MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
8194 }
8195
8196 RLI.Chain = Chain;
8197 RLI.Ptr = FIPtr;
8198 RLI.MPI = MPI;
8199 RLI.Alignment = Alignment;
8200}
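The big-endian bias applied above (source lines 8188-8193) exists because the 32-bit conversion result occupies the low half of the 8-byte stack temporary: in big-endian memory order those are the last four bytes of the slot, in little-endian order the first four. A minimal sketch, with an invented helper name, of the offset choice:

#include <cassert>

// Illustrative only: byte offset of the meaningful low word inside an 8-byte
// slot that holds the 64-bit image of a 32-bit conversion result.
static unsigned lowWordByteOffset(bool IsLittleEndian) {
  return IsLittleEndian ? 0 : 4;
}

int main() {
  // Big-endian bytes of the 64-bit value 0x2A: 00 00 00 00 | 00 00 00 2A.
  // The low word starts 4 bytes into the slot, hence the +4 bias.
  assert(lowWordByteOffset(/*IsLittleEndian=*/false) == 4);
  // Little-endian bytes: 2A 00 00 00 | 00 00 00 00 -> offset 0, no bias.
  assert(lowWordByteOffset(/*IsLittleEndian=*/true) == 0);
  return 0;
}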
8201
8202/// Custom lowers floating point to integer conversions to use
8203/// the direct move instructions available in ISA 2.07 to avoid the
8204/// need for load/store combinations.
8205SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8206 SelectionDAG &DAG,
8207 const SDLoc &dl) const {
8208 SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8209 SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
8210 if (Op->isStrictFPOpcode())
8211 return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
8212 else
8213 return Mov;
8214}
8215
8216SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
8217 const SDLoc &dl) const {
8218 bool IsStrict = Op->isStrictFPOpcode();
8219 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8220 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8221 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8222 EVT SrcVT = Src.getValueType();
8223 EVT DstVT = Op.getValueType();
8224
8225 // FP to INT conversions are legal for f128.
8226 if (SrcVT == MVT::f128)
8227 return Op;
8228
8229 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
8230 // PPC (the libcall is not available).
8231 if (SrcVT == MVT::ppcf128) {
8232 if (DstVT == MVT::i32) {
8233 // TODO: Conservatively pass only nofpexcept flag here. Need to check and
8234 // set other fast-math flags to FP operations in both strict and
8235 // non-strict cases. (FP_TO_SINT, FSUB)
8236 SDNodeFlags Flags;
8237 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8238
8239 if (IsSigned) {
8240 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src,
8241 DAG.getIntPtrConstant(0, dl));
8242 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src,
8243 DAG.getIntPtrConstant(1, dl));
8244
8245 // Add the two halves of the long double in round-to-zero mode, and use
8246 // a smaller FP_TO_SINT.
8247 if (IsStrict) {
8248 SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
8249 DAG.getVTList(MVT::f64, MVT::Other),
8250 {Op.getOperand(0), Lo, Hi}, Flags);
8251 return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8252 DAG.getVTList(MVT::i32, MVT::Other),
8253 {Res.getValue(1), Res}, Flags);
8254 } else {
8255 SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
8256 return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
8257 }
8258 } else {
8259 const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
8260 APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
8261 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8262 SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
8263 if (IsStrict) {
8264 // Sel = Src < 0x80000000
8265 // FltOfs = select Sel, 0.0, 0x80000000
8266 // IntOfs = select Sel, 0, 0x80000000
8267 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8268 SDValue Chain = Op.getOperand(0);
8269 EVT SetCCVT =
8270 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8271 EVT DstSetCCVT =
8272 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8273 SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8274 Chain, true);
8275 Chain = Sel.getValue(1);
8276
8277 SDValue FltOfs = DAG.getSelect(
8278 dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8279 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8280
8281 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
8282 DAG.getVTList(SrcVT, MVT::Other),
8283 {Chain, Src, FltOfs}, Flags);
8284 Chain = Val.getValue(1);
8285 SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8286 DAG.getVTList(DstVT, MVT::Other),
8287 {Chain, Val}, Flags);
8288 Chain = SInt.getValue(1);
8289 SDValue IntOfs = DAG.getSelect(
8290 dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
8291 SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8292 return DAG.getMergeValues({Result, Chain}, dl);
8293 } else {
8294 // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
8295 // FIXME: generated code sucks.
8296 SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
8297 True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
8298 True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
8299 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
8300 return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
8301 }
8302 }
8303 }
8304
8305 return SDValue();
8306 }
8307
8308 if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
8309 return LowerFP_TO_INTDirectMove(Op, DAG, dl);
8310
8311 ReuseLoadInfo RLI;
8312 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8313
8314 return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8315 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8316}
8317
8318// We're trying to insert a regular store, S, and then a load, L. If the
8319// incoming value, O, is a load, we might just be able to have our load use the
8320// address used by O. However, we don't know if anything else will store to
8321// that address before we can load from it. To prevent this situation, we need
8322// to insert our load, L, into the chain as a peer of O. To do this, we give L
8323// the same chain operand as O, we create a token factor from the chain results
8324// of O and L, and we replace all uses of O's chain result with that token
8325// factor (see spliceIntoChain below for this last part).
8326bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
8327 ReuseLoadInfo &RLI,
8328 SelectionDAG &DAG,
8329 ISD::LoadExtType ET) const {
8330 // Conservatively skip reusing for constrained FP nodes.
8331 if (Op->isStrictFPOpcode())
8332 return false;
8333
8334 SDLoc dl(Op);
8335 bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
8336 (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
8337 if (ET == ISD::NON_EXTLOAD &&
8338 (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
8339 isOperationLegalOrCustom(Op.getOpcode(),
8340 Op.getOperand(0).getValueType())) {
8341
8342 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8343 return true;
8344 }
8345
8346 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
8347 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
8348 LD->isNonTemporal())
8349 return false;
8350 if (LD->getMemoryVT() != MemVT)
8351 return false;
8352
8353 RLI.Ptr = LD->getBasePtr();
8354 if (LD->isIndexed() && !LD->getOffset().isUndef()) {
8355 assert(LD->getAddressingMode() == ISD::PRE_INC &&
8356 "Non-pre-inc AM on PPC?");
8357 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
8358 LD->getOffset());
8359 }
8360
8361 RLI.Chain = LD->getChain();
8362 RLI.MPI = LD->getPointerInfo();
8363 RLI.IsDereferenceable = LD->isDereferenceable();
8364 RLI.IsInvariant = LD->isInvariant();
8365 RLI.Alignment = LD->getAlign();
8366 RLI.AAInfo = LD->getAAInfo();
8367 RLI.Ranges = LD->getRanges();
8368
8369 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
8370 return true;
8371}
8372
8373// Given the head of the old chain, ResChain, insert a token factor containing
8374// it and NewResChain, and make users of ResChain now be users of that token
8375// factor.
8376// TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
8377void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
8378 SDValue NewResChain,
8379 SelectionDAG &DAG) const {
8380 if (!ResChain)
8381 return;
8382
8383 SDLoc dl(NewResChain);
8384
8385 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
8386 NewResChain, DAG.getUNDEF(MVT::Other));
8387 assert(TF.getNode() != NewResChain.getNode() &&
8388 "A new TF really is required here");
8389
8390 DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
8391 DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
8392}
8393
8394/// Analyze profitability of direct move
8395/// prefer float load to int load plus direct move
8396/// when there is no integer use of int load
8397bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8398 SDNode *Origin = Op.getOperand(0).getNode();
8399 if (Origin->getOpcode() != ISD::LOAD)
8400 return true;
8401
8402 // If there is no LXSIBZX/LXSIHZX, like Power8,
8403 // prefer direct move if the memory size is 1 or 2 bytes.
8404 MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8405 if (!Subtarget.hasP9Vector() && MMO->getSize() <= 2)
8406 return true;
8407
8408 for (SDNode::use_iterator UI = Origin->use_begin(),
8409 UE = Origin->use_end();
8410 UI != UE; ++UI) {
8411
8412 // Only look at the users of the loaded value.
8413 if (UI.getUse().get().getResNo() != 0)
8414 continue;
8415
8416 if (UI->getOpcode() != ISD::SINT_TO_FP &&
8417 UI->getOpcode() != ISD::UINT_TO_FP &&
8418 UI->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8419 UI->getOpcode() != ISD::STRICT_UINT_TO_FP)
8420 return true;
8421 }
8422
8423 return false;
8424}
8425
8426static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
8427 const PPCSubtarget &Subtarget,
8428 SDValue Chain = SDValue()) {
8429 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8430 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8431 SDLoc dl(Op);
8432
8433 // TODO: Any other flags to propagate?
8434 SDNodeFlags Flags;
8435 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8436
8437 // If we have FCFIDS, then use it when converting to single-precision.
8438 // Otherwise, convert to double-precision and then round.
8439 bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
8440 unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
8441 : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
8442 EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
8443 if (Op->isStrictFPOpcode()) {
8444 if (!Chain)
8445 Chain = Op.getOperand(0);
8446 return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
8447 DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
8448 } else
8449 return DAG.getNode(ConvOpc, dl, ConvTy, Src);
8450}
8451
8452/// Custom lowers integer to floating point conversions to use
8453/// the direct move instructions available in ISA 2.07 to avoid the
8454/// need for load/store combinations.
8455SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8456 SelectionDAG &DAG,
8457 const SDLoc &dl) const {
8458 assert((Op.getValueType() == MVT::f32 ||
8459 Op.getValueType() == MVT::f64) &&
8460 "Invalid floating point type as target of conversion");
8461 assert(Subtarget.hasFPCVT() &&
8462 "Int to FP conversions with direct moves require FPCVT");
8463 SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
8464 bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8465 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8466 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8467 unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8468 SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
8469 return convertIntToFP(Op, Mov, DAG, Subtarget);
8470}
8471
8472static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8473
8474 EVT VecVT = Vec.getValueType();
8475 assert(VecVT.isVector() && "Expected a vector type.");
8476 assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8477
8478 EVT EltVT = VecVT.getVectorElementType();
8479 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8480 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8481
8482 unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8483 SmallVector<SDValue, 16> Ops(NumConcat);
8484 Ops[0] = Vec;
8485 SDValue UndefVec = DAG.getUNDEF(VecVT);
8486 for (unsigned i = 1; i < NumConcat; ++i)
8487 Ops[i] = UndefVec;
8488
8489 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
8490}
8491
8492SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
8493 const SDLoc &dl) const {
8494 bool IsStrict = Op->isStrictFPOpcode();
8495 unsigned Opc = Op.getOpcode();
8496 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8497 assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP ||
8498 Opc == ISD::STRICT_UINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP) &&
8499 "Unexpected conversion type");
8500 assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
8501 "Supports conversions to v2f64/v4f32 only.");
8502
8503 // TODO: Any other flags to propagate?
8504 SDNodeFlags Flags;
8505 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8506
8507 bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
8508 bool FourEltRes = Op.getValueType() == MVT::v4f32;
8509
8510 SDValue Wide = widenVec(DAG, Src, dl);
8511 EVT WideVT = Wide.getValueType();
8512 unsigned WideNumElts = WideVT.getVectorNumElements();
8513 MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
8514
8515 SmallVector<int, 16> ShuffV;
8516 for (unsigned i = 0; i < WideNumElts; ++i)
8517 ShuffV.push_back(i + WideNumElts);
8518
8519 int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
8520 int SaveElts = FourEltRes ? 4 : 2;
8521 if (Subtarget.isLittleEndian())
8522 for (int i = 0; i < SaveElts; i++)
8523 ShuffV[i * Stride] = i;
8524 else
8525 for (int i = 1; i <= SaveElts; i++)
8526 ShuffV[i * Stride - 1] = i - 1;
8527
8528 SDValue ShuffleSrc2 =
8529 SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
8530 SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
8531
8532 SDValue Extend;
8533 if (SignedConv) {
8534 Arrange = DAG.getBitcast(IntermediateVT, Arrange);
8535 EVT ExtVT = Src.getValueType();
8536 if (Subtarget.hasP9Altivec())
8537 ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
8538 IntermediateVT.getVectorNumElements());
8539
8540 Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
8541 DAG.getValueType(ExtVT));
8542 } else
8543 Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
8544
8545 if (IsStrict)
8546 return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
8547 {Op.getOperand(0), Extend}, Flags);
8548
8549 return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
8550}
8551
8552SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
8553 SelectionDAG &DAG) const {
8554 SDLoc dl(Op);
8555 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8556 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8557 bool IsStrict = Op->isStrictFPOpcode();
8558 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8559 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
8560
8561 // TODO: Any other flags to propagate?
8562 SDNodeFlags Flags;
8563 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8564
8565 EVT InVT = Src.getValueType();
8566 EVT OutVT = Op.getValueType();
8567 if (OutVT.isVector() && OutVT.isFloatingPoint() &&
8568 isOperationCustom(Op.getOpcode(), InVT))
8569 return LowerINT_TO_FPVector(Op, DAG, dl);
8570
8571 // Conversions to f128 are legal.
8572 if (Op.getValueType() == MVT::f128)
8573 return Op;
8574
8575 // Don't handle ppc_fp128 here; let it be lowered to a libcall.
8576 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8577 return SDValue();
8578
8579 if (Src.getValueType() == MVT::i1)
8580 return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
8581 DAG.getConstantFP(1.0, dl, Op.getValueType()),
8582 DAG.getConstantFP(0.0, dl, Op.getValueType()));
8583
8584 // If we have direct moves, we can do all the conversion, skip the store/load
8585 // however, without FPCVT we can't do most conversions.
8586 if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8587 Subtarget.isPPC64() && Subtarget.hasFPCVT())
8588 return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8589
8590 assert((IsSigned || Subtarget.hasFPCVT()) &&
8591 "UINT_TO_FP is supported only with FPCVT");
8592
8593 if (Src.getValueType() == MVT::i64) {
8594 SDValue SINT = Src;
8595 // When converting to single-precision, we actually need to convert
8596 // to double-precision first and then round to single-precision.
8597 // To avoid double-rounding effects during that operation, we have
8598 // to prepare the input operand. Bits that might be truncated when
8599 // converting to double-precision are replaced by a bit that won't
8600 // be lost at this stage, but is below the single-precision rounding
8601 // position.
8602 //
8603 // However, if -enable-unsafe-fp-math is in effect, accept double
8604 // rounding to avoid the extra overhead.
8605 if (Op.getValueType() == MVT::f32 &&
8606 !Subtarget.hasFPCVT() &&
8607 !DAG.getTarget().Options.UnsafeFPMath) {
8608
8609 // Twiddle input to make sure the low 11 bits are zero. (If this
8610 // is the case, we are guaranteed the value will fit into the 53 bit
8611 // mantissa of an IEEE double-precision value without rounding.)
8612 // If any of those low 11 bits were not zero originally, make sure
8613 // bit 12 (value 2048) is set instead, so that the final rounding
8614 // to single-precision gets the correct result.
8615 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8616 SINT, DAG.getConstant(2047, dl, MVT::i64));
8617 Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
8618 Round, DAG.getConstant(2047, dl, MVT::i64));
8619 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
8620 Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8621 Round, DAG.getConstant(-2048, dl, MVT::i64));
8622
8623 // However, we cannot use that value unconditionally: if the magnitude
8624 // of the input value is small, the bit-twiddling we did above might
8625 // end up visibly changing the output. Fortunately, in that case, we
8626 // don't need to twiddle bits since the original input will convert
8627 // exactly to double-precision floating-point already. Therefore,
8628 // construct a conditional to use the original value if the top 11
8629 // bits are all sign-bit copies, and use the rounded value computed
8630 // above otherwise.
8631 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
8632 SINT, DAG.getConstant(53, dl, MVT::i32));
8633 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
8634 Cond, DAG.getConstant(1, dl, MVT::i64));
8635 Cond = DAG.getSetCC(
8636 dl,
8637 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
8638 Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
8639
8640 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
8641 }
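// Illustration (editorial, not part of the original source): the four nodes
// above reduce algebraically to, with V as the i64 input value,
//   Round = (V & 2047) ? ((V & ~2047ULL) | 2048) : V;
// i.e. clear the low 11 bits and, if any of them were set, set bit 11 so the
// later f64 -> f32 rounding still sees a nonzero bit below f32 precision.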
8642
8643 ReuseLoadInfo RLI;
8644 SDValue Bits;
8645
8646 MachineFunction &MF = DAG.getMachineFunction();
8647 if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
8648 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8649 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8650 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8651 } else if (Subtarget.hasLFIWAX() &&
8652 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
8653 MachineMemOperand *MMO =
8654 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8655 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8656 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8657 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
8658 DAG.getVTList(MVT::f64, MVT::Other),
8659 Ops, MVT::i32, MMO);
8660 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8661 } else if (Subtarget.hasFPCVT() &&
8662 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
8663 MachineMemOperand *MMO =
8664 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8665 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8666 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8667 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
8668 DAG.getVTList(MVT::f64, MVT::Other),
8669 Ops, MVT::i32, MMO);
8670 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8671 } else if (((Subtarget.hasLFIWAX() &&
8672 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
8673 (Subtarget.hasFPCVT() &&
8674 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8675 SINT.getOperand(0).getValueType() == MVT::i32) {
8676 MachineFrameInfo &MFI = MF.getFrameInfo();
8677 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8678
8679 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8680 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8681
8682 SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
8683 MachinePointerInfo::getFixedStack(
8684 DAG.getMachineFunction(), FrameIdx));
8685 Chain = Store;
8686
8687 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8688 "Expected an i32 store");
8689
8690 RLI.Ptr = FIdx;
8691 RLI.Chain = Chain;
8692 RLI.MPI =
8693 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8694 RLI.Alignment = Align(4);
8695
8696 MachineMemOperand *MMO =
8697 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8698 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8699 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8700 Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
8701 PPCISD::LFIWZX : PPCISD::LFIWAX,
8702 dl, DAG.getVTList(MVT::f64, MVT::Other),
8703 Ops, MVT::i32, MMO);
8704 Chain = Bits.getValue(1);
8705 } else
8706 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
8707
8708 SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
8709 if (IsStrict)
8710 Chain = FP.getValue(1);
8711
8712 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8713 if (IsStrict)
8714 FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
8715 DAG.getVTList(MVT::f32, MVT::Other),
8716 {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8717 else
8718 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8719 DAG.getIntPtrConstant(0, dl));
8720 }
8721 return FP;
8722 }
8723
8724 assert(Src.getValueType() == MVT::i32 &&
8725 "Unhandled INT_TO_FP type in custom expander!");
8726 // Since we only generate this in 64-bit mode, we can take advantage of
8727 // 64-bit registers. In particular, sign extend the input value into the
8728 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
8729 // then lfd it and fcfid it.
8730 MachineFunction &MF = DAG.getMachineFunction();
8731 MachineFrameInfo &MFI = MF.getFrameInfo();
8732 EVT PtrVT = getPointerTy(MF.getDataLayout());
8733
8734 SDValue Ld;
8735 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
8736 ReuseLoadInfo RLI;
8737 bool ReusingLoad;
8738 if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
8739 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8740 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8741
8742 SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
8743 MachinePointerInfo::getFixedStack(
8744 DAG.getMachineFunction(), FrameIdx));
8745 Chain = Store;
8746
8747 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8748 "Expected an i32 store");
8749
8750 RLI.Ptr = FIdx;
8751 RLI.Chain = Chain;
8752 RLI.MPI =
8753 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8754 RLI.Alignment = Align(4);
8755 }
8756
8757 MachineMemOperand *MMO =
8758 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8759 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8760 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8761 Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
8762 DAG.getVTList(MVT::f64, MVT::Other), Ops,
8763 MVT::i32, MMO);
8764 Chain = Ld.getValue(1);
8765 if (ReusingLoad)
8766 spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
8767 } else {
8768 assert(Subtarget.isPPC64() &&
8769 "i32->FP without LFIWAX supported only on PPC64");
8770
8771 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
8772 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8773
8774 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);
8775
8776 // STD the extended value into the stack slot.
8777 SDValue Store = DAG.getStore(
8778 Chain, dl, Ext64, FIdx,
8779 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
8780 Chain = Store;
8781
8782 // Load the value as a double.
8783 Ld = DAG.getLoad(
8784 MVT::f64, dl, Chain, FIdx,
8785 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
8786 Chain = Ld.getValue(1);
8787 }
8788
8789 // FCFID it and return it.
8790 SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
8791 if (IsStrict)
8792 Chain = FP.getValue(1);
8793 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8794 if (IsStrict)
8795 FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
8796 DAG.getVTList(MVT::f32, MVT::Other),
8797 {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8798 else
8799 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8800 DAG.getIntPtrConstant(0, dl));
8801 }
8802 return FP;
8803}
8804
8805SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
8806 SelectionDAG &DAG) const {
8807 SDLoc dl(Op);
8808 /*
8809 The rounding mode is in bits 30:31 of FPSR, and has the following
8810 settings:
8811 00 Round to nearest
8812 01 Round to 0
8813 10 Round to +inf
8814 11 Round to -inf
8815
8816 FLT_ROUNDS, on the other hand, expects the following:
8817 -1 Undefined
8818 0 Round to 0
8819 1 Round to nearest
8820 2 Round to +inf
8821 3 Round to -inf
8822
8823 To perform the conversion, we do:
8824 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
8825 */
8826
8827 MachineFunction &MF = DAG.getMachineFunction();
8828 EVT VT = Op.getValueType();
8829 EVT PtrVT = getPointerTy(MF.getDataLayout());
8830
8831 // Save FP Control Word to register
8832 SDValue Chain = Op.getOperand(0);
8833 SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
8834 Chain = MFFS.getValue(1);
8835
8836 // Save FP register to stack slot
8837 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
8838 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
8839 Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
8840
8841 // Load FP Control Word from low 32 bits of stack slot.
8842 SDValue Four = DAG.getConstant(4, dl, PtrVT);
8843 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
8844 SDValue CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
8845 Chain = CWD.getValue(1);
8846
8847 // Transform as necessary
8848 SDValue CWD1 =
8849 DAG.getNode(ISD::AND, dl, MVT::i32,
8850 CWD, DAG.getConstant(3, dl, MVT::i32));
8851 SDValue CWD2 =
8852 DAG.getNode(ISD::SRL, dl, MVT::i32,
8853 DAG.getNode(ISD::AND, dl, MVT::i32,
8854 DAG.getNode(ISD::XOR, dl, MVT::i32,
8855 CWD, DAG.getConstant(3, dl, MVT::i32)),
8856 DAG.getConstant(3, dl, MVT::i32)),
8857 DAG.getConstant(1, dl, MVT::i32));
8858
8859 SDValue RetVal =
8860 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
8861
8862 RetVal =
8863 DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
8864 dl, VT, RetVal);
8865
8866 return DAG.getMergeValues({RetVal, Chain}, dl);
8867}
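
A standalone sketch (not part of this file) that checks the bit manipulation documented above, mapping the two-bit FPSCR RN field to the FLT_ROUNDS encoding:

#include <cassert>

// Illustrative only: RN is the rounding-mode field read from the FPSCR.
static int fltRoundsFromRN(unsigned RN) {
  return (RN & 3) ^ ((~RN & 3) >> 1);
}

// 00 (nearest) -> 1, 01 (toward zero) -> 0, 10 (+inf) -> 2, 11 (-inf) -> 3.
static void checkMapping() {
  assert(fltRoundsFromRN(0) == 1 && fltRoundsFromRN(1) == 0 &&
         fltRoundsFromRN(2) == 2 && fltRoundsFromRN(3) == 3);
}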
8868
8869SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
8870 EVT VT = Op.getValueType();
8871 unsigned BitWidth = VT.getSizeInBits();
8872 SDLoc dl(Op);
8873 assert(Op.getNumOperands() == 3 &&
8874 VT == Op.getOperand(1).getValueType() &&
8875 "Unexpected SHL!");
8876
8877 // Expand into a bunch of logical ops. Note that these ops
8878 // depend on the PPC behavior for oversized shift amounts.
8879 SDValue Lo = Op.getOperand(0);
8880 SDValue Hi = Op.getOperand(1);
8881 SDValue Amt = Op.getOperand(2);
8882 EVT AmtVT = Amt.getValueType();
8883
8884 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8885 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8886 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
8887 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
8888 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
8889 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8890 DAG.getConstant(-BitWidth, dl, AmtVT));
8891 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
8892 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
8893 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
8894 SDValue OutOps[] = { OutLo, OutHi };
8895 return DAG.getMergeValues(OutOps, dl);
8896}
8897
8898SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
8899 EVT VT = Op.getValueType();
8900 SDLoc dl(Op);
8901 unsigned BitWidth = VT.getSizeInBits();
8902 assert(Op.getNumOperands() == 3 &&
8903 VT == Op.getOperand(1).getValueType() &&
8904 "Unexpected SRL!");
8905
8906 // Expand into a bunch of logical ops. Note that these ops
8907 // depend on the PPC behavior for oversized shift amounts.
8908 SDValue Lo = Op.getOperand(0);
8909 SDValue Hi = Op.getOperand(1);
8910 SDValue Amt = Op.getOperand(2);
8911 EVT AmtVT = Amt.getValueType();
8912
8913 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8914 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8915 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
8916 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
8917 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
8918 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8919 DAG.getConstant(-BitWidth, dl, AmtVT));
8920 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
8921 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
8922 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
8923 SDValue OutOps[] = { OutLo, OutHi };
8924 return DAG.getMergeValues(OutOps, dl);
8925}
8926
8927SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
8928 SDLoc dl(Op);
8929 EVT VT = Op.getValueType();
8930 unsigned BitWidth = VT.getSizeInBits();
8931 assert(Op.getNumOperands() == 3 &&
8932 VT == Op.getOperand(1).getValueType() &&
8933 "Unexpected SRA!");
8934
8935 // Expand into a bunch of logical ops, followed by a select_cc.
8936 SDValue Lo = Op.getOperand(0);
8937 SDValue Hi = Op.getOperand(1);
8938 SDValue Amt = Op.getOperand(2);
8939 EVT AmtVT = Amt.getValueType();
8940
8941 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8942 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8943 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
8944 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
8945 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
8946 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8947 DAG.getConstant(-BitWidth, dl, AmtVT));
8948 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
8949 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
8950 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
8951 Tmp4, Tmp6, ISD::SETLE);
8952 SDValue OutOps[] = { OutLo, OutHi };
8953 return DAG.getMergeValues(OutOps, dl);
8954}
8955
8956SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
8957 SelectionDAG &DAG) const {
8958 SDLoc dl(Op);
8959 EVT VT = Op.getValueType();
8960 unsigned BitWidth = VT.getSizeInBits();
8961
8962 bool IsFSHL = Op.getOpcode() == ISD::FSHL;
8963 SDValue X = Op.getOperand(0);
8964 SDValue Y = Op.getOperand(1);
8965 SDValue Z = Op.getOperand(2);
8966 EVT AmtVT = Z.getValueType();
8967
8968 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
8969 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
8970 // This is simpler than TargetLowering::expandFunnelShift because we can rely
8971 // on PowerPC shift by BW being well defined.
8972 Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
8973 DAG.getConstant(BitWidth - 1, dl, AmtVT));
8974 SDValue SubZ =
8975 DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
8976 X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
8977 Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
8978 return DAG.getNode(ISD::OR, dl, VT, X, Y);
8979}
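
As a cross-check, a standalone 64-bit model (not part of this file) of the expansion above, with the shift-past-width behavior the comment relies on made explicit:

#include <cstdint>

// Model of PPCISD::SHL/SRL, which are well defined (produce 0) for a shift of BW.
static uint64_t shlWide(uint64_t V, unsigned S) { return S >= 64 ? 0 : V << S; }
static uint64_t srlWide(uint64_t V, unsigned S) { return S >= 64 ? 0 : V >> S; }

// fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW))); for Z % BW == 0 the second
// term is a shift by BW and drops out, leaving X, as fshl semantics require.
static uint64_t fshl64(uint64_t X, uint64_t Y, unsigned Z) {
  Z &= 63;
  return shlWide(X, Z) | srlWide(Y, 64 - Z);
}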
8980
8981//===----------------------------------------------------------------------===//
8982// Vector related lowering.
8983//
8984
8985/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
8986/// element size of SplatSize. Cast the result to VT.
8987static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
8988 SelectionDAG &DAG, const SDLoc &dl) {
8989 static const MVT VTys[] = { // canonical VT to use for each size.
8990 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
8991 };
8992
8993 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
8994
8995 // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
8996 if (Val == ((1LU << (SplatSize * 8)) - 1)) {
8997 SplatSize = 1;
8998 Val = 0xFF;
8999 }
9000
9001 EVT CanonicalVT = VTys[SplatSize-1];
9002
9003 // Build a canonical splat for this value.
9004 return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
9005}
9006
9007/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
9008/// specified intrinsic ID.
9009static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
9010 const SDLoc &dl, EVT DestVT = MVT::Other) {
9011 if (DestVT == MVT::Other) DestVT = Op.getValueType();
9012 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9013 DAG.getConstant(IID, dl, MVT::i32), Op);
9014}
9015
9016/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
9017/// specified intrinsic ID.
9018static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
9019 SelectionDAG &DAG, const SDLoc &dl,
9020 EVT DestVT = MVT::Other) {
9021 if (DestVT == MVT::Other) DestVT = LHS.getValueType();
9022 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9023 DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
9024}
9025
9026/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
9027/// specified intrinsic ID.
9028static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
9029 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
9030 EVT DestVT = MVT::Other) {
9031 if (DestVT == MVT::Other) DestVT = Op0.getValueType();
9032 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9033 DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
9034}
9035
9036/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
9037/// amount. The result has the specified value type.
9038static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
9039 SelectionDAG &DAG, const SDLoc &dl) {
9040 // Force LHS/RHS to be the right type.
9041 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
9042 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
9043
9044 int Ops[16];
9045 for (unsigned i = 0; i != 16; ++i)
9046 Ops[i] = i + Amt;
9047 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
9048 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9049}
9050
9051/// Do we have an efficient pattern in a .td file for this node?
9052///
9053/// \param V - pointer to the BuildVectorSDNode being matched
9054/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
9055///
9056/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
9057/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
9058/// the opposite is true (expansion is beneficial) are:
9059/// - The node builds a vector out of integers that are not 32 or 64-bits
9060/// - The node builds a vector out of constants
9061/// - The node is a "load-and-splat"
9062/// In all other cases, we will choose to keep the BUILD_VECTOR.
9063static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
9064 bool HasDirectMove,
9065 bool HasP8Vector) {
9066 EVT VecVT = V->getValueType(0);
9067 bool RightType = VecVT == MVT::v2f64 ||
9068 (HasP8Vector && VecVT == MVT::v4f32) ||
9069 (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
9070 if (!RightType)
9071 return false;
9072
9073 bool IsSplat = true;
9074 bool IsLoad = false;
9075 SDValue Op0 = V->getOperand(0);
9076
9077 // This function is called in a block that confirms the node is not a constant
9078 // splat. So a constant BUILD_VECTOR here means the vector is built out of
9079 // different constants.
9080 if (V->isConstant())
9081 return false;
9082 for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
9083 if (V->getOperand(i).isUndef())
9084 return false;
9085 // We want to expand nodes that represent load-and-splat even if the
9086 // loaded value is a floating point truncation or conversion to int.
9087 if (V->getOperand(i).getOpcode() == ISD::LOAD ||
9088 (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
9089 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9090 (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
9091 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9092 (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
9093 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
9094 IsLoad = true;
9095 // If the operands are different or the input is not a load and has more
9096 // uses than just this BV node, then it isn't a splat.
9097 if (V->getOperand(i) != Op0 ||
9098 (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
9099 IsSplat = false;
9100 }
9101 return !(IsSplat && IsLoad);
9102}
9103
9104// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9105SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9106
9107 SDLoc dl(Op);
9108 SDValue Op0 = Op->getOperand(0);
9109
9110 if ((Op.getValueType() != MVT::f128) ||
9111 (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9112 (Op0.getOperand(0).getValueType() != MVT::i64) ||
9113 (Op0.getOperand(1).getValueType() != MVT::i64))
9114 return SDValue();
9115
9116 return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0),
9117 Op0.getOperand(1));
9118}
9119
9120static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9121 const SDValue *InputLoad = &Op;
9122 if (InputLoad->getOpcode() == ISD::BITCAST)
9123 InputLoad = &InputLoad->getOperand(0);
9124 if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
9125 InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
9126 IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9127 InputLoad = &InputLoad->getOperand(0);
9128 }
9129 if (InputLoad->getOpcode() != ISD::LOAD)
9130 return nullptr;
9131 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9132 return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
9133}
9134
9135// Convert the argument APFloat to a single precision APFloat if there is no
9136// loss in information during the conversion to single precision APFloat and the
9137// resulting number is not a denormal number. Return true if successful.
9138bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
9139 APFloat APFloatToConvert = ArgAPFloat;
9140 bool LosesInfo = true;
9141 APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9142 &LosesInfo);
9143 bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
9144 if (Success)
9145 ArgAPFloat = APFloatToConvert;
9146 return Success;
9147}
9148
9149// Bitcast the argument APInt to a double and convert it to a single precision
9150// APFloat, bitcast the APFloat to an APInt and assign it to the original
9151// argument if there is no loss in information during the conversion from
9152// double to single precision APFloat and the resulting number is not a denormal
9153// number. Return true if successful.
9154bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
9155 double DpValue = ArgAPInt.bitsToDouble();
9156 APFloat APFloatDp(DpValue);
9157 bool Success = convertToNonDenormSingle(APFloatDp);
9158 if (Success)
9159 ArgAPInt = APFloatDp.bitcastToAPInt();
9160 return Success;
9161}
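
For context, a small usage sketch (not part of this file, and assuming the declaration from PPCISelLowering.h is visible) of the helper above, whose in-place shrink LowerBUILD_VECTOR later relies on for XXSPLTIDP:

// 0x3FF0000000000000 is the double bit pattern of 1.0; it converts losslessly
// to the single-precision pattern 0x3F800000 and is not denormal, so the
// helper succeeds and rewrites the APInt in place as a 32-bit value.
APInt Bits(64, 0x3FF0000000000000ULL);
bool Shrunk = llvm::convertToNonDenormSingle(Bits); // true; Bits == 0x3F800000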
9162
9163// If this is a case we can't handle, return null and let the default
9164// expansion code take care of it. If we CAN select this case, and if it
9165// selects to a single instruction, return Op. Otherwise, if we can codegen
9166// this case more efficiently than a constant pool load, lower it to the
9167// sequence of ops that should be used.
9168SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9169 SelectionDAG &DAG) const {
9170 SDLoc dl(Op);
9171 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
9172 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR")((BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR"
) ? static_cast<void> (0) : __assert_fail ("BVN && \"Expected a BuildVectorSDNode in LowerBUILD_VECTOR\""
, "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Target/PowerPC/PPCISelLowering.cpp"
, 9172, __PRETTY_FUNCTION__))
;
9173
9174 // Check if this is a splat of a constant value.
9175 APInt APSplatBits, APSplatUndef;
9176 unsigned SplatBitSize;
9177 bool HasAnyUndefs;
9178 bool BVNIsConstantSplat =
9179 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
9180 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
9181
9182 // If it is a splat of a double, check if we can shrink it to a 32 bit
9183 // non-denormal float which when converted back to double gives us the same
9184 // double. This is to exploit the XXSPLTIDP instruction.
9185 if (BVNIsConstantSplat && Subtarget.hasPrefixInstrs() &&
9186 (SplatBitSize == 64) && (Op->getValueType(0) == MVT::v2f64) &&
9187 convertToNonDenormSingle(APSplatBits)) {
9188 SDValue SplatNode = DAG.getNode(
9189 PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
9190 DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
9191 return DAG.getBitcast(Op.getValueType(), SplatNode);
9192 }
9193
9194 if (!BVNIsConstantSplat || SplatBitSize > 32) {
9195
9196 bool IsPermutedLoad = false;
9197 const SDValue *InputLoad =
9198 getNormalLoadInput(Op.getOperand(0), IsPermutedLoad);
9199 // Handle load-and-splat patterns as we have instructions that will do this
9200 // in one go.
9201 if (InputLoad && DAG.isSplatValue(Op, true)) {
9202 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9203
9204 // We have handling for 4 and 8 byte elements.
9205 unsigned ElementSize = LD->getMemoryVT().getScalarSizeInBits();
9206
9207 // Checking for a single use of this load, we have to check for vector
9208 // width (128 bits) / ElementSize uses (since each operand of the
9209 // BUILD_VECTOR is a separate use of the value.
9210 unsigned NumUsesOfInputLD = 128 / ElementSize;
9211 for (SDValue BVInOp : Op->ops())
9212 if (BVInOp.isUndef())
9213 NumUsesOfInputLD--;
9214 assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
9215 if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
9216 ((Subtarget.hasVSX() && ElementSize == 64) ||
9217 (Subtarget.hasP9Vector() && ElementSize == 32))) {
9218 SDValue Ops[] = {
9219 LD->getChain(), // Chain
9220 LD->getBasePtr(), // Ptr
9221 DAG.getValueType(Op.getValueType()) // VT
9222 };
9223 SDValue LdSplt = DAG.getMemIntrinsicNode(
9224 PPCISD::LD_SPLAT, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
9225 Ops, LD->getMemoryVT(), LD->getMemOperand());
9226 // Replace all uses of the output chain of the original load with the
9227 // output chain of the new load.
9228 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
9229 LdSplt.getValue(1));
9230 return LdSplt;
9231 }
9232 }
9233
9234 // BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be
9235 // lowered to VSX instructions under certain conditions.
9236 // Without VSX, there is no pattern more efficient than expanding the node.
9237 if (Subtarget.hasVSX() &&
9238 haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
9239 Subtarget.hasP8Vector()))
9240 return Op;
9241 return SDValue();
9242 }
9243
9244 uint64_t SplatBits = APSplatBits.getZExtValue();
9245 uint64_t SplatUndef = APSplatUndef.getZExtValue();
9246 unsigned SplatSize = SplatBitSize / 8;
9247
9248 // First, handle single instruction cases.
9249
9250 // All zeros?
9251 if (SplatBits == 0) {
9252 // Canonicalize all zero vectors to be v4i32.
9253 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
9254 SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
9255 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
9256 }
9257 return Op;
9258 }
9259
9260 // We have XXSPLTIW for constant splats four bytes wide.
9261 // Given vector length is a multiple of 4, 2-byte splats can be replaced
9262 // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
9263 // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
9264 // turned into a 4-byte splat of 0xABABABAB.
9265 if (Subtarget.hasPrefixInstrs() && SplatSize == 2)
9266 return getCanonicalConstSplat((SplatBits |= SplatBits << 16), SplatSize * 2,
Although the value stored to 'SplatBits' is used in the enclosing expression, the value is never actually read from 'SplatBits'
9267 Op.getValueType(), DAG, dl);
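
If the intent is only to pass the replicated value, the flagged dead store could likely be avoided by using the expression without the compound assignment, for example:

// Possible fix (sketch only): compute the 4-byte pattern without writing back
// to the local SplatBits, which is never read again on this return path.
return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
                              Op.getValueType(), DAG, dl);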
9268
9269 if (Subtarget.hasPrefixInstrs() && SplatSize == 4)
9270 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9271 dl);
9272
9273 // We have XXSPLTIB for constant splats one byte wide.
9274 if (Subtarget.hasP9Vector() && SplatSize == 1)
9275 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9276 dl);
9277
9278 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
9279 int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
9280 (32-SplatBitSize));
9281 if (SextVal >= -16 && SextVal <= 15)
9282 return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
9283 dl);
9284
9285 // Two instruction sequences.
9286
9287 // If this value is in the range [-32,30] and is even, use:
9288 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
9289 // If this value is in the range [17,31] and is odd, use:
9290 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
9291 // If this value is in the range [-31,-17] and is odd, use:
9292 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
9293 // Note the last two are three-instruction sequences.
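// For example, SextVal == 19 is odd and in [17,31], so it can later be
// expanded as vsplti(3) - vsplti(-16), since 3 - (-16) == 19.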
9294 if (SextVal >= -32 && SextVal <= 31) {
9295 // To avoid having these optimizations undone by constant folding,
9296 // we convert to a pseudo that will be expanded later into one of
9297 // the above forms.
9298 SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
9299 EVT VT = (SplatSize == 1 ? MVT::v16i8 :
9300 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
9301 SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
9302 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
9303 if (VT == Op.getValueType())
9304 return RetVal;
9305 else
9306 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
9307 }
9308
9309 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
9310 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
9311 // for fneg/fabs.
9312 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
9313 // Make -1 and vspltisw -1:
9314 SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
9315
9316 // Make the VSLW intrinsic, computing 0x8000_0000.
9317 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
9318 OnesV, DAG, dl);
9319
9320 // xor by OnesV to invert it.
9321 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
9322 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9323 }
9324
9325 // Check to see if this is one of a wide variety of 'vsplti*, binop self' cases.
9326 static const signed char SplatCsts[] = {
9327 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
9328 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
9329 };
9330
9331 for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
9332 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
9333 // cases which are ambiguous (e.g. formation of 0x8000_0000).
9334 int i = SplatCsts[idx];
9335
9336 // Figure out what shift amount will be used by altivec if shifted by i in
9337 // this splat size.
9338 unsigned TypeShiftAmt = i & (SplatBitSize-1);
9339
9340 // vsplti + shl self.
9341 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
9342 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9343 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9344 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
9345 Intrinsic::ppc_altivec_vslw
9346 };
9347 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9348 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9349 }
9350
9351 // vsplti + srl self.
9352 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9353 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9354 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9355 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
9356 Intrinsic::ppc_altivec_vsrw
9357 };
9358 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9359 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9360 }
9361
9362 // vsplti + sra self.
9363 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9364 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9365 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9366 Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
9367 Intrinsic::ppc_altivec_vsraw
9368 };
9369 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9370 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9371 }
9372
9373 // vsplti + rol self.
9374 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
9375 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
9376 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9377 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9378 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
9379 Intrinsic::ppc_altivec_vrlw
9380 };
9381 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9382 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9383 }
9384
9385 // t = vsplti c, result = vsldoi t, t, 1
9386 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
9387 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9388 unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
9389 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9390 }
9391 // t = vsplti c, result = vsldoi t, t, 2
9392 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
9393 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9394 unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
9395 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9396 }
9397 // t = vsplti c, result = vsldoi t, t, 3
9398 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
9399 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9400 unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
9401 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9402 }
9403 }
9404
9405 return SDValue();
9406}
9407
9408/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
9409/// the specified operations to build the shuffle.
9410static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
9411 SDValue RHS, SelectionDAG &DAG,
9412 const SDLoc &dl) {
9413 unsigned OpNum = (PFEntry >> 26) & 0x0F;
9414 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
9415 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
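// PFEntry layout: bits [31:30] hold the cost of the entry, bits [29:26] the
// operation, bits [25:13] the LHS table index and bits [12:0] the RHS index.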
9416
9417 enum {
9418 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
9419 OP_VMRGHW,
9420 OP_VMRGLW,
9421 OP_VSPLTISW0,
9422 OP_VSPLTISW1,
9423 OP_VSPLTISW2,
9424 OP_VSPLTISW3,
9425 OP_VSLDOI4,
9426 OP_VSLDOI8,
9427 OP_VSLDOI12
9428 };
9429
9430 if (OpNum == OP_COPY) {
9431 if (LHSID == (1*9+2)*9+3) return LHS;
9432 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
9433 return RHS;
9434 }
9435
9436 SDValue OpLHS, OpRHS;
9437 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
9438 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
9439
9440 int ShufIdxs[16];
9441 switch (OpNum) {
9442 default: llvm_unreachable("Unknown i32 permute!");
9443 case OP_VMRGHW:
9444 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
9445 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
9446 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
9447 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
9448 break;
9449 case OP_VMRGLW:
9450 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
9451 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
9452 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
9453 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
9454 break;
9455 case OP_VSPLTISW0:
9456 for (unsigned i = 0; i != 16; ++i)
9457 ShufIdxs[i] = (i&3)+0;
9458 break;
9459 case OP_VSPLTISW1:
9460 for (unsigned i = 0; i != 16; ++i)
9461 ShufIdxs[i] = (i&3)+4;
9462 break;
9463 case OP_VSPLTISW2:
9464 for (unsigned i = 0; i != 16; ++i)
9465 ShufIdxs[i] = (i&3)+8;
9466 break;
9467 case OP_VSPLTISW3:
9468 for (unsigned i = 0; i != 16; ++i)
9469 ShufIdxs[i] = (i&3)+12;
9470 break;
9471 case OP_VSLDOI4:
9472 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
9473 case OP_VSLDOI8:
9474 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
9475 case OP_VSLDOI12:
9476 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
9477 }
9478 EVT VT = OpLHS.getValueType();
9479 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
9480 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
9481 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
9482 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9483}
9484
9485/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
9486/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
9487/// SDValue.
9488SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
9489 SelectionDAG &DAG) const {
9490 const unsigned BytesInVector = 16;
9491 bool IsLE = Subtarget.isLittleEndian();
9492 SDLoc dl(N);
9493 SDValue V1 = N->getOperand(0);
9494 SDValue V2 = N->getOperand(1);
9495 unsigned ShiftElts = 0, InsertAtByte = 0;
9496 bool Swap = false;
9497
9498 // Shifts required to get the byte we want at element 7.
9499 unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
9500 0, 15, 14, 13, 12, 11, 10, 9};
9501 unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
9502 1, 2, 3, 4, 5, 6, 7, 8};
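// For example, LittleEndianShifts[3] == 5: a byte at source index 3 needs a
// shift of 5 elements to end up at element 7 on a little-endian target.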
9503
9504 ArrayRef<int> Mask = N->getMask();
9505 int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
9506
9507 // For each mask element, find out if we're just inserting something
9508 // from V2 into V1 or vice versa.
9509 // Possible permutations inserting an element from V2 into V1:
9510 // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9511 // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9512 // ...
9513 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
9514 // Inserting from V1 into V2 will be similar, except mask range will be
9515 // [16,31].
9516
9517 bool FoundCandidate = false;
9518 // If both vector operands for the shuffle are the same vector, the mask
9519 // will contain only elements from the first one and the second one will be
9520 // undef.
9521 unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
9522 // Go through the mask of bytes to find an element that's being moved
9523 // from one vector to the other.
9524 for (unsigned i = 0; i < BytesInVector; ++i) {
9525 unsigned CurrentElement = Mask[i];
9526 // If 2nd operand is undefined, we should only look for element 7 in the
9527 // Mask.
9528 if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
9529 continue;
9530
9531 bool OtherElementsInOrder = true;
9532 // Examine the other elements in the Mask to see if they're in original
9533 // order.
9534 for (unsigned j = 0; j < BytesInVector; ++j) {
9535 if (j == i)
9536 continue;
9537 // If CurrentElement is from V1 [0,15], then we expect the rest of the Mask
9538 // to be from V2 [16,31] and vice versa, unless the 2nd operand is undefined,
9539 // in which case we assume we're always picking from the 1st operand.
9540 int MaskOffset =
9541 (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
9542 if (Mask[j] != OriginalOrder[j] + MaskOffset) {
9543 OtherElementsInOrder = false;
9544 break;
9545 }
9546 }
9547 // If other elements are in original order, we record the number of shifts
9548 // we need to get the element we want into element 7. Also record which byte
9549 // in the vector we should insert into.
9550 if (OtherElementsInOrder) {
9551 // If 2nd operand is undefined, we assume no shifts and no swapping.
9552 if (V2.isUndef()) {
9553 ShiftElts = 0;
9554 Swap = false;
9555 } else {
9556 // Only need the last 4 bits for the shift because operands will be swapped if CurrentElement is >= 2^4.
9557 ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
9558 : BigEndianShifts[CurrentElement & 0xF];
9559 Swap = CurrentElement < BytesInVector;
9560 }
9561 InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
9562 FoundCandidate = true;
9563 break;
9564 }
9565 }
9566
9567 if (!FoundCandidate)
9568 return SDValue();
9569
9570 // Candidate found, construct the proper SDAG sequence with VINSERTB,
9571 // optionally with VECSHL if shift is required.
9572 if (Swap)
9573 std::swap(V1, V2);
9574 if (V2.isUndef())
9575 V2 = V1;
9576 if (ShiftElts) {
9577 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
9578 DAG.getConstant(ShiftElts, dl, MVT::i32));
9579 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
9580 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9581 }
9582 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
9583 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9584}
9585
9586/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
9587/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
9588/// SDValue.
9589SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
9590 SelectionDAG &DAG) const {
9591 const unsigned NumHalfWords = 8;
9592 const unsigned BytesInVector = NumHalfWords * 2;
9593 // Check that the shuffle is on half-words.
9594 if (!isNByteElemShuffleMask(N, 2, 1))
9595 return SDValue();
9596
9597 bool IsLE = Subtarget.isLittleEndian();
9598 SDLoc dl(N);
9599 SDValue V1 = N->getOperand(0);
9600 SDValue V2 = N->getOperand(1);
9601 unsigned ShiftElts = 0, InsertAtByte = 0;
9602 bool Swap = false;
9603
9604 // Shifts required to get the half-word we want at element 3.
9605 unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
9606 unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
9607
9608 uint32_t Mask = 0;
9609 uint32_t OriginalOrderLow = 0x1234567;
9610 uint32_t OriginalOrderHigh = 0x89ABCDEF;
9611 // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
9612 // 32-bit space, only need 4-bit nibbles per element.
9613 for (unsigned i = 0; i < NumHalfWords; ++i) {
9614 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9615 Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
9616 }
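// For example, the identity half-word mask {0,1,2,3,4,5,6,7} packs to
// Mask == 0x01234567, which matches OriginalOrderLow above.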
9617
9618 // For each mask element, find out if we're just inserting something
9619 // from V2 into V1 or vice versa. Possible permutations inserting an element
9620 // from V2 into V1:
9621 // X, 1, 2, 3, 4, 5, 6, 7
9622 // 0, X, 2, 3, 4, 5, 6, 7
9623 // 0, 1, X, 3, 4, 5, 6, 7
9624 // 0, 1, 2, X, 4, 5, 6, 7
9625 // 0, 1, 2, 3, X, 5, 6, 7
9626 // 0, 1, 2, 3, 4, X, 6, 7
9627 // 0, 1, 2, 3, 4, 5, X, 7
9628 // 0, 1, 2, 3, 4, 5, 6, X
9629 // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
9630
9631 bool FoundCandidate = false;
9632 // Go through the mask of half-words to find an element that's being moved
9633 // from one vector to the other.
9634 for (unsigned i = 0; i < NumHalfWords; ++i) {
9635 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9636 uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
9637 uint32_t MaskOtherElts = ~(0xF << MaskShift);
9638 uint32_t TargetOrder = 0x0;
9639
9640 // If both vector operands for the shuffle are the same vector, the mask
9641 // will contain only elements from the first one and the second one will be
9642 // undef.
9643 if (V2.isUndef()) {
9644 ShiftElts = 0;
9645 unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
9646 TargetOrder = OriginalOrderLow;
9647 Swap = false;
9648 // Skip if this is not the correct element or the mask of the other
9649 // elements doesn't match our expected order.
9650 if (MaskOneElt == VINSERTHSrcElem &&
9651 (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9652 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
9653 FoundCandidate = true;
9654 break;
9655 }
9656 } else { // If both operands are defined.
9657 // Target order is [8,15] if the current mask is between [0,7].
9658 TargetOrder =
9659 (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
9660 // Skip if the mask of the other elements doesn't match our expected order.
9661 if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9662 // We only need the last 3 bits for the number of shifts.
9663 ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
9664 : BigEndianShifts[MaskOneElt & 0x7];
9665 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
9666 Swap = MaskOneElt < NumHalfWords;
9667 FoundCandidate = true;
9668 break;
9669 }
9670 }
9671 }
9672
9673 if (!FoundCandidate)
9674 return SDValue();
9675
9676 // Candidate found, construct the proper SDAG sequence with VINSERTH,
9677 // optionally with VECSHL if shift is required.
9678 if (Swap)
9679 std::swap(V1, V2);
9680 if (V2.isUndef())
9681 V2 = V1;
9682 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
9683 if (ShiftElts) {
9684 // Double ShiftElts because we're left shifting on v16i8 type.
9685 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
9686 DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
9687 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
9688 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
9689 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9690 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9691 }
9692 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
9693 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
9694 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9695 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9696}
9697
9698/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
9699/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
9700/// return the default SDValue.
9701SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
9702 SelectionDAG &DAG) const {
9703 // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
9704 // to v16i8. Peek through the bitcasts to get the actual operands.
9705 SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
9706 SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));
9707
9708 auto ShuffleMask = SVN->getMask();
9709 SDValue VecShuffle(SVN, 0);
9710 SDLoc DL(SVN);
9711
9712 // Check that we have a four byte shuffle.
9713 if (!isNByteElemShuffleMask(SVN, 4, 1))
9714 return SDValue();
9715
9716 // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
9717 if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
9718 std::swap(LHS, RHS);
9719 VecShuffle = DAG.getCommutedVectorShuffle(*SVN);
9720 ShuffleMask = cast<ShuffleVectorSDNode>(VecShuffle)->getMask();
9721 }
9722
9723 // Ensure that the RHS is a vector of constants.
9724 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
9725 if (!BVN)
9726 return SDValue();
9727
9728 // Check if RHS is a splat of 4-bytes (or smaller).
9729 APInt APSplatValue, APSplatUndef;
9730 unsigned SplatBitSize;
9731 bool HasAnyUndefs;
9732 if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
9733 HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
9734 SplatBitSize > 32)
9735 return SDValue();
9736
9737 // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
9738 // The instruction splats a constant C into two words of the source vector
9739 // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
9740 // Thus we check that the shuffle mask is the equivalent of
9741 // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
9742 // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
9743 // within each word are consecutive, so we only need to check the first byte.
9744 SDValue Index;
9745 bool IsLE = Subtarget.isLittleEndian();
9746 if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
9747 (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
9748 ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
9749 Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
9750 else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
9751 (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
9752 ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
9753 Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
9754 else
9755 return SDValue();
9756
9757 // If the splat is narrower than 32-bits, we need to get the 32-bit value
9758 // for XXSPLTI32DX.
9759 unsigned SplatVal = APSplatValue.getZExtValue();
9760 for (; SplatBitSize < 32; SplatBitSize <<= 1)
9761 SplatVal |= (SplatVal << SplatBitSize);
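// For example, a 16-bit splat value of 0xABCD is widened here to 0xABCDABCD
// before being used as the 32-bit immediate of XXSPLTI32DX below.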
9762
9763 SDValue SplatNode = DAG.getNode(
9764 PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
9765 Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
9766 return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
9767}
9768
9769/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
9770/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
9771/// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
9772/// i.e (or (shl x, C1), (srl x, 128-C1)).
9773SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
9774 assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
9775 assert(Op.getValueType() == MVT::v1i128 &&
9776 "Only set v1i128 as custom, other type shouldn't reach here!");
9777 SDLoc dl(Op);
9778 SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
9779 SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
9780 unsigned SHLAmt = N1.getConstantOperandVal(0);
9781 if (SHLAmt % 8 == 0) {
9782 SmallVector<int, 16> Mask(16, 0);
9783 std::iota(Mask.begin(), Mask.end(), 0);
9784 std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
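// For example, with SHLAmt == 16 the mask becomes {2,3,...,15,0,1}: iota
// fills 0..15 and rotate moves the first SHLAmt / 8 == 2 entries to the end.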
9785 if (SDValue Shuffle =
9786 DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
9787 DAG.getUNDEF(MVT::v16i8), Mask))
9788 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
9789 }
9790 SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
9791 SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
9792 DAG.getConstant(SHLAmt, dl, MVT::i32));
9793 SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
9794 DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
9795 SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
9796 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
9797}
9798
9799/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
9800/// is a shuffle we can handle in a single instruction, return it. Otherwise,
9801/// return the code it can be lowered into. Worst case, it can always be
9802/// lowered into a vperm.
9803SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
9804 SelectionDAG &DAG) const {
9805 SDLoc dl(Op);
9806 SDValue V1 = Op.getOperand(0);
9807 SDValue V2 = Op.getOperand(1);
9808 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
9809
9810 // Any nodes that were combined in the target-independent combiner prior
9811 // to vector legalization will not be sent to the target combine. Try to
9812 // combine it here.
9813 if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
9814 if (!isa<ShuffleVectorSDNode>(NewShuffle))
9815 return NewShuffle;
9816 Op = NewShuffle;
9817 SVOp = cast<ShuffleVectorSDNode>(Op);
9818 V1 = Op.getOperand(0);
9819 V2 = Op.getOperand(1);
9820 }
9821 EVT VT = Op.getValueType();
9822 bool isLittleEndian = Subtarget.isLittleEndian();
9823
9824 unsigned ShiftElts, InsertAtByte;
9825 bool Swap = false;
9826
9827 // If this is a load-and-splat, we can do that with a single instruction
9828 // in some cases. However if the load has multiple uses, we don't want to
9829 // combine it because that will just produce multiple loads.
9830 bool IsPermutedLoad = false;
9831 const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
9832 if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
9833 (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
9834 InputLoad->hasOneUse()) {
9835 bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
9836 int SplatIdx =
9837 PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
9838
9839 // The splat index for permuted loads will be in the left half of the vector
9840 // which is strictly wider than the loaded value by 8 bytes. So we need to
9841 // adjust the splat index to point to the correct address in memory.
9842 if (IsPermutedLoad) {
9843 assert(isLittleEndian && "Unexpected permuted load on big endian target");
9844 SplatIdx += IsFourByte ? 2 : 1;
9845 assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
9846 "Splat of a value outside of the loaded memory");
9847 }
9848
9849 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9850 // For 4-byte load-and-splat, we need Power9.
9851 if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
9852 uint64_t Offset = 0;
9853 if (IsFourByte)
9854 Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
9855 else
9856 Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
9857
9858 SDValue BasePtr = LD->getBasePtr();
9859 if (Offset != 0)
9860 BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
9861 BasePtr, DAG.getIntPtrConstant(Offset, dl));
9862 SDValue Ops[] = {
9863 LD->getChain(), // Chain
9864 BasePtr, // BasePtr
9865 DAG.getValueType(Op.getValueType()) // VT
9866 };
9867 SDVTList VTL =
9868 DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
9869 SDValue LdSplt =
9870 DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
9871 Ops, LD->getMemoryVT(), LD->getMemOperand());
9872 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
9873 if (LdSplt.getValueType() != SVOp->getValueType(0))
9874 LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
9875 return LdSplt;
9876 }
9877 }
9878 if (Subtarget.hasP9Vector() &&
9879 PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
9880 isLittleEndian)) {
9881 if (Swap)
9882 std::swap(V1, V2);
9883 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9884 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
9885 if (ShiftElts) {
9886 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
9887 DAG.getConstant(ShiftElts, dl, MVT::i32));
9888 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
9889 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9890 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9891 }
9892 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
9893 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9894 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9895 }
9896
9897 if (Subtarget.hasPrefixInstrs()) {
9898 SDValue SplatInsertNode;
9899 if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
9900 return SplatInsertNode;
9901 }
9902
9903 if (Subtarget.hasP9Altivec()) {
9904 SDValue NewISDNode;
9905 if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
9906 return NewISDNode;
9907
9908 if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
9909 return NewISDNode;
9910 }
9911
9912 if (Subtarget.hasVSX() &&
9913 PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
9914 if (Swap)
9915 std::swap(V1, V2);
9916 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9917 SDValue Conv2 =
9918 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
9919
9920 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
9921 DAG.getConstant(ShiftElts, dl, MVT::i32));
9922 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
9923 }
9924
9925 if (Subtarget.hasVSX() &&
9926 PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
9927 if (Swap)
9928 std::swap(V1, V2);
9929 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
9930 SDValue Conv2 =
9931 DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
9932
9933 SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
9934 DAG.getConstant(ShiftElts, dl, MVT::i32));
9935 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
9936 }
9937
9938 if (Subtarget.hasP9Vector()) {
9939 if (PPC::isXXBRHShuffleMask(SVOp)) {
9940 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
9941 SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
9942 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
9943 } else if (PPC::isXXBRWShuffleMask(SVOp)) {
9944 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9945 SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
9946 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
9947 } else if (PPC::isXXBRDShuffleMask(SVOp)) {
9948 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
9949 SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
9950 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
9951 } else if (PPC::isXXBRQShuffleMask(SVOp)) {
9952 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
9953 SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
9954 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
9955 }
9956 }
9957
9958 if (Subtarget.hasVSX()) {
9959 if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
9960 int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
9961
9962 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9963 SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
9964 DAG.getConstant(SplatIdx, dl, MVT::i32));
9965 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
9966 }
9967
9968 // Left shifts of 8 bytes are actually swaps. Convert accordingly.
9969 if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
9970 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
9971 SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
9972 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
9973 }
9974 }
9975
9976 // Cases that are handled by instructions that take permute immediates
9977 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
9978 // selected by the instruction selector.
9979 if (V2.isUndef()) {
9980 if (PPC::isSplatShuffleMask(SVOp, 1) ||
9981 PPC::isSplatShuffleMask(SVOp, 2) ||
9982 PPC::isSplatShuffleMask(SVOp, 4) ||
9983 PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
9984 PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
9985 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
9986 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
9987 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
9988 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
9989 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
9990 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
9991 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
9992 (Subtarget.hasP8Altivec() && (
9993 PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
9994 PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
9995 PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
9996 return Op;
9997 }
9998 }
9999
10000 // Altivec has a variety of "shuffle immediates" that take two vector inputs
10001 // and produce a fixed permutation. If any of these match, do not lower to
10002 // VPERM.
10003 unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
10004 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10005 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10006 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
10007 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10008 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10009 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10010 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10011 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10012 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10013 (Subtarget.hasP8Altivec() && (
10014 PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10015 PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
10016 PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
10017 return Op;
10018
10019 // Check to see if this is a shuffle of 4-byte values. If so, we can use our
10020 // perfect shuffle table to emit an optimal matching sequence.
10021 ArrayRef<int> PermMask = SVOp->getMask();
10022
10023 unsigned PFIndexes[4];
10024 bool isFourElementShuffle = true;
10025 for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
10026 unsigned EltNo = 8; // Start out undef.
10027 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
10028 if (PermMask[i*4+j] < 0)
10029 continue; // Undef, ignore it.
10030
10031 unsigned ByteSource = PermMask[i*4+j];
10032 if ((ByteSource & 3) != j) {
10033 isFourElementShuffle = false;
10034 break;
10035 }
10036
10037 if (EltNo == 8) {
10038 EltNo = ByteSource/4;
10039 } else if (EltNo != ByteSource/4) {
10040 isFourElementShuffle = false;
10041 break;
10042 }
10043 }
10044 PFIndexes[i] = EltNo;
10045 }
10046
10047 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
10048 // perfect shuffle vector to determine if it is cost effective to do this as
10049 // discrete instructions, or whether we should use a vperm.
10050 // For now, we skip this for little endian until such time as we have a
10051 // little-endian perfect shuffle table.
10052 if (isFourElementShuffle && !isLittleEndian) {
10053 // Compute the index in the perfect shuffle table.
10054 unsigned PFTableIndex =
10055 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
10056
10057 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10058 unsigned Cost = (PFEntry >> 30);
10059
10060 // Determining when to avoid vperm is tricky. Many things affect the cost
10061 // of vperm, particularly how many times the perm mask needs to be computed.
10062 // For example, if the perm mask can be hoisted out of a loop or is already
10063 // used (perhaps because there are multiple permutes with the same shuffle
10064 // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of
10065 // the loop requires an extra register.
10066 //
10067 // As a compromise, we only emit discrete instructions if the shuffle can be
10068 // generated in 3 or fewer operations. When we have loop information
10069 // available, if this block is within a loop, we should avoid using vperm
10070 // for 3-operation perms and use a constant pool load instead.
10071 if (Cost < 3)
10072 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
10073 }
10074
10075 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
10076 // vector that will get spilled to the constant pool.
10077 if (V2.isUndef()) V2 = V1;
10078
10079 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
10080 // that it is in input element units, not in bytes. Convert now.
10081
10082 // For little endian, the order of the input vectors is reversed, and
10083 // the permutation mask is complemented with respect to 31. This is
10084 // necessary to produce proper semantics with the big-endian-biased vperm
10085 // instruction.
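// For example, a source byte index of 5 is emitted below as 31 - 5 == 26 in
// the permute control vector on little-endian targets, and the two inputs
// are passed to VPERM in swapped order (V2, V1).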
10086 EVT EltVT = V1.getValueType().getVectorElementType();
10087 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
10088
10089 SmallVector<SDValue, 16> ResultMask;
10090 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
10091 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
10092
10093 for (unsigned j = 0; j != BytesPerElement; ++j)
10094 if (isLittleEndian)
10095 ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j),
10096 dl, MVT::i32));
10097 else
10098 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl,
10099 MVT::i32));
10100 }
10101
10102 ShufflesHandledWithVPERM++;
10103 SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
10104 LLVM_DEBUG(dbgs() << "Emitting a VPERM for the following shuffle:\n");
10105 LLVM_DEBUG(SVOp->dump());
10106 LLVM_DEBUG(dbgs() << "With the following permute control vector:\n");
10107 LLVM_DEBUG(VPermMask.dump());
10108
10109 if (isLittleEndian)
10110 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
10111 V2, V1, VPermMask);
10112 else
10113 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
10114 V1, V2, VPermMask);
10115}
10116
10117/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
10118/// vector comparison. If it is, return true and fill in Opc/isDot with
10119/// information about the intrinsic.
10120static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
10121 bool &isDot, const PPCSubtarget &Subtarget) {
10122 unsigned IntrinsicID =
10123 cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
10124 CompareOpc = -1;
10125 isDot = false;
10126 switch (IntrinsicID) {
10127 default:
10128 return false;
10129 // Comparison predicates.
10130 case Intrinsic::ppc_altivec_vcmpbfp_p:
10131 CompareOpc = 966;
10132 isDot = true;
10133 break;
10134 case Intrinsic::ppc_altivec_vcmpeqfp_p:
10135 CompareOpc = 198;
10136 isDot = true;
10137 break;
10138 case Intrinsic::ppc_altivec_vcmpequb_p:
10139 CompareOpc = 6;
10140 isDot = true;
10141 break;
10142 case Intrinsic::ppc_altivec_vcmpequh_p:
10143 CompareOpc = 70;
10144 isDot = true;
10145 break;
10146 case Intrinsic::ppc_altivec_vcmpequw_p:
10147 CompareOpc = 134;
10148 isDot = true;
10149 break;
10150 case Intrinsic::ppc_altivec_vcmpequd_p:
10151 if (Subtarget.hasP8Altivec()) {
10152 CompareOpc = 199;
10153 isDot = true;
10154 } else
10155 return false;
10156 break;
10157 case Intrinsic::ppc_altivec_vcmpneb_p:
10158 case Intrinsic::ppc_altivec_vcmpneh_p:
10159 case Intrinsic::ppc_altivec_vcmpnew_p:
10160 case Intrinsic::ppc_altivec_vcmpnezb_p:
10161 case Intrinsic::ppc_altivec_vcmpnezh_p:
10162 case Intrinsic::ppc_altivec_vcmpnezw_p:
10163 if (Subtarget.hasP9Altivec()) {
10164 switch (IntrinsicID) {
10165 default:
10166 llvm_unreachable("Unknown comparison intrinsic.");
10167 case Intrinsic::ppc_altivec_vcmpneb_p:
10168 CompareOpc = 7;
10169 break;
10170 case Intrinsic::ppc_altivec_vcmpneh_p:
10171 CompareOpc = 71;
10172 break;
10173 case Intrinsic::ppc_altivec_vcmpnew_p:
10174 CompareOpc = 135;
10175 break;
10176 case Intrinsic::ppc_altivec_vcmpnezb_p:
10177 CompareOpc = 263;
10178 break;
10179 case Intrinsic::ppc_altivec_vcmpnezh_p:
10180 CompareOpc = 327;
10181 break;
10182 case Intrinsic::ppc_altivec_vcmpnezw_p:
10183 CompareOpc = 391;
10184 break;
10185 }
10186 isDot = true;
10187 } else
10188 return false;
10189 break;
10190 case Intrinsic::ppc_altivec_vcmpgefp_p:
10191 CompareOpc = 454;
10192 isDot = true;
10193 break;
10194 case Intrinsic::ppc_altivec_vcmpgtfp_p:
10195 CompareOpc = 710;
10196 isDot = true;
10197 break;
10198 case Intrinsic::ppc_altivec_vcmpgtsb_p:
10199 CompareOpc = 774;
10200 isDot = true;
10201 break;
10202 case Intrinsic::ppc_altivec_vcmpgtsh_p:
10203 CompareOpc = 838;
10204 isDot = true;
10205 break;
10206 case Intrinsic::ppc_altivec_vcmpgtsw_p:
10207 CompareOpc = 902;
10208 isDot = true;
10209 break;
10210 case Intrinsic::ppc_altivec_vcmpgtsd_p:
10211 if (Subtarget.hasP8Altivec()) {
10212 CompareOpc = 967;
10213 isDot = true;
10214 } else
10215 return false;
10216 break;
10217 case Intrinsic::ppc_altivec_vcmpgtub_p:
10218 CompareOpc = 518;
10219 isDot = true;
10220 break;
10221 case Intrinsic::ppc_altivec_vcmpgtuh_p:
10222 CompareOpc = 582;
10223 isDot = true;
10224 break;
10225 case Intrinsic::ppc_altivec_vcmpgtuw_p:
10226 CompareOpc = 646;
10227 isDot = true;
10228 break;
10229 case Intrinsic::ppc_altivec_vcmpgtud_p:
10230 if (Subtarget.hasP8Altivec()) {
10231 CompareOpc = 711;
10232 isDot = true;
10233 } else
10234 return false;
10235 break;
10236
10237 case Intrinsic::ppc_altivec_vcmpequq:
10238 case Intrinsic::ppc_altivec_vcmpgtsq:
10239 case Intrinsic::ppc_altivec_vcmpgtuq:
10240 if (!Subtarget.isISA3_1())
10241 return false;
10242 switch (IntrinsicID) {
10243 default:
10244 llvm_unreachable("Unknown comparison intrinsic.");
10245 case Intrinsic::ppc_altivec_vcmpequq:
10246 CompareOpc = 455;
10247 break;
10248 case Intrinsic::ppc_altivec_vcmpgtsq:
10249 CompareOpc = 903;
10250 break;
10251 case Intrinsic::ppc_altivec_vcmpgtuq:
10252 CompareOpc = 647;
10253 break;
10254 }
10255 break;
10256
10257 // VSX predicate comparisons use the same infrastructure
10258 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10259 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10260 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10261 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10262 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10263 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10264 if (Subtarget.hasVSX()) {
10265 switch (IntrinsicID) {
10266 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10267 CompareOpc = 99;
10268 break;
10269 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10270 CompareOpc = 115;
10271 break;
10272 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10273 CompareOpc = 107;
10274 break;
10275 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10276 CompareOpc = 67;
10277 break;
10278 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10279 CompareOpc = 83;
10280 break;
10281 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10282 CompareOpc = 75;
10283 break;
10284 }
10285 isDot = true;
10286 } else
10287 return false;
10288 break;
10289
10290 // Normal Comparisons.
10291 case Intrinsic::ppc_altivec_vcmpbfp:
10292 CompareOpc = 966;
10293 break;
10294 case Intrinsic::ppc_altivec_vcmpeqfp:
10295 CompareOpc = 198;
10296 break;
10297 case Intrinsic::ppc_altivec_vcmpequb:
10298 CompareOpc = 6;
10299 break;
10300 case Intrinsic::ppc_altivec_vcmpequh:
10301 CompareOpc = 70;
10302 break;
10303 case Intrinsic::ppc_altivec_vcmpequw:
10304 CompareOpc = 134;
10305 break;
10306 case Intrinsic::ppc_altivec_vcmpequd:
10307 if (Subtarget.hasP8Altivec())
10308 CompareOpc = 199;
10309 else
10310 return false;
10311 break;
10312 case Intrinsic::ppc_altivec_vcmpneb:
10313 case Intrinsic::ppc_altivec_vcmpneh:
10314 case Intrinsic::ppc_altivec_vcmpnew:
10315 case Intrinsic::ppc_altivec_vcmpnezb:
10316 case Intrinsic::ppc_altivec_vcmpnezh:
10317 case Intrinsic::ppc_altivec_vcmpnezw:
10318 if (Subtarget.hasP9Altivec())
10319 switch (IntrinsicID) {
10320 default:
10321 llvm_unreachable("Unknown comparison intrinsic.");
10322 case Intrinsic::ppc_altivec_vcmpneb:
10323 CompareOpc = 7;
10324 break;
10325 case Intrinsic::ppc_altivec_vcmpneh:
10326 CompareOpc = 71;
10327 break;
10328 case Intrinsic::ppc_altivec_vcmpnew:
10329 CompareOpc = 135;
10330 break;
10331 case Intrinsic::ppc_altivec_vcmpnezb:
10332 CompareOpc = 263;
10333 break;
10334 case Intrinsic::ppc_altivec_vcmpnezh:
10335 CompareOpc = 327;
10336 break;
10337 case Intrinsic::ppc_altivec_vcmpnezw:
10338 CompareOpc = 391;
10339 break;
10340 }
10341 else
10342 return false;
10343 break;
10344 case Intrinsic::ppc_altivec_vcmpgefp:
10345 CompareOpc = 454;
10346 break;
10347 case Intrinsic::ppc_altivec_vcmpgtfp:
10348 CompareOpc = 710;
10349 break;
10350 case Intrinsic::ppc_altivec_vcmpgtsb:
10351 CompareOpc = 774;
10352 break;
10353 case Intrinsic::ppc_altivec_vcmpgtsh:
10354 CompareOpc = 838;
10355 break;
10356 case Intrinsic::ppc_altivec_vcmpgtsw:
10357 CompareOpc = 902;
10358 break;
10359 case Intrinsic::ppc_altivec_vcmpgtsd:
10360 if (Subtarget.hasP8Altivec())
10361 CompareOpc = 967;
10362 else
10363 return false;
10364 break;
10365 case Intrinsic::ppc_altivec_vcmpgtub:
10366 CompareOpc = 518;
10367 break;
10368 case Intrinsic::ppc_altivec_vcmpgtuh:
10369 CompareOpc = 582;
10370 break;
10371 case Intrinsic::ppc_altivec_vcmpgtuw:
10372 CompareOpc = 646;
10373 break;
10374 case Intrinsic::ppc_altivec_vcmpgtud:
10375 if (Subtarget.hasP8Altivec())
10376 CompareOpc = 711;
10377 else
10378 return false;
10379 break;
10380 case Intrinsic::ppc_altivec_vcmpequq_p:
10381 case Intrinsic::ppc_altivec_vcmpgtsq_p:
10382 case Intrinsic::ppc_altivec_vcmpgtuq_p:
10383 if (!Subtarget.isISA3_1())
10384 return false;
10385 switch (IntrinsicID) {
10386 default:
10387 llvm_unreachable("Unknown comparison intrinsic.");
10388 case Intrinsic::ppc_altivec_vcmpequq_p:
10389 CompareOpc = 455;
10390 break;
10391 case Intrinsic::ppc_altivec_vcmpgtsq_p:
10392 CompareOpc = 903;
10393 break;
10394 case Intrinsic::ppc_altivec_vcmpgtuq_p:
10395 CompareOpc = 647;
10396 break;
10397 }
10398 isDot = true;
10399 break;
10400 }
10401 return true;
10402}
10403
10404/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
10405/// lower, do it, otherwise return null.
10406SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
10407 SelectionDAG &DAG) const {
10408 unsigned IntrinsicID =
10409 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
10410
10411 SDLoc dl(Op);
10412
10413 switch (IntrinsicID) {
10414 case Intrinsic::thread_pointer:
10415 // Reads the thread pointer register, used for __builtin_thread_pointer.
10416 if (Subtarget.isPPC64())
10417 return DAG.getRegister(PPC::X13, MVT::i64);
10418 return DAG.getRegister(PPC::R2, MVT::i32);
10419
10420 case Intrinsic::ppc_mma_disassemble_acc:
10421 case Intrinsic::ppc_mma_disassemble_pair: {
10422 int NumVecs = 2;
10423 SDValue WideVec = Op.getOperand(1);
10424 if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
10425 NumVecs = 4;
10426 WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
10427 }
10428 SmallVector<SDValue, 4> RetOps;
10429 for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
10430 SDValue Extract = DAG.getNode(
10431 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
10432 DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
10433 : VecNo,
10434 dl, MVT::i64));
10435 RetOps.push_back(Extract);
10436 }
10437 return DAG.getMergeValues(RetOps, dl);
10438 }
10439 }
10440
10441 // If this is a lowered altivec predicate compare, CompareOpc is set to the
10442 // opcode number of the comparison.
10443 int CompareOpc;
10444 bool isDot;
10445 if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
10446 return SDValue(); // Don't custom lower most intrinsics.
10447
10448 // If this is a non-dot comparison, make the VCMP node and we are done.
10449 if (!isDot) {
10450 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
10451 Op.getOperand(1), Op.getOperand(2),
10452 DAG.getConstant(CompareOpc, dl, MVT::i32));
10453 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
10454 }
10455
10456 // Create the PPCISD altivec 'dot' comparison node.
10457 SDValue Ops[] = {
10458 Op.getOperand(2), // LHS
10459 Op.getOperand(3), // RHS
10460 DAG.getConstant(CompareOpc, dl, MVT::i32)
10461 };
10462 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
10463 SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
10464
10465 // Now that we have the comparison, emit a copy from the CR to a GPR.
10466 // This is flagged to the above dot comparison.
10467 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
10468 DAG.getRegister(PPC::CR6, MVT::i32),
10469 CompNode.getValue(1));
10470
10471 // Unpack the result based on how the target uses it.
10472 unsigned BitNo; // Bit # of CR6.
10473 bool InvertBit; // Invert result?
10474 switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
10475 default: // Can't happen, don't crash on invalid number though.
10476 case 0: // Return the value of the EQ bit of CR6.
10477 BitNo = 0; InvertBit = false;
10478 break;
10479 case 1: // Return the inverted value of the EQ bit of CR6.
10480 BitNo = 0; InvertBit = true;
10481 break;
10482 case 2: // Return the value of the LT bit of CR6.
10483 BitNo = 2; InvertBit = false;
10484 break;
10485 case 3: // Return the inverted value of the LT bit of CR6.
10486 BitNo = 2; InvertBit = true;
10487 break;
10488 }
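// For example, returning the LT bit (BitNo == 2) uses a shift amount of
// 8 - (3 - 2) == 7 below, before the low bit is isolated with an AND of 1.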
10489
10490 // Shift the bit into the low position.
10491 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
10492 DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
10493 // Isolate the bit.
10494 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
10495 DAG.getConstant(1, dl, MVT::i32));
10496
10497 // If we are supposed to, toggle the bit.
10498 if (InvertBit)
10499 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
10500 DAG.getConstant(1, dl, MVT::i32));
10501 return Flags;
10502}
10503
10504SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
10505 SelectionDAG &DAG) const {
10506 // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
10507 // the beginning of the argument list.
10508 int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
10509 SDLoc DL(Op);
10510 switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
10511 case Intrinsic::ppc_cfence: {
10512 assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
10513 assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
10514 return SDValue(DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
10515 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
10516 Op.getOperand(ArgStart + 1)),
10517 Op.getOperand(0)),
10518 0);
10519 }
10520 default:
10521 break;
10522 }
10523 return SDValue();
10524}
10525
10526// Lower scalar BSWAP64 to xxbrd.
10527SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
10528 SDLoc dl(Op);
10529 // MTVSRDD
10530 Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
10531 Op.getOperand(0));
10532 // XXBRD
10533 Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
10534 // MFVSRD
10535 int VectorIndex = 0;
10536 if (Subtarget.isLittleEndian())
10537 VectorIndex = 1;
10538 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
10539 DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
10540 return Op;
10541}
10542
10543// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
10544// compared to a value that is atomically loaded (atomic loads zero-extend).
10545SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
10546 SelectionDAG &DAG) const {
10547 assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
10548 "Expecting an atomic compare-and-swap here.");
10549 SDLoc dl(Op);
10550 auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
10551 EVT MemVT = AtomicNode->getMemoryVT();
10552 if (MemVT.getSizeInBits() >= 32)
10553 return Op;
10554
10555 SDValue CmpOp = Op.getOperand(2);
10556 // If this is already correctly zero-extended, leave it alone.
10557 auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
10558 if (DAG.MaskedValueIsZero(CmpOp, HighBits))
10559 return Op;
10560
10561 // Clear the high bits of the compare operand.
10562 unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
10563 SDValue NewCmpOp =
10564 DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
10565 DAG.getConstant(MaskVal, dl, MVT::i32));
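// For example, for MemVT == MVT::i8, MaskVal == (1 << 8) - 1 == 0xFF, so
// only the low byte of the compare operand is preserved.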
10566
10567 // Replace the existing compare operand with the properly zero-extended one.
10568 SmallVector<SDValue, 4> Ops;
10569 for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
10570 Ops.push_back(AtomicNode->getOperand(i));
10571 Ops[2] = NewCmpOp;
10572 MachineMemOperand *MMO = AtomicNode->getMemOperand();
10573 SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
10574 auto NodeTy =
10575 (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
10576 return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
10577}
10578
10579SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
10580 SelectionDAG &DAG) const {
10581 SDLoc dl(Op);
10582 // Create a stack slot that is 16-byte aligned.
10583 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
10584 int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
10585 EVT PtrVT = getPointerTy(DAG.getDataLayout());
10586 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
10587
10588 // Store the input value into Value#0 of the stack slot.
10589 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
10590 MachinePointerInfo());
10591 // Load it out.
10592 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
10593}
10594
10595SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
10596 SelectionDAG &DAG) const {
10597 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
10598 "Should only be called for ISD::INSERT_VECTOR_ELT");
10599
10600 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
10601 // We have legal lowering for constant indices but not for variable ones.
10602 if (!C)
10603 return SDValue();
10604
10605 EVT VT = Op.getValueType();
10606 SDLoc dl(Op);
10607 SDValue V1 = Op.getOperand(0);
10608 SDValue V2 = Op.getOperand(1);
10609 // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
10610 if (VT == MVT::v8i16 || VT == MVT::v16i8) {
10611 SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
10612 unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
10613 unsigned InsertAtElement = C->getZExtValue();
10614 unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
10615 if (Subtarget.isLittleEndian()) {
10616 InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
10617 }
10618 return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
10619 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10620 }
10621 return Op;
10622}
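// Editorial note (illustrative sketch, not part of the original file): how the
// VECINSERT byte index above is derived. For v8i16, element 3 occupies bytes
// 6..7 counted big-endian; on little-endian targets the index is mirrored
// within the 16-byte register, giving (16 - 2) - 6 == 8.
static unsigned sketchVecInsertByte(unsigned Elt, unsigned EltBytes, bool IsLE) {
  unsigned Byte = Elt * EltBytes;               // big-endian byte offset
  return IsLE ? (16 - EltBytes) - Byte : Byte;  // mirror for little endian
}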
10623
10624SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
10625 SelectionDAG &DAG) const {
10626 SDLoc dl(Op);
10627 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
10628 SDValue LoadChain = LN->getChain();
10629 SDValue BasePtr = LN->getBasePtr();
10630 EVT VT = Op.getValueType();
10631
10632 if (VT != MVT::v256i1 && VT != MVT::v512i1)
10633 return Op;
10634
10635 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
10636 // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
10637 // 2 or 4 vsx registers.
10638 assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
10639 "Type unsupported without MMA");
10640 assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
10641 "Type unsupported without paired vector support");
10642 Align Alignment = LN->getAlign();
10643 SmallVector<SDValue, 4> Loads;
10644 SmallVector<SDValue, 4> LoadChains;
10645 unsigned NumVecs = VT.getSizeInBits() / 128;
10646 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
10647 SDValue Load =
10648 DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
10649 LN->getPointerInfo().getWithOffset(Idx * 16),
10650 commonAlignment(Alignment, Idx * 16),
10651 LN->getMemOperand()->getFlags(), LN->getAAInfo());
10652 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
10653 DAG.getConstant(16, dl, BasePtr.getValueType()));
10654 Loads.push_back(Load);
10655 LoadChains.push_back(Load.getValue(1));
10656 }
10657 if (Subtarget.isLittleEndian()) {
10658 std::reverse(Loads.begin(), Loads.end());
10659 std::reverse(LoadChains.begin(), LoadChains.end());
10660 }
10661 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
10662 SDValue Value =
10663 DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
10664 dl, VT, Loads);
10665 SDValue RetOps[] = {Value, TF};
10666 return DAG.getMergeValues(RetOps, dl);
10667}
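// Editorial note (illustrative, not part of the original file): the loop above
// issues one v16i8 load per 128-bit slice of the pair/accumulator type at byte
// offsets 0, 16, (32, 48), then reverses the slice order on little endian
// before ACC_BUILD/PAIR_BUILD consumes them.
static unsigned sketchNumSubvectorLoads(unsigned VTSizeInBits) {
  return VTSizeInBits / 128; // v256i1 -> 2 loads, v512i1 -> 4 loads
}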
10668
10669SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
10670 SelectionDAG &DAG) const {
10671 SDLoc dl(Op);
10672 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
10673 SDValue StoreChain = SN->getChain();
10674 SDValue BasePtr = SN->getBasePtr();
10675 SDValue Value = SN->getValue();
10676 EVT StoreVT = Value.getValueType();
10677
10678 if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
10679 return Op;
10680
10681 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
10682 // Here we create 2 or 4 v16i8 stores to store the pair or accumulator
10683 // underlying registers individually.
10684 assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
10685 "Type unsupported without MMA");
10686 assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
10687 "Type unsupported without paired vector support");
10688 Align Alignment = SN->getAlign();
10689 SmallVector<SDValue, 4> Stores;
10690 unsigned NumVecs = 2;
10691 if (StoreVT == MVT::v512i1) {
10692 Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
10693 NumVecs = 4;
10694 }
10695 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
10696 unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
10697 SDValue Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
10698 DAG.getConstant(VecNum, dl, MVT::i64));
10699 SDValue Store =
10700 DAG.getStore(StoreChain, dl, Elt, BasePtr,
10701 SN->getPointerInfo().getWithOffset(Idx * 16),
10702 commonAlignment(Alignment, Idx * 16),
10703 SN->getMemOperand()->getFlags(), SN->getAAInfo());
10704 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
10705 DAG.getConstant(16, dl, BasePtr.getValueType()));
10706 Stores.push_back(Store);
10707 }
10708 SDValue TF = DAG.getTokenFactor(dl, Stores);
10709 return TF;
10710}
10711
10712SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
10713 SDLoc dl(Op);
10714 if (Op.getValueType() == MVT::v4i32) {
10715 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
10716
10717 SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
10718 // +16 as shift amt.
10719 SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
10720 SDValue RHSSwap = // = vrlw RHS, 16
10721 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
10722
10723 // Shrinkify inputs to v8i16.
10724 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
10725 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
10726 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
10727
10728 // Low parts multiplied together, generating 32-bit results (we ignore the
10729 // top parts).
10730 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
10731 LHS, RHS, DAG, dl, MVT::v4i32);
10732
10733 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
10734 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
10735 // Shift the high parts up 16 bits.
10736 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
10737 Neg16, DAG, dl);
10738 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
10739 } else if (Op.getValueType() == MVT::v16i8) {
10740 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
10741 bool isLittleEndian = Subtarget.isLittleEndian();
10742
10743 // Multiply the even 8-bit parts, producing 16-bit sums.
10744 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
10745 LHS, RHS, DAG, dl, MVT::v8i16);
10746 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
10747
10748 // Multiply the odd 8-bit parts, producing 16-bit sums.
10749 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
10750 LHS, RHS, DAG, dl, MVT::v8i16);
10751 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
10752
10753 // Merge the results together. Because vmuleub and vmuloub are
10754 // instructions with a big-endian bias, we must reverse the
10755 // element numbering and reverse the meaning of "odd" and "even"
10756 // when generating little endian code.
10757 int Ops[16];
10758 for (unsigned i = 0; i != 8; ++i) {
10759 if (isLittleEndian) {
10760 Ops[i*2 ] = 2*i;
10761 Ops[i*2+1] = 2*i+16;
10762 } else {
10763 Ops[i*2 ] = 2*i+1;
10764 Ops[i*2+1] = 2*i+1+16;
10765 }
10766 }
10767 if (isLittleEndian)
10768 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
10769 else
10770 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
10771 } else {
10772 llvm_unreachable("Unknown mul to lower!");
10773 }
10774}
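// Editorial note (illustrative sketch, assuming 32-bit unsigned wraparound;
// not part of the original file): per 32-bit lane, the v4i32 path above
// computes a*b from 16-bit halves, which is what vmulouh, vmsumuhm on the
// rotated operand, vslw and the final add implement in vector form.
static unsigned sketchMul32Via16(unsigned A, unsigned B) {
  unsigned Lo = (A & 0xFFFF) * (B & 0xFFFF);                         // vmulouh
  unsigned Hi = (A & 0xFFFF) * (B >> 16) + (A >> 16) * (B & 0xFFFF); // vmsumuhm of A and rotl(B, 16)
  return Lo + (Hi << 16);                                            // vslw by 16, then add
}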
10775
10776SDValue PPCTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
10777
10778 assert(Op.getOpcode() == ISD::ABS && "Should only be called for ISD::ABS");
10779
10780 EVT VT = Op.getValueType();
10781 assert(VT.isVector() &&
10782 "Only set vector abs as custom, scalar abs shouldn't reach here!");
10783 assert((VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
10784 VT == MVT::v16i8) &&
10785 "Unexpected vector element type!");
10786 assert((VT != MVT::v2i64 || Subtarget.hasP8Altivec()) &&
10787 "Current subtarget doesn't support smax v2i64!");
10788
10789 // For vector abs, it can be lowered to:
10790 // abs x
10791 // ==>
10792 // y = -x
10793 // smax(x, y)
10794
10795 SDLoc dl(Op);
10796 SDValue X = Op.getOperand(0);
10797 SDValue Zero = DAG.getConstant(0, dl, VT);
10798 SDValue Y = DAG.getNode(ISD::SUB, dl, VT, Zero, X);
10799
10800 // SMAX patch https://reviews.llvm.org/D47332
10801 // hasn't landed yet, so use intrinsic first here.
10802 // TODO: Should use SMAX directly once SMAX patch landed
10803 Intrinsic::ID BifID = Intrinsic::ppc_altivec_vmaxsw;
10804 if (VT == MVT::v2i64)
10805 BifID = Intrinsic::ppc_altivec_vmaxsd;
10806 else if (VT == MVT::v8i16)
10807 BifID = Intrinsic::ppc_altivec_vmaxsh;
10808 else if (VT == MVT::v16i8)
10809 BifID = Intrinsic::ppc_altivec_vmaxsb;
10810
10811 return BuildIntrinsicOp(BifID, X, Y, DAG, dl, VT);
10812}
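// Editorial note (illustrative sketch, not part of the original file): the
// lowering above uses abs(x) == smax(x, 0 - x) under two's-complement
// wraparound, which matches the vector semantics (INT_MIN maps to itself).
static int sketchAbsViaSmax(int X) {
  int NegX = (int)(0u - (unsigned)X); // y = 0 - x, computed with wraparound
  return X > NegX ? X : NegX;         // smax(x, y) via vmaxs[bhwd]
}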
10813
10814// Custom lowering for fpext v2f32 to v2f64
10815SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
10816
10817 assert(Op.getOpcode() == ISD::FP_EXTEND &&
10818 "Should only be called for ISD::FP_EXTEND");
10819
10820 // FIXME: handle extends from half precision float vectors on P9.
10821 // We only want to custom lower an extend from v2f32 to v2f64.
10822 if (Op.getValueType() != MVT::v2f64 ||
10823 Op.getOperand(0).getValueType() != MVT::v2f32)
10824 return SDValue();
10825
10826 SDLoc dl(Op);
10827 SDValue Op0 = Op.getOperand(0);
10828
10829 switch (Op0.getOpcode()) {
10830 default:
10831 return SDValue();
10832 case ISD::EXTRACT_SUBVECTOR: {
10833 assert(Op0.getNumOperands() == 2 &&
10834 isa<ConstantSDNode>(Op0->getOperand(1)) &&
10835 "Node should have 2 operands with second one being a constant!");
10836
10837 if (Op0.getOperand(0).getValueType() != MVT::v4f32)
10838 return SDValue();
10839
10840 // Custom lower is only done for high or low doubleword.
10841 int Idx = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
10842 if (Idx % 2 != 0)
10843 return SDValue();
10844
10845 // Since input is v4f32, at this point Idx is either 0 or 2.
10846 // Shift to get the doubleword position we want.
10847 int DWord = Idx >> 1;
10848
10849 // High and low word positions are different on little endian.
10850 if (Subtarget.isLittleEndian())
10851 DWord ^= 0x1;
10852
10853 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
10854 Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
10855 }
10856 case ISD::FADD:
10857 case ISD::FMUL:
10858 case ISD::FSUB: {
10859 SDValue NewLoad[2];
10860 for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
10861 // Ensure both inputs are loads.
10862 SDValue LdOp = Op0.getOperand(i);
10863 if (LdOp.getOpcode() != ISD::LOAD)
10864 return SDValue();
10865 // Generate new load node.
10866 LoadSDNode *LD = cast<LoadSDNode>(LdOp);
10867 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
10868 NewLoad[i] = DAG.getMemIntrinsicNode(
10869 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
10870 LD->getMemoryVT(), LD->getMemOperand());
10871 }
10872 SDValue NewOp =
10873 DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
10874 NewLoad[1], Op0.getNode()->getFlags());
10875 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
10876 DAG.getConstant(0, dl, MVT::i32));
10877 }
10878 case ISD::LOAD: {
10879 LoadSDNode *LD = cast<LoadSDNode>(Op0);
10880 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
10881 SDValue NewLd = DAG.getMemIntrinsicNode(
10882 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
10883 LD->getMemoryVT(), LD->getMemOperand());
10884 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
10885 DAG.getConstant(0, dl, MVT::i32));
10886 }
10887 }
10888 llvm_unreachable("ERROR: Should return for all cases within switch.");
10889}
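// Editorial note (illustrative sketch, not part of the original file): the
// EXTRACT_SUBVECTOR case above only accepts even start indices (0 or 2) into a
// v4f32, converts them to a doubleword number, and flips that number on
// little-endian subtargets before emitting FP_EXTEND_HALF.
static int sketchFpExtendDWord(int StartIdx, bool IsLE) {
  int DWord = StartIdx >> 1;         // index 0 -> doubleword 0, index 2 -> doubleword 1
  return IsLE ? (DWord ^ 1) : DWord; // high and low are swapped on little endian
}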
10890
10891/// LowerOperation - Provide custom lowering hooks for some operations.
10892///
10893SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
10894 switch (Op.getOpcode()) {
10895 default: llvm_unreachable("Wasn't expecting to be able to lower this!");
10896 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
10897 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
10898 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
10899 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
10900 case ISD::JumpTable: return LowerJumpTable(Op, DAG);
10901 case ISD::SETCC: return LowerSETCC(Op, DAG);
10902 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
10903 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
10904
10905 // Variable argument lowering.
10906 case ISD::VASTART: return LowerVASTART(Op, DAG);
10907 case ISD::VAARG: return LowerVAARG(Op, DAG);
10908 case ISD::VACOPY: return LowerVACOPY(Op, DAG);
10909
10910 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
10911 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
10912 case ISD::GET_DYNAMIC_AREA_OFFSET:
10913 return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
10914
10915 // Exception handling lowering.
10916 case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
10917 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
10918 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
10919
10920 case ISD::LOAD: return LowerLOAD(Op, DAG);
10921 case ISD::STORE: return LowerSTORE(Op, DAG);
10922 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
10923 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
10924 case ISD::STRICT_FP_TO_UINT:
10925 case ISD::STRICT_FP_TO_SINT:
10926 case ISD::FP_TO_UINT:
10927 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
10928 case ISD::STRICT_UINT_TO_FP:
10929 case ISD::STRICT_SINT_TO_FP:
10930 case ISD::UINT_TO_FP:
10931 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
10932 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
10933
10934 // Lower 64-bit shifts.
10935 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
10936 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
10937 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
10938
10939 case ISD::FSHL: return LowerFunnelShift(Op, DAG);
10940 case ISD::FSHR: return LowerFunnelShift(Op, DAG);
10941
10942 // Vector-related lowering.
10943 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
10944 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
10945 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
10946 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
10947 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
10948 case ISD::MUL: return LowerMUL(Op, DAG);
10949 case ISD::ABS: return LowerABS(Op, DAG);
10950 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
10951 case ISD::ROTL: return LowerROTL(Op, DAG);
10952
10953 // For counter-based loop handling.
10954 case ISD::INTRINSIC_W_CHAIN: return SDValue();
10955
10956 case ISD::BITCAST: return LowerBITCAST(Op, DAG);
10957
10958 // Frame & Return address.
10959 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
10960 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
10961
10962 case ISD::INTRINSIC_VOID:
10963 return LowerINTRINSIC_VOID(Op, DAG);
10964 case ISD::BSWAP:
10965 return LowerBSWAP(Op, DAG);
10966 case ISD::ATOMIC_CMP_SWAP:
10967 return LowerATOMIC_CMP_SWAP(Op, DAG);
10968 }
10969}
10970
10971void PPCTargetLowering::LowerOperationWrapper(SDNode *N,
10972 SmallVectorImpl<SDValue> &Results,
10973 SelectionDAG &DAG) const {
10974 SDValue Res = LowerOperation(SDValue(N, 0), DAG);
10975
10976 if (!Res.getNode())
10977 return;
10978
10979 // Take the return value as-is if original node has only one result.
10980 if (N->getNumValues() == 1) {
10981 Results.push_back(Res);
10982 return;
10983 }
10984
10985 // New node should have the same number of results.
10986 assert((N->getNumValues() == Res->getNumValues()) &&
10987 "Lowering returned the wrong number of results!");
10988
10989 for (unsigned i = 0; i < N->getNumValues(); ++i)
10990 Results.push_back(Res.getValue(i));
10991}
10992
10993void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
10994 SmallVectorImpl<SDValue>&Results,
10995 SelectionDAG &DAG) const {
10996 SDLoc dl(N);
10997 switch (N->getOpcode()) {
10998 default:
10999 llvm_unreachable("Do not know how to custom type legalize this operation!");
11000 case ISD::READCYCLECOUNTER: {
11001 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
11002 SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
11003
11004 Results.push_back(
11005 DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
11006 Results.push_back(RTB.getValue(2));
11007 break;
11008 }
11009 case ISD::INTRINSIC_W_CHAIN: {
11010 if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
11011 Intrinsic::loop_decrement)
11012 break;
11013
11014 assert(N->getValueType(0) == MVT::i1 &&
11015 "Unexpected result type for CTR decrement intrinsic");
11016 EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
11017 N->getValueType(0));
11018 SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
11019 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
11020 N->getOperand(1));
11021
11022 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
11023 Results.push_back(NewInt.getValue(1));
11024 break;
11025 }
11026 case ISD::VAARG: {
11027 if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
11028 return;
11029
11030 EVT VT = N->getValueType(0);
11031
11032 if (VT == MVT::i64) {
11033 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
11034
11035 Results.push_back(NewNode);
11036 Results.push_back(NewNode.getValue(1));
11037 }
11038 return;
11039 }
11040 case ISD::STRICT_FP_TO_SINT:
11041 case ISD::STRICT_FP_TO_UINT:
11042 case ISD::FP_TO_SINT:
11043 case ISD::FP_TO_UINT:
11044 // LowerFP_TO_INT() can only handle f32 and f64.
11045 if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
11046 MVT::ppcf128)
11047 return;
11048 Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
11049 return;
11050 case ISD::TRUNCATE: {
11051 if (!N->getValueType(0).isVector())
11052 return;
11053 SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
11054 if (Lowered)
11055 Results.push_back(Lowered);
11056 return;
11057 }
11058 case ISD::FSHL:
11059 case ISD::FSHR:
11060 // Don't handle funnel shifts here.
11061 return;
11062 case ISD::BITCAST:
11063 // Don't handle bitcast here.
11064 return;
11065 case ISD::FP_EXTEND:
11066 SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
11067 if (Lowered)
11068 Results.push_back(Lowered);
11069 return;
11070 }
11071}
11072
11073//===----------------------------------------------------------------------===//
11074// Other Lowering Code
11075//===----------------------------------------------------------------------===//
11076
11077static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
11078 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
11079 Function *Func = Intrinsic::getDeclaration(M, Id);
11080 return Builder.CreateCall(Func, {});
11081}
11082
11083// The mappings for emitLeading/TrailingFence are taken from
11084// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
11085Instruction *PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
11086 Instruction *Inst,
11087 AtomicOrdering Ord) const {
11088 if (Ord == AtomicOrdering::SequentiallyConsistent)
11089 return callIntrinsic(Builder, Intrinsic::ppc_sync);
11090 if (isReleaseOrStronger(Ord))
11091 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
11092 return nullptr;
11093}
11094
11095Instruction *PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
11096 Instruction *Inst,
11097 AtomicOrdering Ord) const {
11098 if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
11099 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
11100 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
11101 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
11102 if (isa<LoadInst>(Inst) && Subtarget.isPPC64())
11103 return Builder.CreateCall(
11104 Intrinsic::getDeclaration(
11105 Builder.GetInsertBlock()->getParent()->getParent(),
11106 Intrinsic::ppc_cfence, {Inst->getType()}),
11107 {Inst});
11108 // FIXME: Can use isync for rmw operation.
11109 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
11110 }
11111 return nullptr;
11112}
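// Editorial note (illustrative sketch, not part of the original file): the two
// hooks above pick fences as follows: a leading sync for seq_cst, a leading
// lwsync for release-or-stronger, and a trailing cfence (64-bit loads) or
// lwsync for acquire-or-stronger operations that include an atomic load.
enum class SketchLeadingFence { None, Sync, Lwsync };
static SketchLeadingFence sketchLeadingFence(bool IsSeqCst, bool IsReleaseOrStronger) {
  if (IsSeqCst)
    return SketchLeadingFence::Sync;   // full sync before seq_cst operations
  if (IsReleaseOrStronger)
    return SketchLeadingFence::Lwsync; // lwsync before release operations
  return SketchLeadingFence::None;
}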
11113
11114MachineBasicBlock *
11115PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
11116 unsigned AtomicSize,
11117 unsigned BinOpcode,
11118 unsigned CmpOpcode,
11119 unsigned CmpPred) const {
11120 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
11121 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11122
11123 auto LoadMnemonic = PPC::LDARX;
11124 auto StoreMnemonic = PPC::STDCX;
11125 switch (AtomicSize) {
11126 default:
11127 llvm_unreachable("Unexpected size of atomic entity");
11128 case 1:
11129 LoadMnemonic = PPC::LBARX;
11130 StoreMnemonic = PPC::STBCX;
11131 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
11132 break;
11133 case 2:
11134 LoadMnemonic = PPC::LHARX;
11135 StoreMnemonic = PPC::STHCX;
11136 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
11137 break;
11138 case 4:
11139 LoadMnemonic = PPC::LWARX;
11140 StoreMnemonic = PPC::STWCX;
11141 break;
11142 case 8:
11143 LoadMnemonic = PPC::LDARX;
11144 StoreMnemonic = PPC::STDCX;
11145 break;
11146 }
11147
11148 const BasicBlock *LLVM_BB = BB->getBasicBlock();
11149 MachineFunction *F = BB->getParent();
11150 MachineFunction::iterator It = ++BB->getIterator();
11151
11152 Register dest = MI.getOperand(0).getReg();
11153 Register ptrA = MI.getOperand(1).getReg();
11154 Register ptrB = MI.getOperand(2).getReg();
11155 Register incr = MI.getOperand(3).getReg();
11156 DebugLoc dl = MI.getDebugLoc();
11157
11158 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
11159 MachineBasicBlock *loop2MBB =
11160 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
11161 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11162 F->insert(It, loopMBB);
11163 if (CmpOpcode)
11164 F->insert(It, loop2MBB);
11165 F->insert(It, exitMBB);
11166 exitMBB->splice(exitMBB->begin(), BB,
11167 std::next(MachineBasicBlock::iterator(MI)), BB->end());
11168 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11169
11170 MachineRegisterInfo &RegInfo = F->getRegInfo();
11171 Register TmpReg = (!BinOpcode) ? incr :
11172 RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
11173 : &PPC::GPRCRegClass);
11174
11175 // thisMBB:
11176 // ...
11177 // fallthrough --> loopMBB
11178 BB->addSuccessor(loopMBB);
11179
11180 // loopMBB:
11181 // l[wd]arx dest, ptr
11182 // add r0, dest, incr
11183 // st[wd]cx. r0, ptr
11184 // bne- loopMBB
11185 // fallthrough --> exitMBB
11186
11187 // For max/min...
11188 // loopMBB:
11189 // l[wd]arx dest, ptr
11190 // cmpl?[wd] incr, dest
11191 // bgt exitMBB
11192 // loop2MBB:
11193 // st[wd]cx. dest, ptr
11194 // bne- loopMBB
11195 // fallthrough --> exitMBB
11196
11197 BB = loopMBB;
11198 BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
11199 .addReg(ptrA).addReg(ptrB);
11200 if (BinOpcode)
11201 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
11202 if (CmpOpcode) {
11203 // Signed comparisons of byte or halfword values must be sign-extended.
11204 if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
11205 Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
11206 BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
11207 ExtReg).addReg(dest);
11208 BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
11209 .addReg(incr).addReg(ExtReg);
11210 } else
11211 BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
11212 .addReg(incr).addReg(dest);
11213
11214 BuildMI(BB, dl, TII->get(PPC::BCC))
11215 .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
11216 BB->addSuccessor(loop2MBB);
11217 BB->addSuccessor(exitMBB);
11218 BB = loop2MBB;
11219 }
11220 BuildMI(BB, dl, TII->get(StoreMnemonic))
11221 .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
11222 BuildMI(BB, dl, TII->get(PPC::BCC))
11223 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
11224 BB->addSuccessor(loopMBB);
11225 BB->addSuccessor(exitMBB);
11226
11227 // exitMBB:
11228 // ...
11229 BB = exitMBB;
11230 return BB;
11231}
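// Editorial note (illustrative sketch with hypothetical helpers standing in
// for l[bhwd]arx / st[bhwd]cx.; not part of the original file): the machine
// blocks built above implement the usual reservation retry loop.
static unsigned sketchAtomicBinary(volatile unsigned *Ptr, unsigned Incr,
                                   unsigned (*LoadReserved)(volatile unsigned *),
                                   bool (*StoreConditional)(volatile unsigned *, unsigned)) {
  unsigned Dest;
  do {
    Dest = LoadReserved(Ptr);                    // l[bhwd]arx dest, ptr
  } while (!StoreConditional(Ptr, Dest + Incr)); // add; st[bhwd]cx.; bne- loop
  return Dest;                                   // prior value, as in 'dest'
}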
11232
11233MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
11234 MachineInstr &MI, MachineBasicBlock *BB,
11235 bool is8bit, // operation
11236 unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
11237 // If we support part-word atomic mnemonics, just use them
11238 if (Subtarget.hasPartwordAtomics())
11239 return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
11240 CmpPred);
11241
11242 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
11243 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11244 // In 64 bit mode we have to use 64 bits for addresses, even though the
11245 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
11246 // registers without caring whether they're 32 or 64, but here we're
11247 // doing actual arithmetic on the addresses.
11248 bool is64bit = Subtarget.isPPC64();
11249 bool isLittleEndian = Subtarget.isLittleEndian();
11250 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
11251
11252 const BasicBlock *LLVM_BB = BB->getBasicBlock();
11253 MachineFunction *F = BB->getParent();
11254 MachineFunction::iterator It = ++BB->getIterator();
11255
11256 Register dest = MI.getOperand(0).getReg();
11257 Register ptrA = MI.getOperand(1).getReg();
11258 Register ptrB = MI.getOperand(2).getReg();
11259 Register incr = MI.getOperand(3).getReg();
11260 DebugLoc dl = MI.getDebugLoc();
11261
11262 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
11263 MachineBasicBlock *loop2MBB =
11264 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
11265 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11266 F->insert(It, loopMBB);
11267 if (CmpOpcode)
11268 F->insert(It, loop2MBB);
11269 F->insert(It, exitMBB);
11270 exitMBB->splice(exitMBB->begin(), BB,
11271 std::next(MachineBasicBlock::iterator(MI)), BB->end());
11272 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11273
11274 MachineRegisterInfo &RegInfo = F->getRegInfo();
11275 const TargetRegisterClass *RC =
11276 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
11277 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
11278
11279 Register PtrReg = RegInfo.createVirtualRegister(RC);
11280 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
11281 Register ShiftReg =
11282 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
11283 Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
11284 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
11285 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
11286 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
11287 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
11288 Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
11289 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
11290 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
11291 Register Ptr1Reg;
11292 Register TmpReg =
11293 (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
11294
11295 // thisMBB:
11296 // ...
11297 // fallthrough --> loopMBB
11298 BB->addSuccessor(loopMBB);
11299
11300 // The 4-byte load must be aligned, while a char or short may be
11301 // anywhere in the word. Hence all this nasty bookkeeping code.
11302 // add ptr1, ptrA, ptrB [copy if ptrA==0]
11303 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
11304 // xori shift, shift1, 24 [16]
11305 // rlwinm ptr, ptr1, 0, 0, 29
11306 // slw incr2, incr, shift
11307 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
11308 // slw mask, mask2, shift
11309 // loopMBB:
11310 // lwarx tmpDest, ptr
11311 // add tmp, tmpDest, incr2
11312 // andc tmp2, tmpDest, mask
11313 // and tmp3, tmp, mask
11314 // or tmp4, tmp3, tmp2
11315 // stwcx. tmp4, ptr
11316 // bne- loopMBB
11317 // fallthrough --> exitMBB
11318 // srw dest, tmpDest, shift
11319 if (ptrA != ZeroReg) {
11320 Ptr1Reg = RegInfo.createVirtualRegister(RC);
11321 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
11322 .addReg(ptrA)
11323 .addReg(ptrB);
11324 } else {
11325 Ptr1Reg = ptrB;
11326 }
11327 // We need to use a 32-bit subregister here to avoid a register class
11328 // mismatch in 64-bit mode.
11329 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
11330 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
11331 .addImm(3)
11332 .addImm(27)
11333 .addImm(is8bit ? 28 : 27);
11334 if (!isLittleEndian)
11335 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
11336 .addReg(Shift1Reg)
11337 .addImm(is8bit ? 24 : 16);
11338 if (is64bit)
11339 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
11340 .addReg(Ptr1Reg)
11341 .addImm(0)
11342 .addImm(61);
11343 else
11344 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
11345 .addReg(Ptr1Reg)
11346 .addImm(0)
11347 .addImm(0)
11348 .addImm(29);
11349 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
11350 if (is8bit)
11351 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
11352 else {
11353 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
11354 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
11355 .addReg(Mask3Reg)
11356 .addImm(65535);
11357 }
11358 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
11359 .addReg(Mask2Reg)
11360 .addReg(ShiftReg);
11361
11362 BB = loopMBB;
11363 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
11364 .addReg(ZeroReg)
11365 .addReg(PtrReg);
11366 if (BinOpcode)
11367 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
11368 .addReg(Incr2Reg)
11369 .addReg(TmpDestReg);
11370 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
11371 .addReg(TmpDestReg)
11372 .addReg(MaskReg);
11373 BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
11374 if (CmpOpcode) {
11375 // For unsigned comparisons, we can directly compare the shifted values.
11376 // For signed comparisons we shift and sign extend.
11377 Register SReg = RegInfo.createVirtualRegister(GPRC);
11378 BuildMI(BB, dl, TII->get(PPC::AND), SReg)
11379 .addReg(TmpDestReg)
11380 .addReg(MaskReg);
11381 unsigned ValueReg = SReg;
11382 unsigned CmpReg = Incr2Reg;
11383 if (CmpOpcode == PPC::CMPW) {
11384 ValueReg = RegInfo.createVirtualRegister(GPRC);
11385 BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
11386 .addReg(SReg)
11387 .addReg(ShiftReg);
11388 Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
11389 BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
11390 .addReg(ValueReg);
11391 ValueReg = ValueSReg;
11392 CmpReg = incr;
11393 }
11394 BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
11395 .addReg(CmpReg)
11396 .addReg(ValueReg);
11397 BuildMI(BB, dl, TII->get(PPC::BCC))
11398 .addImm(CmpPred)
11399 .addReg(PPC::CR0)
11400 .addMBB(exitMBB);
11401 BB->addSuccessor(loop2MBB);
11402 BB->addSuccessor(exitMBB);
11403 BB = loop2MBB;
11404 }
11405 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
11406 BuildMI(BB, dl, TII->get(PPC::STWCX))
11407 .addReg(Tmp4Reg)
11408 .addReg(ZeroReg)
11409 .addReg(PtrReg);
11410 BuildMI(BB, dl, TII->get(PPC::BCC))
11411 .addImm(PPC::PRED_NE)
11412 .addReg(PPC::CR0)
11413 .addMBB(loopMBB);
11414 BB->addSuccessor(loopMBB);
11415 BB->addSuccessor(exitMBB);
11416
11417 // exitMBB:
11418 // ...
11419 BB = exitMBB;
11420 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
11421 .addReg(TmpDestReg)
11422 .addReg(ShiftReg);
11423 return BB;
11424}
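// Editorial note (illustrative sketch, not part of the original file): without
// partword atomics, the block above operates on the containing aligned word.
// The lane's bit position is derived from the low address bits (with an XOR on
// big endian), matching the rlwinm/xori sequence emitted above.
static unsigned sketchPartwordLaneShift(unsigned Addr, bool Is8bit, bool IsLE) {
  unsigned Shift1 = (Addr & (Is8bit ? 0x3u : 0x2u)) * 8;  // rlwinm shift1, ptr1, 3, 27, 28 [27]
  return IsLE ? Shift1 : (Shift1 ^ (Is8bit ? 24u : 16u)); // xori shift, shift1, 24 [16]
}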
11425
11426llvm::MachineBasicBlock *
11427PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
11428 MachineBasicBlock *MBB) const {
11429 DebugLoc DL = MI.getDebugLoc();
11430 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11431 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
11432
11433 MachineFunction *MF = MBB->getParent();
11434 MachineRegisterInfo &MRI = MF->getRegInfo();
11435
11436 const BasicBlock *BB = MBB->getBasicBlock();
11437 MachineFunction::iterator I = ++MBB->getIterator();
11438
11439 Register DstReg = MI.getOperand(0).getReg();
11440 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
11441 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
11442 Register mainDstReg = MRI.createVirtualRegister(RC);
11443 Register restoreDstReg = MRI.createVirtualRegister(RC);
11444
11445 MVT PVT = getPointerTy(MF->getDataLayout());
11446 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
11447 "Invalid Pointer Size!");
11448 // For v = setjmp(buf), we generate
11449 //
11450 // thisMBB:
11451 // SjLjSetup mainMBB
11452 // bl mainMBB
11453 // v_restore = 1
11454 // b sinkMBB
11455 //
11456 // mainMBB:
11457 // buf[LabelOffset] = LR
11458 // v_main = 0
11459 //
11460 // sinkMBB:
11461 // v = phi(main, restore)
11462 //
11463
11464 MachineBasicBlock *thisMBB = MBB;
11465 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
11466 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
11467 MF->insert(I, mainMBB);
11468 MF->insert(I, sinkMBB);
11469
11470 MachineInstrBuilder MIB;
11471
11472 // Transfer the remainder of BB and its successor edges to sinkMBB.
11473 sinkMBB->splice(sinkMBB->begin(), MBB,
11474 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
11475 sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
11476
11477 // Note that the structure of the jmp_buf used here is not compatible
11478 // with that used by libc, and is not designed to be. Specifically, it
11479 // stores only those 'reserved' registers that LLVM does not otherwise
11480 // understand how to spill. Also, by convention, by the time this
11481 // intrinsic is called, Clang has already stored the frame address in the
11482 // first slot of the buffer and stack address in the third. Following the
11483 // X86 target code, we'll store the jump address in the second slot. We also
11484 // need to save the TOC pointer (R2) to handle jumps between shared
11485 // libraries, and that will be stored in the fourth slot. The thread
11486 // identifier (R13) is not affected.
11487
11488 // thisMBB:
11489 const int64_t LabelOffset = 1 * PVT.getStoreSize();
11490 const int64_t TOCOffset = 3 * PVT.getStoreSize();
11491 const int64_t BPOffset = 4 * PVT.getStoreSize();
11492
11493 // Prepare the IP in a register.
11494 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
11495 Register LabelReg = MRI.createVirtualRegister(PtrRC);
11496 Register BufReg = MI.getOperand(1).getReg();
11497
11498 if (Subtarget.is64BitELFABI()) {
11499 setUsesTOCBasePtr(*MBB->getParent());
11500 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
11501 .addReg(PPC::X2)
11502 .addImm(TOCOffset)
11503 .addReg(BufReg)
11504 .cloneMemRefs(MI);
11505 }
11506
11507 // Naked functions never have a base pointer, and so we use r1. For all
11508 // other functions, this decision must be delayed until during PEI.
11509 unsigned BaseReg;
11510 if (MF->getFunction().hasFnAttribute(Attribute::Naked))
11511 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
11512 else
11513 BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
11514
11515 MIB = BuildMI(*thisMBB, MI, DL,
11516 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
11517 .addReg(BaseReg)
11518 .addImm(BPOffset)
11519 .addReg(BufReg)
11520 .cloneMemRefs(MI);
11521
11522 // Setup
11523 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
11524 MIB.addRegMask(TRI->getNoPreservedMask());
11525
11526 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
11527
11528 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
11529 .addMBB(mainMBB);
11530 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
11531
11532 thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
11533 thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
11534
11535 // mainMBB:
11536 // mainDstReg = 0
11537 MIB =
11538 BuildMI(mainMBB, DL,
11539 TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
11540
11541 // Store IP
11542 if (Subtarget.isPPC64()) {
11543 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
11544 .addReg(LabelReg)
11545 .addImm(LabelOffset)
11546 .addReg(BufReg);
11547 } else {
11548 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
11549 .addReg(LabelReg)
11550 .addImm(LabelOffset)
11551 .addReg(BufReg);
11552 }
11553 MIB.cloneMemRefs(MI);
11554
11555 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
11556 mainMBB->addSuccessor(sinkMBB);
11557
11558 // sinkMBB:
11559 BuildMI(*sinkMBB, sinkMBB->begin(), DL,
11560 TII->get(PPC::PHI), DstReg)
11561 .addReg(mainDstReg).addMBB(mainMBB)
11562 .addReg(restoreDstReg).addMBB(thisMBB);
11563
11564 MI.eraseFromParent();
11565 return sinkMBB;
11566}
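// Editorial note (illustrative, not part of the original file): the buffer
// slots used above (in pointer-sized words) are 0 = frame address (stored by
// Clang), 1 = jump address (LR), 2 = stack address, 3 = TOC (R2), 4 = base
// pointer; the byte offsets come from multiplying by PVT.getStoreSize().
static long sketchSjLjSlotOffset(unsigned Slot, unsigned PtrBytes) {
  return (long)(Slot * PtrBytes); // e.g. the TOC slot (3) is at byte 24 on 64-bit
}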
11567
11568MachineBasicBlock *
11569PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
11570 MachineBasicBlock *MBB) const {
11571 DebugLoc DL = MI.getDebugLoc();
11572 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11573
11574 MachineFunction *MF = MBB->getParent();
11575 MachineRegisterInfo &MRI = MF->getRegInfo();
11576
11577 MVT PVT = getPointerTy(MF->getDataLayout());
11578 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
11579 "Invalid Pointer Size!");
11580
11581 const TargetRegisterClass *RC =
11582 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
11583 Register Tmp = MRI.createVirtualRegister(RC);
11584 // Since FP is only updated here but NOT referenced, it's treated as GPR.
11585 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
11586 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
11587 unsigned BP =
11588 (PVT == MVT::i64)
11589 ? PPC::X30
11590 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
11591 : PPC::R30);
11592
11593 MachineInstrBuilder MIB;
11594
11595 const int64_t LabelOffset = 1 * PVT.getStoreSize();
11596 const int64_t SPOffset = 2 * PVT.getStoreSize();
11597 const int64_t TOCOffset = 3 * PVT.getStoreSize();
11598 const int64_t BPOffset = 4 * PVT.getStoreSize();
11599
11600 Register BufReg = MI.getOperand(0).getReg();
11601
11602 // Reload FP (the jumped-to function may not have had a
11603 // frame pointer, and if so, then its r31 will be restored
11604 // as necessary).
11605 if (PVT == MVT::i64) {
11606 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
11607 .addImm(0)
11608 .addReg(BufReg);
11609 } else {
11610 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
11611 .addImm(0)
11612 .addReg(BufReg);
11613 }
11614 MIB.cloneMemRefs(MI);
11615
11616 // Reload IP
11617 if (PVT == MVT::i64) {
11618 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
11619 .addImm(LabelOffset)
11620 .addReg(BufReg);
11621 } else {
11622 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
11623 .addImm(LabelOffset)
11624 .addReg(BufReg);
11625 }
11626 MIB.cloneMemRefs(MI);
11627
11628 // Reload SP
11629 if (PVT == MVT::i64) {
11630 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
11631 .addImm(SPOffset)
11632 .addReg(BufReg);
11633 } else {
11634 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
11635 .addImm(SPOffset)
11636 .addReg(BufReg);
11637 }
11638 MIB.cloneMemRefs(MI);
11639
11640 // Reload BP
11641 if (PVT == MVT::i64) {
11642 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
11643 .addImm(BPOffset)
11644 .addReg(BufReg);
11645 } else {
11646 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
11647 .addImm(BPOffset)
11648 .addReg(BufReg);
11649 }
11650 MIB.cloneMemRefs(MI);
11651
11652 // Reload TOC
11653 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
11654 setUsesTOCBasePtr(*MBB->getParent());
11655 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
11656 .addImm(TOCOffset)
11657 .addReg(BufReg)
11658 .cloneMemRefs(MI);
11659 }
11660
11661 // Jump
11662 BuildMI(*MBB, MI, DL,
11663 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
11664 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
11665
11666 MI.eraseFromParent();
11667 return MBB;
11668}
11669
11670bool PPCTargetLowering::hasInlineStackProbe(MachineFunction &MF) const {
11671 // If the function specifically requests inline stack probes, emit them.
11672 if (MF.getFunction().hasFnAttribute("probe-stack"))
11673 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
11674 "inline-asm";
11675 return false;
11676}
11677
11678unsigned PPCTargetLowering::getStackProbeSize(MachineFunction &MF) const {
11679 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
11680 unsigned StackAlign = TFI->getStackAlignment();
11681 assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
11682 "Unexpected stack alignment");
11683 // The default stack probe size is 4096 if the function has no
11684 // stack-probe-size attribute.
11685 unsigned StackProbeSize = 4096;
11686 const Function &Fn = MF.getFunction();
11687 if (Fn.hasFnAttribute("stack-probe-size"))
11688 Fn.getFnAttribute("stack-probe-size")
11689 .getValueAsString()
11690 .getAsInteger(0, StackProbeSize);
11691 // Round down to the stack alignment.
11692 StackProbeSize &= ~(StackAlign - 1);
11693 return StackProbeSize ? StackProbeSize : StackAlign;
11694}
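// Editorial note (illustrative sketch, not part of the original file): the
// attribute value is rounded down to the stack alignment, and the alignment
// itself is used when that rounding reaches zero (e.g. 5000 with 16-byte
// alignment gives 4992, while 7 gives 16).
static unsigned sketchStackProbeSize(unsigned Requested, unsigned StackAlign) {
  unsigned Rounded = Requested & ~(StackAlign - 1); // round down to alignment
  return Rounded ? Rounded : StackAlign;
}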
11695
11696// Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
11697// into three phases. In the first phase, it uses the pseudo instruction
11698// PREPARE_PROBED_ALLOCA to get the future result of the actual FramePointer and
11699// FinalStackPtr. In the second phase, it generates a loop that probes blocks.
11700// Finally, it uses the pseudo instruction DYNAREAOFFSET to get the future result of
11701// MaxCallFrameSize so that it can calculate the correct data area pointer.
11702MachineBasicBlock *
11703PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
11704 MachineBasicBlock *MBB) const {
11705 const bool isPPC64 = Subtarget.isPPC64();
11706 MachineFunction *MF = MBB->getParent();
11707 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11708 DebugLoc DL = MI.getDebugLoc();
11709 const unsigned ProbeSize = getStackProbeSize(*MF);
11710 const BasicBlock *ProbedBB = MBB->getBasicBlock();
11711 MachineRegisterInfo &MRI = MF->getRegInfo();
11712 // The CFG of probing stack looks as
11713 // +-----+
11714 // | MBB |
11715 // +--+--+
11716 // |
11717 // +----v----+
11718 // +--->+ TestMBB +---+
11719 // | +----+----+ |
11720 // | | |
11721 // | +-----v----+ |
11722 // +---+ BlockMBB | |
11723 // +----------+ |
11724 // |
11725 // +---------+ |
11726 // | TailMBB +<--+
11727 // +---------+
11728 // In MBB, calculate previous frame pointer and final stack pointer.
11729 // In TestMBB, test if sp is equal to the final stack pointer; if so, jump to
11730 // TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
11731 // TailMBB is spliced via \p MI.
11732 MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
11733 MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
11734 MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
11735
11736 MachineFunction::iterator MBBIter = ++MBB->getIterator();
11737 MF->insert(MBBIter, TestMBB);
11738 MF->insert(MBBIter, BlockMBB);
11739 MF->insert(MBBIter, TailMBB);
11740
11741 const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
11742 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
11743
11744 Register DstReg = MI.getOperand(0).getReg();
11745 Register NegSizeReg = MI.getOperand(1).getReg();
11746 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
11747 Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11748 Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11749 Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11750
11751 // Since the value of NegSizeReg might be realigned during prologue/epilogue
11752 // insertion, insert a PREPARE_PROBED_ALLOCA pseudo instruction to get the
11753 // actual FramePointer and NegSize.
11754 unsigned ProbeOpc;
11755 if (!MRI.hasOneNonDBGUse(NegSizeReg))
11756 ProbeOpc =
11757 isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
11758 else
11759 // By introducing PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG, ActualNegSizeReg
11760 // and NegSizeReg will be allocated to the same physical register to avoid a
11761 // redundant copy when NegSizeReg has only one use, which is the current MI
11762 // and will be replaced by PREPARE_PROBED_ALLOCA.
11763 ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
11764 : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
11765 BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
11766 .addDef(ActualNegSizeReg)
11767 .addReg(NegSizeReg)
11768 .add(MI.getOperand(2))
11769 .add(MI.getOperand(3));
11770
11771 // Calculate the final stack pointer, which equals SP + ActualNegSize.
11772 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
11773 FinalStackPtr)
11774 .addReg(SPReg)
11775 .addReg(ActualNegSizeReg);
11776
11777 // Materialize a scratch register for update.
11778 int64_t NegProbeSize = -(int64_t)ProbeSize;
11779 assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
11780 Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11781 if (!isInt<16>(NegProbeSize)) {
11782 Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11783 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
11784 .addImm(NegProbeSize >> 16);
11785 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
11786 ScratchReg)
11787 .addReg(TempReg)
11788 .addImm(NegProbeSize & 0xFFFF);
11789 } else
11790 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
11791 .addImm(NegProbeSize);
11792
11793 {
11794 // Probing leading residual part.
11795 Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11796 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
11797 .addReg(ActualNegSizeReg)
11798 .addReg(ScratchReg);
11799 Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11800 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
11801 .addReg(Div)
11802 .addReg(ScratchReg);
11803 Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11804 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
11805 .addReg(Mul)
11806 .addReg(ActualNegSizeReg);
11807 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
11808 .addReg(FramePointer)
11809 .addReg(SPReg)
11810 .addReg(NegMod);
11811 }
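// [Editor's note] Illustrative worked example, not part of the original file.
// Assume ProbeSize = 4096 and a 5000-byte allocation, so ActualNegSize ==
// -5000 and the scratch register holds NegProbeSize == -4096:
//   Div    = -5000 / -4096   = 1     (divd/divw truncate toward zero)
//   Mul    = 1 * -4096       = -4096
//   NegMod = -5000 - (-4096) = -904  (subf computes ActualNegSize - Mul)
// The stdux/stwux above probes the 904-byte residual first; the loop in
// BlockMBB then probes the remaining full 4096-byte block.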
11812
11813 {
11814 // Remaining part should be multiple of ProbeSize.
11815 Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
11816 BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
11817 .addReg(SPReg)
11818 .addReg(FinalStackPtr);
11819 BuildMI(TestMBB, DL, TII->get(PPC::BCC))
11820 .addImm(PPC::PRED_EQ)
11821 .addReg(CmpResult)
11822 .addMBB(TailMBB);
11823 TestMBB->addSuccessor(BlockMBB);
11824 TestMBB->addSuccessor(TailMBB);
11825 }
11826
11827 {
11828 // Touch the block.
11829 // |P...|P...|P...
11830 BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
11831 .addReg(FramePointer)
11832 .addReg(SPReg)
11833 .addReg(ScratchReg);
11834 BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
11835 BlockMBB->addSuccessor(TestMBB);
11836 }
11837
11838 // Calculation of MaxCallFrameSize is deferred to prologue/epilogue insertion;
11839 // use the DYNAREAOFFSET pseudo instruction to get the future result.
11840 Register MaxCallFrameSizeReg =
11841 MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11842 BuildMI(TailMBB, DL,
11843 TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
11844 MaxCallFrameSizeReg)
11845 .add(MI.getOperand(2))
11846 .add(MI.getOperand(3));
11847 BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
11848 .addReg(SPReg)
11849 .addReg(MaxCallFrameSizeReg);
11850
11851 // Splice instructions after MI to TailMBB.
11852 TailMBB->splice(TailMBB->end(), MBB,
11853 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
11854 TailMBB->transferSuccessorsAndUpdatePHIs(MBB);
11855 MBB->addSuccessor(TestMBB);
11856
11857 // Delete the pseudo instruction.
11858 MI.eraseFromParent();
11859
11860 ++NumDynamicAllocaProbed;
11861 return TailMBB;
11862}
11863
11864MachineBasicBlock *
11865PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
11866 MachineBasicBlock *BB) const {
11867 if (MI.getOpcode() == TargetOpcode::STACKMAP ||
11868 MI.getOpcode() == TargetOpcode::PATCHPOINT) {
11869 if (Subtarget.is64BitELFABI() &&
11870 MI.getOpcode() == TargetOpcode::PATCHPOINT &&
11871 !Subtarget.isUsingPCRelativeCalls()) {
11872 // Call lowering should have added an r2 operand to indicate a dependence
11873 // on the TOC base pointer value. It can't, however, because there is no
11874 // way to mark the dependence as implicit there, and so the stackmap code
11875 // will confuse it with a regular operand. Instead, add the dependence
11876 // here.
11877 MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
11878 }
11879
11880 return emitPatchPoint(MI, BB);
11881 }
11882
11883 if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
11884 MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
11885 return emitEHSjLjSetJmp(MI, BB);
11886 } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
11887 MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
11888 return emitEHSjLjLongJmp(MI, BB);
11889 }
11890
11891 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11892
11893 // To "insert" these instructions we actually have to insert their
11894 // control-flow patterns.
11895 const BasicBlock *LLVM_BB = BB->getBasicBlock();
11896 MachineFunction::iterator It = ++BB->getIterator();
11897
11898 MachineFunction *F = BB->getParent();
11899
11900 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
11901 MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 ||
11902 MI.getOpcode() == PPC::SELECT_I8) {
11903 SmallVector<MachineOperand, 2> Cond;
11904 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
11905 MI.getOpcode() == PPC::SELECT_CC_I8)
11906 Cond.push_back(MI.getOperand(4));
11907 else
11908 Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
11909 Cond.push_back(MI.getOperand(1));
11910
11911 DebugLoc dl = MI.getDebugLoc();
11912 TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
11913 MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
11914 } else if (MI.getOpcode() == PPC::SELECT_CC_F4 ||
11915 MI.getOpcode() == PPC::SELECT_CC_F8 ||
11916 MI.getOpcode() == PPC::SELECT_CC_F16 ||
11917 MI.getOpcode() == PPC::SELECT_CC_VRRC ||
11918 MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
11919 MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
11920 MI.getOpcode() == PPC::SELECT_CC_VSRC ||
11921 MI.getOpcode() == PPC::SELECT_CC_SPE4 ||
11922 MI.getOpcode() == PPC::SELECT_CC_SPE ||
11923 MI.getOpcode() == PPC::SELECT_F4 ||
11924 MI.getOpcode() == PPC::SELECT_F8 ||
11925 MI.getOpcode() == PPC::SELECT_F16 ||
11926 MI.getOpcode() == PPC::SELECT_SPE ||
11927 MI.getOpcode() == PPC::SELECT_SPE4 ||
11928 MI.getOpcode() == PPC::SELECT_VRRC ||
11929 MI.getOpcode() == PPC::SELECT_VSFRC ||
11930 MI.getOpcode() == PPC::SELECT_VSSRC ||
11931 MI.getOpcode() == PPC::SELECT_VSRC) {
11932 // The incoming instruction knows the destination vreg to set, the
11933 // condition code register to branch on, the true/false values to
11934 // select between, and a branch opcode to use.
11935
11936 // thisMBB:
11937 // ...
11938 // TrueVal = ...
11939 // cmpTY ccX, r1, r2
11940 // bCC copy1MBB
11941 // fallthrough --> copy0MBB
11942 MachineBasicBlock *thisMBB = BB;
11943 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
11944 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
11945 DebugLoc dl = MI.getDebugLoc();
11946 F->insert(It, copy0MBB);
11947 F->insert(It, sinkMBB);
11948
11949 // Transfer the remainder of BB and its successor edges to sinkMBB.
11950 sinkMBB->splice(sinkMBB->begin(), BB,
11951 std::next(MachineBasicBlock::iterator(MI)), BB->end());
11952 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
11953
11954 // Next, add the true and fallthrough blocks as its successors.
11955 BB->addSuccessor(copy0MBB);
11956 BB->addSuccessor(sinkMBB);
11957
11958 if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||
11959 MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||
11960 MI.getOpcode() == PPC::SELECT_F16 ||
11961 MI.getOpcode() == PPC::SELECT_SPE4 ||
11962 MI.getOpcode() == PPC::SELECT_SPE ||
11963 MI.getOpcode() == PPC::SELECT_VRRC ||
11964 MI.getOpcode() == PPC::SELECT_VSFRC ||
11965 MI.getOpcode() == PPC::SELECT_VSSRC ||
11966 MI.getOpcode() == PPC::SELECT_VSRC) {
11967 BuildMI(BB, dl, TII->get(PPC::BC))
11968 .addReg(MI.getOperand(1).getReg())
11969 .addMBB(sinkMBB);
11970 } else {
11971 unsigned SelectPred = MI.getOperand(4).getImm();
11972 BuildMI(BB, dl, TII->get(PPC::BCC))
11973 .addImm(SelectPred)
11974 .addReg(MI.getOperand(1).getReg())
11975 .addMBB(sinkMBB);
11976 }
11977
11978 // copy0MBB:
11979 // %FalseValue = ...
11980 // # fallthrough to sinkMBB
11981 BB = copy0MBB;
11982
11983 // Update machine-CFG edges
11984 BB->addSuccessor(sinkMBB);
11985
11986 // sinkMBB:
11987 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
11988 // ...
11989 BB = sinkMBB;
11990 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
11991 .addReg(MI.getOperand(3).getReg())
11992 .addMBB(copy0MBB)
11993 .addReg(MI.getOperand(2).getReg())
11994 .addMBB(thisMBB);
11995 } else if (MI.getOpcode() == PPC::ReadTB) {
11996 // To read the 64-bit time-base register on a 32-bit target, we read the
11997 // two halves. Should the counter have wrapped while it was being read, we
11998 // need to try again.
11999 // ...
12000 // readLoop:
12001 // mfspr Rx,TBU # load from TBU
12002 // mfspr Ry,TB # load from TB
12003 // mfspr Rz,TBU # load from TBU
12004 // cmpw crX,Rx,Rz # check if 'old'='new'
12005 // bne readLoop # branch if they're not equal
12006 // ...
12007
12008 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
12009 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12010 DebugLoc dl = MI.getDebugLoc();
12011 F->insert(It, readMBB);
12012 F->insert(It, sinkMBB);
12013
12014 // Transfer the remainder of BB and its successor edges to sinkMBB.
12015 sinkMBB->splice(sinkMBB->begin(), BB,
12016 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12017 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
12018
12019 BB->addSuccessor(readMBB);
12020 BB = readMBB;
12021
12022 MachineRegisterInfo &RegInfo = F->getRegInfo();
12023 Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
12024 Register LoReg = MI.getOperand(0).getReg();
12025 Register HiReg = MI.getOperand(1).getReg();
12026
12027 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
12028 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
12029 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
12030
12031 Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12032
12033 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
12034 .addReg(HiReg)
12035 .addReg(ReadAgainReg);
12036 BuildMI(BB, dl, TII->get(PPC::BCC))
12037 .addImm(PPC::PRED_NE)
12038 .addReg(CmpReg)
12039 .addMBB(readMBB);
12040
12041 BB->addSuccessor(readMBB);
12042 BB->addSuccessor(sinkMBB);
12043 } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
12044 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
12045 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
12046 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
12047 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
12048 BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
12049 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
12050 BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
12051
12052 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
12053 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
12054 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
12055 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
12056 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
12057 BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
12058 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
12059 BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
12060
12061 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
12062 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
12063 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
12064 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
12065 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
12066 BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
12067 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
12068 BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
12069
12070 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
12071 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
12072 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
12073 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
12074 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
12075 BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
12076 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
12077 BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
12078
12079 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
12080 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
12081 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
12082 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
12083 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
12084 BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
12085 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
12086 BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
12087
12088 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
12089 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
12090 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
12091 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
12092 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
12093 BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
12094 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
12095 BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
12096
12097 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
12098 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE);
12099 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
12100 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE);
12101 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
12102 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE);
12103 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
12104 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE);
12105
12106 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
12107 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE);
12108 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
12109 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE);
12110 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
12111 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE);
12112 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
12113 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE);
12114
12115 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
12116 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE);
12117 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
12118 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE);
12119 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
12120 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE);
12121 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
12122 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE);
12123
12124 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
12125 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE);
12126 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
12127 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE);
12128 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
12129 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE);
12130 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
12131 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE);
12132
12133 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
12134 BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
12135 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
12136 BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
12137 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
12138 BB = EmitAtomicBinary(MI, BB, 4, 0);
12139 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
12140 BB = EmitAtomicBinary(MI, BB, 8, 0);
12141 else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
12142 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
12143 (Subtarget.hasPartwordAtomics() &&
12144 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
12145 (Subtarget.hasPartwordAtomics() &&
12146 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
12147 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
12148
12149 auto LoadMnemonic = PPC::LDARX;
12150 auto StoreMnemonic = PPC::STDCX;
12151 switch (MI.getOpcode()) {
12152 default:
12153 llvm_unreachable("Compare and swap of unknown size");
12154 case PPC::ATOMIC_CMP_SWAP_I8:
12155 LoadMnemonic = PPC::LBARX;
12156 StoreMnemonic = PPC::STBCX;
12157 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
12158 break;
12159 case PPC::ATOMIC_CMP_SWAP_I16:
12160 LoadMnemonic = PPC::LHARX;
12161 StoreMnemonic = PPC::STHCX;
12162 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
12163 break;
12164 case PPC::ATOMIC_CMP_SWAP_I32:
12165 LoadMnemonic = PPC::LWARX;
12166 StoreMnemonic = PPC::STWCX;
12167 break;
12168 case PPC::ATOMIC_CMP_SWAP_I64:
12169 LoadMnemonic = PPC::LDARX;
12170 StoreMnemonic = PPC::STDCX;
12171 break;
12172 }
12173 Register dest = MI.getOperand(0).getReg();
12174 Register ptrA = MI.getOperand(1).getReg();
12175 Register ptrB = MI.getOperand(2).getReg();
12176 Register oldval = MI.getOperand(3).getReg();
12177 Register newval = MI.getOperand(4).getReg();
12178 DebugLoc dl = MI.getDebugLoc();
12179
12180 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
12181 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
12182 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
12183 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12184 F->insert(It, loop1MBB);
12185 F->insert(It, loop2MBB);
12186 F->insert(It, midMBB);
12187 F->insert(It, exitMBB);
12188 exitMBB->splice(exitMBB->begin(), BB,
12189 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12190 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12191
12192 // thisMBB:
12193 // ...
12194 // fallthrough --> loopMBB
12195 BB->addSuccessor(loop1MBB);
12196
12197 // loop1MBB:
12198 // l[bhwd]arx dest, ptr
12199 // cmp[wd] dest, oldval
12200 // bne- midMBB
12201 // loop2MBB:
12202 // st[bhwd]cx. newval, ptr
12203 // bne- loopMBB
12204 // b exitBB
12205 // midMBB:
12206 // st[bhwd]cx. dest, ptr
12207 // exitBB:
12208 BB = loop1MBB;
12209 BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
12210 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
12211 .addReg(oldval)
12212 .addReg(dest);
12213 BuildMI(BB, dl, TII->get(PPC::BCC))
12214 .addImm(PPC::PRED_NE)
12215 .addReg(PPC::CR0)
12216 .addMBB(midMBB);
12217 BB->addSuccessor(loop2MBB);
12218 BB->addSuccessor(midMBB);
12219
12220 BB = loop2MBB;
12221 BuildMI(BB, dl, TII->get(StoreMnemonic))
12222 .addReg(newval)
12223 .addReg(ptrA)
12224 .addReg(ptrB);
12225 BuildMI(BB, dl, TII->get(PPC::BCC))
12226 .addImm(PPC::PRED_NE)
12227 .addReg(PPC::CR0)
12228 .addMBB(loop1MBB);
12229 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
12230 BB->addSuccessor(loop1MBB);
12231 BB->addSuccessor(exitMBB);
12232
12233 BB = midMBB;
12234 BuildMI(BB, dl, TII->get(StoreMnemonic))
12235 .addReg(dest)
12236 .addReg(ptrA)
12237 .addReg(ptrB);
12238 BB->addSuccessor(exitMBB);
12239
12240 // exitMBB:
12241 // ...
12242 BB = exitMBB;
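// [Editor's note] Illustrative sketch, not part of the original file. The
// larx/stcx. loop built above is the lowering typically reached by a strong
// compare-and-swap in user code, e.g. (hypothetical example):
//
//   #include <atomic>
//   bool cas(std::atomic<long> &V, long &Expected, long Desired) {
//     return V.compare_exchange_strong(Expected, Desired); // ATOMIC_CMP_SWAP_I64
//   }
//
// midMBB stores the just-loaded value back with st[bhwd]cx. on the failure
// path so that the reservation taken by l[bhwd]arx is released.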
12243 } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
12244 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
12245 // We must use 64-bit registers for addresses when targeting 64-bit,
12246 // since we're actually doing arithmetic on them. Other registers
12247 // can be 32-bit.
12248 bool is64bit = Subtarget.isPPC64();
12249 bool isLittleEndian = Subtarget.isLittleEndian();
12250 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
12251
12252 Register dest = MI.getOperand(0).getReg();
12253 Register ptrA = MI.getOperand(1).getReg();
12254 Register ptrB = MI.getOperand(2).getReg();
12255 Register oldval = MI.getOperand(3).getReg();
12256 Register newval = MI.getOperand(4).getReg();
12257 DebugLoc dl = MI.getDebugLoc();
12258
12259 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
12260 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
12261 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
12262 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12263 F->insert(It, loop1MBB);
12264 F->insert(It, loop2MBB);
12265 F->insert(It, midMBB);
12266 F->insert(It, exitMBB);
12267 exitMBB->splice(exitMBB->begin(), BB,
12268 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12269 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12270
12271 MachineRegisterInfo &RegInfo = F->getRegInfo();
12272 const TargetRegisterClass *RC =
12273 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12274 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
12275
12276 Register PtrReg = RegInfo.createVirtualRegister(RC);
12277 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
12278 Register ShiftReg =
12279 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
12280 Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
12281 Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
12282 Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
12283 Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
12284 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
12285 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
12286 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
12287 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
12288 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
12289 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
12290 Register Ptr1Reg;
12291 Register TmpReg = RegInfo.createVirtualRegister(GPRC);
12292 Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
12293 // thisMBB:
12294 // ...
12295 // fallthrough --> loopMBB
12296 BB->addSuccessor(loop1MBB);
12297
12298 // The 4-byte load must be aligned, while a char or short may be
12299 // anywhere in the word. Hence all this nasty bookkeeping code.
12300 // add ptr1, ptrA, ptrB [copy if ptrA==0]
12301 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
12302 // xori shift, shift1, 24 [16]
12303 // rlwinm ptr, ptr1, 0, 0, 29
12304 // slw newval2, newval, shift
12305 // slw oldval2, oldval, shift
12306 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
12307 // slw mask, mask2, shift
12308 // and newval3, newval2, mask
12309 // and oldval3, oldval2, mask
12310 // loop1MBB:
12311 // lwarx tmpDest, ptr
12312 // and tmp, tmpDest, mask
12313 // cmpw tmp, oldval3
12314 // bne- midMBB
12315 // loop2MBB:
12316 // andc tmp2, tmpDest, mask
12317 // or tmp4, tmp2, newval3
12318 // stwcx. tmp4, ptr
12319 // bne- loop1MBB
12320 // b exitBB
12321 // midMBB:
12322 // stwcx. tmpDest, ptr
12323 // exitBB:
12324 // srw dest, tmpDest, shift
12325 if (ptrA != ZeroReg) {
12326 Ptr1Reg = RegInfo.createVirtualRegister(RC);
12327 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
12328 .addReg(ptrA)
12329 .addReg(ptrB);
12330 } else {
12331 Ptr1Reg = ptrB;
12332 }
12333
12334 // We need to use a 32-bit subregister to avoid a register class mismatch in
12335 // 64-bit mode.
12336 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
12337 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
12338 .addImm(3)
12339 .addImm(27)
12340 .addImm(is8bit ? 28 : 27);
12341 if (!isLittleEndian)
12342 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
12343 .addReg(Shift1Reg)
12344 .addImm(is8bit ? 24 : 16);
12345 if (is64bit)
12346 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
12347 .addReg(Ptr1Reg)
12348 .addImm(0)
12349 .addImm(61);
12350 else
12351 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
12352 .addReg(Ptr1Reg)
12353 .addImm(0)
12354 .addImm(0)
12355 .addImm(29);
12356 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
12357 .addReg(newval)
12358 .addReg(ShiftReg);
12359 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
12360 .addReg(oldval)
12361 .addReg(ShiftReg);
12362 if (is8bit)
12363 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
12364 else {
12365 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
12366 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
12367 .addReg(Mask3Reg)
12368 .addImm(65535);
12369 }
12370 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
12371 .addReg(Mask2Reg)
12372 .addReg(ShiftReg);
12373 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
12374 .addReg(NewVal2Reg)
12375 .addReg(MaskReg);
12376 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
12377 .addReg(OldVal2Reg)
12378 .addReg(MaskReg);
12379
12380 BB = loop1MBB;
12381 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
12382 .addReg(ZeroReg)
12383 .addReg(PtrReg);
12384 BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
12385 .addReg(TmpDestReg)
12386 .addReg(MaskReg);
12387 BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
12388 .addReg(TmpReg)
12389 .addReg(OldVal3Reg);
12390 BuildMI(BB, dl, TII->get(PPC::BCC))
12391 .addImm(PPC::PRED_NE)
12392 .addReg(PPC::CR0)
12393 .addMBB(midMBB);
12394 BB->addSuccessor(loop2MBB);
12395 BB->addSuccessor(midMBB);
12396
12397 BB = loop2MBB;
12398 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
12399 .addReg(TmpDestReg)
12400 .addReg(MaskReg);
12401 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
12402 .addReg(Tmp2Reg)
12403 .addReg(NewVal3Reg);
12404 BuildMI(BB, dl, TII->get(PPC::STWCX))
12405 .addReg(Tmp4Reg)
12406 .addReg(ZeroReg)
12407 .addReg(PtrReg);
12408 BuildMI(BB, dl, TII->get(PPC::BCC))
12409 .addImm(PPC::PRED_NE)
12410 .addReg(PPC::CR0)
12411 .addMBB(loop1MBB);
12412 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
12413 BB->addSuccessor(loop1MBB);
12414 BB->addSuccessor(exitMBB);
12415
12416 BB = midMBB;
12417 BuildMI(BB, dl, TII->get(PPC::STWCX))
12418 .addReg(TmpDestReg)
12419 .addReg(ZeroReg)
12420 .addReg(PtrReg);
12421 BB->addSuccessor(exitMBB);
12422
12423 // exitMBB:
12424 // ...
12425 BB = exitMBB;
12426 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
12427 .addReg(TmpReg)
12428 .addReg(ShiftReg);
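// [Editor's note] Illustrative worked example, not part of the original file.
// For an 8-bit cmpxchg whose address has (ptr1 & 3) == 1, i.e. byte 1 of the
// aligned word:
//   shift1 = (ptr1 & 3) * 8 = 8             (rlwinm ..., 3, 27, 28)
//   little-endian: shift = shift1      = 8  -> mask = 0xFF << 8  = 0x0000FF00
//   big-endian:    shift = shift1 ^ 24 = 16 -> mask = 0xFF << 16 = 0x00FF0000
//   ptr = ptr1 & ~3                          (rlwinm/rldicr clear the low bits)
// newval/oldval are shifted into that byte lane, the lwarx/stwcx. loop swaps
// only the masked lane, and the final srw moves the old byte back to bit 0.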
12429 } else if (MI.getOpcode() == PPC::FADDrtz) {
12430 // This pseudo performs an FADD with rounding mode temporarily forced
12431 // to round-to-zero. We emit this via custom inserter since the FPSCR
12432 // is not modeled at the SelectionDAG level.
12433 Register Dest = MI.getOperand(0).getReg();
12434 Register Src1 = MI.getOperand(1).getReg();
12435 Register Src2 = MI.getOperand(2).getReg();
12436 DebugLoc dl = MI.getDebugLoc();
12437
12438 MachineRegisterInfo &RegInfo = F->getRegInfo();
12439 Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
12440
12441 // Save FPSCR value.
12442 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
12443
12444 // Set rounding mode to round-to-zero.
12445 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
12446 .addImm(31)
12447 .addReg(PPC::RM, RegState::ImplicitDefine);
12448
12449 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
12450 .addImm(30)
12451 .addReg(PPC::RM, RegState::ImplicitDefine);
12452
12453 // Perform addition.
12454 auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
12455 .addReg(Src1)
12456 .addReg(Src2);
12457 if (MI.getFlag(MachineInstr::NoFPExcept))
12458 MIB.setMIFlag(MachineInstr::NoFPExcept);
12459
12460 // Restore FPSCR value.
12461 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
12462 } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
12463 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
12464 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
12465 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
12466 unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
12467 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
12468 ? PPC::ANDI8_rec
12469 : PPC::ANDI_rec;
12470 bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
12471 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
12472
12473 MachineRegisterInfo &RegInfo = F->getRegInfo();
12474 Register Dest = RegInfo.createVirtualRegister(
12475 Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
12476
12477 DebugLoc Dl = MI.getDebugLoc();
12478 BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
12479 .addReg(MI.getOperand(1).getReg())
12480 .addImm(1);
12481 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12482 MI.getOperand(0).getReg())
12483 .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
12484 } else if (MI.getOpcode() == PPC::TCHECK_RET) {
12485 DebugLoc Dl = MI.getDebugLoc();
12486 MachineRegisterInfo &RegInfo = F->getRegInfo();
12487 Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12488 BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
12489 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12490 MI.getOperand(0).getReg())
12491 .addReg(CRReg);
12492 } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
12493 DebugLoc Dl = MI.getDebugLoc();
12494 unsigned Imm = MI.getOperand(1).getImm();
12495 BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
12496 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12497 MI.getOperand(0).getReg())
12498 .addReg(PPC::CR0EQ);
12499 } else if (MI.getOpcode() == PPC::SETRNDi) {
12500 DebugLoc dl = MI.getDebugLoc();
12501 Register OldFPSCRReg = MI.getOperand(0).getReg();
12502
12503 // Save FPSCR value.
12504 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
12505
12506 // The floating point rounding mode is in bits 62:63 of FPSCR, and has
12507 // the following settings:
12508 // 00 Round to nearest
12509 // 01 Round to 0
12510 // 10 Round to +inf
12511 // 11 Round to -inf
12512
12513 // When the operand is an immediate, use its two least significant bits to
12514 // set bits 62:63 of FPSCR.
12515 unsigned Mode = MI.getOperand(1).getImm();
12516 BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
12517 .addImm(31)
12518 .addReg(PPC::RM, RegState::ImplicitDefine);
12519
12520 BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
12521 .addImm(30)
12522 .addReg(PPC::RM, RegState::ImplicitDefine);
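// [Editor's note] Illustrative worked example, not part of the original file.
// The two mtfsb instructions above set FPSCR bits 62:63 (numbered 30 and 31
// by mtfsb) from the low two bits of Mode:
//   Mode = 0 (nearest): mtfsb0 31 ; mtfsb0 30
//   Mode = 1 (to zero): mtfsb1 31 ; mtfsb0 30
//   Mode = 2 (to +inf): mtfsb0 31 ; mtfsb1 30
//   Mode = 3 (to -inf): mtfsb1 31 ; mtfsb1 30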
12523 } else if (MI.getOpcode() == PPC::SETRND) {
12524 DebugLoc dl = MI.getDebugLoc();
12525
12526 // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
12527 // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
12528 // If the target doesn't have DirectMove, we should use the stack to do the
12529 // conversion, because the target doesn't have instructions like mtvsrd or
12530 // mfvsrd to do this conversion directly.
12531 auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
12532 if (Subtarget.hasDirectMove()) {
12533 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
12534 .addReg(SrcReg);
12535 } else {
12536 // Use the stack to do the register copy.
12537 unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
12538 MachineRegisterInfo &RegInfo = F->getRegInfo();
12539 const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
12540 if (RC == &PPC::F8RCRegClass) {
12541 // Copy register from F8RCRegClass to G8RCRegclass.
12542 assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
12543 "Unsupported RegClass.");
12544
12545 StoreOp = PPC::STFD;
12546 LoadOp = PPC::LD;
12547 } else {
12548 // Copy register from G8RCRegClass to F8RCRegclass.
12549 assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
12550 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
12551 "Unsupported RegClass.");
12552 }
12553
12554 MachineFrameInfo &MFI = F->getFrameInfo();
12555 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
12556
12557 MachineMemOperand *MMOStore = F->getMachineMemOperand(
12558 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
12559 MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
12560 MFI.getObjectAlign(FrameIdx));
12561
12562 // Store the SrcReg into the stack.
12563 BuildMI(*BB, MI, dl, TII->get(StoreOp))
12564 .addReg(SrcReg)
12565 .addImm(0)
12566 .addFrameIndex(FrameIdx)
12567 .addMemOperand(MMOStore);
12568
12569 MachineMemOperand *MMOLoad = F->getMachineMemOperand(
12570 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
12571 MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
12572 MFI.getObjectAlign(FrameIdx));
12573
12574 // Load from the stack where SrcReg is stored, and save to DestReg,
12575 // so we have done the RegClass conversion from RegClass::SrcReg to
12576 // RegClass::DestReg.
12577 BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
12578 .addImm(0)
12579 .addFrameIndex(FrameIdx)
12580 .addMemOperand(MMOLoad);
12581 }
12582 };
12583
12584 Register OldFPSCRReg = MI.getOperand(0).getReg();
12585
12586 // Save FPSCR value.
12587 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
12588
12589 // When the operand is a GPRC register, use its two least significant bits and
12590 // the mtfsf instruction to set bits 62:63 of FPSCR.
12591 //
12592 // copy OldFPSCRTmpReg, OldFPSCRReg
12593 // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
12594 // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
12595 // copy NewFPSCRReg, NewFPSCRTmpReg
12596 // mtfsf 255, NewFPSCRReg
12597 MachineOperand SrcOp = MI.getOperand(1);
12598 MachineRegisterInfo &RegInfo = F->getRegInfo();
12599 Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12600
12601 copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
12602
12603 Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12604 Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12605
12606 // The first operand of INSERT_SUBREG should be a register that has
12607 // subregisters; we only care about its RegClass, so we use an
12608 // IMPLICIT_DEF register.
12609 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
12610 BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
12611 .addReg(ImDefReg)
12612 .add(SrcOp)
12613 .addImm(1);
12614
12615 Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12616 BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
12617 .addReg(OldFPSCRTmpReg)
12618 .addReg(ExtSrcReg)
12619 .addImm(0)
12620 .addImm(62);
12621
12622 Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
12623 copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
12624
12625 // The mask 255 means that bits 32:63 of NewFPSCRReg are put into bits 32:63
12626 // of FPSCR.
12627 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
12628 .addImm(255)
12629 .addReg(NewFPSCRReg)
12630 .addImm(0)
12631 .addImm(0);
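// [Editor's note] Illustrative worked example, not part of the original file.
// With SH = 0 and MB = 62 the rldimi above keeps bits 0:61 of OldFPSCRTmpReg
// and replaces bits 62:63 with the low two bits of ExtSrcReg, e.g.
//   old FPSCR copy = ...xxxx00, SrcOp = 3  ->  NewFPSCRTmp = ...xxxx11
// mtfsf 255 then moves bits 32:63 of that register back into the FPSCR.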
12632 } else if (MI.getOpcode() == PPC::SETFLM) {
12633 DebugLoc Dl = MI.getDebugLoc();
12634
12635 // Result of setflm is previous FPSCR content, so we need to save it first.
12636 Register OldFPSCRReg = MI.getOperand(0).getReg();
12637 BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
12638
12639 // Put bits 32:63 into FPSCR.
12640 Register NewFPSCRReg = MI.getOperand(1).getReg();
12641 BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
12642 .addImm(255)
12643 .addReg(NewFPSCRReg)
12644 .addImm(0)
12645 .addImm(0);
12646 } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
12647 MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
12648 return emitProbedAlloca(MI, BB);
12649 } else {
12650 llvm_unreachable("Unexpected instr type to insert");
12651 }
12652
12653 MI.eraseFromParent(); // The pseudo instruction is gone now.
12654 return BB;
12655}
12656
12657//===----------------------------------------------------------------------===//
12658// Target Optimization Hooks
12659//===----------------------------------------------------------------------===//
12660
12661static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
12662 // For the estimates, convergence is quadratic, so we essentially double the
12663 // number of digits correct after every iteration. For both FRE and FRSQRTE,
12664 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
12665 // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
12666 int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
12667 if (VT.getScalarType() == MVT::f64)
12668 RefinementSteps++;
12669 return RefinementSteps;
12670}
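// [Editor's note] Illustrative sketch, not part of the original file: a
// hypothetical helper reproducing the step counts chosen above. Each
// Newton-Raphson iteration roughly doubles the number of correct bits, so
// starting from 2^-5 (or 2^-14 with hasRecipPrec()):
//   without recip-prec: 5 -> 10 -> 20 -> 40 bits (3 steps cover f32's 23)
//   with    recip-prec: 14 -> 28 bits            (1 step covers f32's 23)
// and f64's 52 bits need one extra step in either case.
static int estimateStepsExample(bool HasRecipPrec, bool IsF64) {
  int Bits = HasRecipPrec ? 14 : 5; // architected accuracy of the estimate
  int Needed = IsF64 ? 52 : 23;     // mantissa bits to refine to
  int Steps = 0;
  while (Bits < Needed) {           // each iteration doubles the correct bits
    Bits *= 2;
    ++Steps;
  }
  return Steps; // 3 / 4 without recip-prec, 1 / 2 with it
}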
12671
12672SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
12673 int Enabled, int &RefinementSteps,
12674 bool &UseOneConstNR,
12675 bool Reciprocal) const {
12676 EVT VT = Operand.getValueType();
12677 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
12678 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
12679 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
12680 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
12681 if (RefinementSteps == ReciprocalEstimate::Unspecified)
12682 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
12683
12684 // The Newton-Raphson computation with a single constant does not provide
12685 // enough accuracy on some CPUs.
12686 UseOneConstNR = !Subtarget.needsTwoConstNR();
12687 return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
12688 }
12689 return SDValue();
12690}
12691
12692SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
12693 int Enabled,
12694 int &RefinementSteps) const {
12695 EVT VT = Operand.getValueType();
12696 if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
12697 (VT == MVT::f64 && Subtarget.hasFRE()) ||
12698 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
12699 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
12700 if (RefinementSteps == ReciprocalEstimate::Unspecified)
12701 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
12702 return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
12703 }
12704 return SDValue();
12705}
12706
12707unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
12708 // Note: This functionality is used only when unsafe-fp-math is enabled, and
12709 // on cores with reciprocal estimates (which are used when unsafe-fp-math is
12710 // enabled for division), this functionality is redundant with the default
12711 // combiner logic (once the division -> reciprocal/multiply transformation
12712 // has taken place). As a result, this matters more for older cores than for
12713 // newer ones.
12714
12715 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
12716 // reciprocal if there are two or more FDIVs (for embedded cores with only
12717 // one FP pipeline) or three or more FDIVs (for generic OOO cores).
12718 switch (Subtarget.getCPUDirective()) {
12719 default:
12720 return 3;
12721 case PPC::DIR_440:
12722 case PPC::DIR_A2:
12723 case PPC::DIR_E500:
12724 case PPC::DIR_E500mc:
12725 case PPC::DIR_E5500:
12726 return 2;
12727 }
12728}
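// [Editor's note] Illustrative sketch, not part of the original file: the
// combine gated by the threshold above rewrites repeated divisions by a
// common divisor into one reciprocal and several multiplies (hypothetical
// user code, only legal under unsafe-fp-math):
static void repeatedDivExample(double X, double Y, double D, double &A,
                               double &B) {
  // With enough FDIVs sharing the divisor D, the combiner effectively turns
  // this into:  R = 1.0 / D;  A = X * R;  B = Y * R;
  A = X / D;
  B = Y / D;
}
// A core that returns 2 above applies the rewrite as soon as two FDIVs share
// a divisor; the default of 3 requires three.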
12729
12730// isConsecutiveLSLoc needs to work even if all adds have not yet been
12731// collapsed, and so we need to look through chains of them.
12732static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
12733 int64_t& Offset, SelectionDAG &DAG) {
12734 if (DAG.isBaseWithConstantOffset(Loc)) {
12735 Base = Loc.getOperand(0);
12736 Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
12737
12738 // The base might itself be a base plus an offset, and if so, accumulate
12739 // that as well.
12740 getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
12741 }
12742}
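// [Editor's note] Illustrative worked example, not part of the original file.
// For Loc = (add (add %base, 16), 8) the recursion above first accumulates
// Offset += 8 with Base = (add %base, 16), then recurses into that operand
// and accumulates Offset += 16, ending with Base = %base and Offset = 24.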
12743
12744static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
12745 unsigned Bytes, int Dist,
12746 SelectionDAG &DAG) {
12747 if (VT.getSizeInBits() / 8 != Bytes)
12748 return false;
12749
12750 SDValue BaseLoc = Base->getBasePtr();
12751 if (Loc.getOpcode() == ISD::FrameIndex) {
12752 if (BaseLoc.getOpcode() != ISD::FrameIndex)
12753 return false;
12754 const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
12755 int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
12756 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
12757 int FS = MFI.getObjectSize(FI);
12758 int BFS = MFI.getObjectSize(BFI);
12759 if (FS != BFS || FS != (int)Bytes) return false;
12760 return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
12761 }
12762
12763 SDValue Base1 = Loc, Base2 = BaseLoc;
12764 int64_t Offset1 = 0, Offset2 = 0;
12765 getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
12766 getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
12767 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
12768 return true;
12769
12770 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12771 const GlobalValue *GV1 = nullptr;
12772 const GlobalValue *GV2 = nullptr;
12773 Offset1 = 0;
12774 Offset2 = 0;
12775 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
12776 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
12777 if (isGA1 && isGA2 && GV1 == GV2)
12778 return Offset1 == (Offset2 + Dist*Bytes);
12779 return false;
12780}
12781
12782// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
12783// not enforce equality of the chain operands.
12784static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
12785 unsigned Bytes, int Dist,
12786 SelectionDAG &DAG) {
12787 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
12788 EVT VT = LS->getMemoryVT();
12789 SDValue Loc = LS->getBasePtr();
12790 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
12791 }
12792
12793 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
12794 EVT VT;
12795 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
12796 default: return false;
12797 case Intrinsic::ppc_altivec_lvx:
12798 case Intrinsic::ppc_altivec_lvxl:
12799 case Intrinsic::ppc_vsx_lxvw4x:
12800 case Intrinsic::ppc_vsx_lxvw4x_be:
12801 VT = MVT::v4i32;
12802 break;
12803 case Intrinsic::ppc_vsx_lxvd2x:
12804 case Intrinsic::ppc_vsx_lxvd2x_be:
12805 VT = MVT::v2f64;
12806 break;
12807 case Intrinsic::ppc_altivec_lvebx:
12808 VT = MVT::i8;
12809 break;
12810 case Intrinsic::ppc_altivec_lvehx:
12811 VT = MVT::i16;
12812 break;
12813 case Intrinsic::ppc_altivec_lvewx:
12814 VT = MVT::i32;
12815 break;
12816 }
12817
12818 return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
12819 }
12820
12821 if (N->getOpcode() == ISD::INTRINSIC_VOID) {
12822 EVT VT;
12823 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
12824 default: return false;
12825 case Intrinsic::ppc_altivec_stvx:
12826 case Intrinsic::ppc_altivec_stvxl:
12827 case Intrinsic::ppc_vsx_stxvw4x:
12828 VT = MVT::v4i32;
12829 break;
12830 case Intrinsic::ppc_vsx_stxvd2x:
12831 VT = MVT::v2f64;
12832 break;
12833 case Intrinsic::ppc_vsx_stxvw4x_be:
12834 VT = MVT::v4i32;
12835 break;
12836 case Intrinsic::ppc_vsx_stxvd2x_be:
12837 VT = MVT::v2f64;
12838 break;
12839 case Intrinsic::ppc_altivec_stvebx:
12840 VT = MVT::i8;
12841 break;
12842 case Intrinsic::ppc_altivec_stvehx:
12843 VT = MVT::i16;
12844 break;
12845 case Intrinsic::ppc_altivec_stvewx:
12846 VT = MVT::i32;
12847 break;
12848 }
12849
12850 return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
12851 }
12852
12853 return false;
12854}
12855
12856// Return true if there is a nearby consecutive load to the one provided
12857// (regardless of alignment). We search up and down the chain, looking through
12858// token factors and other loads (but nothing else). As a result, a true result
12859// indicates that it is safe to create a new consecutive load adjacent to the
12860// load provided.
12861static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
12862 SDValue Chain = LD->getChain();
12863 EVT VT = LD->getMemoryVT();
12864
12865 SmallSet<SDNode *, 16> LoadRoots;
12866 SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
12867 SmallSet<SDNode *, 16> Visited;
12868
12869 // First, search up the chain, branching to follow all token-factor operands.
12870 // If we find a consecutive load, then we're done, otherwise, record all
12871 // nodes just above the top-level loads and token factors.
12872 while (!Queue.empty()) {
12873 SDNode *ChainNext = Queue.pop_back_val();
12874 if (!Visited.insert(ChainNext).second)
12875 continue;
12876
12877 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
12878 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
12879 return true;
12880
12881 if (!Visited.count(ChainLD->getChain().getNode()))
12882 Queue.push_back(ChainLD->getChain().getNode());
12883 } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
12884 for (const SDUse &O : ChainNext->ops())
12885 if (!Visited.count(O.getNode()))
12886 Queue.push_back(O.getNode());
12887 } else
12888 LoadRoots.insert(ChainNext);
12889 }
12890
12891 // Second, search down the chain, starting from the top-level nodes recorded
12892 // in the first phase. These top-level nodes are the nodes just above all
12893 // loads and token factors. Starting with their uses, recursively look through
12894 // all loads (just the chain uses) and token factors to find a consecutive
12895 // load.
12896 Visited.clear();
12897 Queue.clear();
12898
12899 for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
12900 IE = LoadRoots.end(); I != IE; ++I) {
12901 Queue.push_back(*I);
12902
12903 while (!Queue.empty()) {
12904 SDNode *LoadRoot = Queue.pop_back_val();
12905 if (!Visited.insert(LoadRoot).second)
12906 continue;
12907
12908 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
12909 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
12910 return true;
12911
12912 for (SDNode::use_iterator UI = LoadRoot->use_begin(),
12913 UE = LoadRoot->use_end(); UI != UE; ++UI)
12914 if (((isa<MemSDNode>(*UI) &&
12915 cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
12916 UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
12917 Queue.push_back(*UI);
12918 }
12919 }
12920
12921 return false;
12922}
12923
12924/// This function is called when we have proved that a SETCC node can be replaced
12925/// by subtraction (and other supporting instructions) so that the result of
12926/// the comparison is kept in a GPR instead of a CR. This function is purely for
12927/// codegen purposes and has some flags to guide the codegen process.
12928static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
12929 bool Swap, SDLoc &DL, SelectionDAG &DAG) {
12930 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
12931
12932 // Zero extend the operands to the largest legal integer. Originally, they
12933 // must be of a strictly smaller size.
12934 auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
12935 DAG.getConstant(Size, DL, MVT::i32));
12936 auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
12937 DAG.getConstant(Size, DL, MVT::i32));
12938
12939 // Swap if needed. Depends on the condition code.
12940 if (Swap)
12941 std::swap(Op0, Op1);
12942
12943 // Subtract extended integers.
12944 auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
12945
12946 // Move the sign bit to the least significant position and zero out the rest.
12947 // Now the least significant bit carries the result of original comparison.
12948 auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
12949 DAG.getConstant(Size - 1, DL, MVT::i32));
12950 auto Final = Shifted;
12951
12952 // Complement the result if needed. Based on the condition code.
12953 if (Complement)
12954 Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
12955 DAG.getConstant(1, DL, MVT::i64));
12956
12957 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
12958}
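// [Editor's note] Illustrative sketch, not part of the original file: a worked
// example of the SETULT pattern above for 32-bit operands. Because both
// inputs are zero-extended, the 64-bit difference has its sign bit set
// exactly when the first operand is unsigned-less-than the second:
static bool ultBySubExample(unsigned A, unsigned B) {
  unsigned long long Sub =
      (unsigned long long)A - (unsigned long long)B; // e.g. 3 - 5 wraps to
                                                     // 0xFFFFFFFFFFFFFFFE
  return (Sub >> 63) & 1; // 1 iff A <u B; SETULE/SETUGT/SETUGE add a swap
                          // and/or an xor with 1
}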
12959
12960SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
12961 DAGCombinerInfo &DCI) const {
12962 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
12963
12964 SelectionDAG &DAG = DCI.DAG;
12965 SDLoc DL(N);
12966
12967 // Size of integers being compared has a critical role in the following
12968 // analysis, so we prefer to do this when all types are legal.
12969 if (!DCI.isAfterLegalizeDAG())
12970 return SDValue();
12971
12972 // If all users of SETCC extend its value to a legal integer type
12973 // then we replace SETCC with a subtraction
12974 for (SDNode::use_iterator UI = N->use_begin(),
12975 UE = N->use_end(); UI != UE; ++UI) {
12976 if (UI->getOpcode() != ISD::ZERO_EXTEND)
12977 return SDValue();
12978 }
12979
12980 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
12981 auto OpSize = N->getOperand(0).getValueSizeInBits();
12982
12983 unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
12984
12985 if (OpSize < Size) {
12986 switch (CC) {
12987 default: break;
12988 case ISD::SETULT:
12989 return generateEquivalentSub(N, Size, false, false, DL, DAG);
12990 case ISD::SETULE:
12991 return generateEquivalentSub(N, Size, true, true, DL, DAG);
12992 case ISD::SETUGT:
12993 return generateEquivalentSub(N, Size, false, true, DL, DAG);
12994 case ISD::SETUGE:
12995 return generateEquivalentSub(N, Size, true, false, DL, DAG);
12996 }
12997 }
12998
12999 return SDValue();
13000}
13001
13002SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
13003 DAGCombinerInfo &DCI) const {
13004 SelectionDAG &DAG = DCI.DAG;
13005 SDLoc dl(N);
13006
13007 assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
13008 // If we're tracking CR bits, we need to be careful that we don't have:
13009 // trunc(binary-ops(zext(x), zext(y)))
13010 // or
13011 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
13012 // such that we're unnecessarily moving things into GPRs when it would be
13013 // better to keep them in CR bits.
13014
13015 // Note that trunc here can be an actual i1 trunc, or can be the effective
13016 // truncation that comes from a setcc or select_cc.
13017 if (N->getOpcode() == ISD::TRUNCATE &&
13018 N->getValueType(0) != MVT::i1)
13019 return SDValue();
13020
13021 if (N->getOperand(0).getValueType() != MVT::i32 &&
13022 N->getOperand(0).getValueType() != MVT::i64)
13023 return SDValue();
13024
13025 if (N->getOpcode() == ISD::SETCC ||
13026 N->getOpcode() == ISD::SELECT_CC) {
13027 // If we're looking at a comparison, then we need to make sure that the
13028 // high bits (all except for the first) don't affect the result.
13029 ISD::CondCode CC =
13030 cast<CondCodeSDNode>(N->getOperand(
13031 N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
13032 unsigned OpBits = N->getOperand(0).getValueSizeInBits();
13033
13034 if (ISD::isSignedIntSetCC(CC)) {
13035 if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
13036 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
13037 return SDValue();
13038 } else if (ISD::isUnsignedIntSetCC(CC)) {
13039 if (!DAG.MaskedValueIsZero(N->getOperand(0),
13040 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
13041 !DAG.MaskedValueIsZero(N->getOperand(1),
13042 APInt::getHighBitsSet(OpBits, OpBits-1)))
13043 return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
13044 : SDValue());
13045 } else {
13046 // This is neither a signed nor an unsigned comparison, just make sure
13047 // that the high bits are equal.
13048 KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
13049 KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
13050
13051 // We don't really care about what is known about the first bit (if
13052 // anything), so clear it in all masks prior to comparing them.
13053 Op1Known.Zero.clearBit(0); Op1Known.One.clearBit(0);
13054 Op2Known.Zero.clearBit(0); Op2Known.One.clearBit(0);
13055
13056 if (Op1Known.Zero != Op2Known.Zero || Op1Known.One != Op2Known.One)
13057 return SDValue();
13058 }
13059 }
13060
13061  // We now know that the higher-order bits are irrelevant; we just need to
13062 // make sure that all of the intermediate operations are bit operations, and
13063 // all inputs are extensions.
13064 if (N->getOperand(0).getOpcode() != ISD::AND &&
13065 N->getOperand(0).getOpcode() != ISD::OR &&
13066 N->getOperand(0).getOpcode() != ISD::XOR &&
13067 N->getOperand(0).getOpcode() != ISD::SELECT &&
13068 N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
13069 N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
13070 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
13071 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
13072 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
13073 return SDValue();
13074
13075 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
13076 N->getOperand(1).getOpcode() != ISD::AND &&
13077 N->getOperand(1).getOpcode() != ISD::OR &&
13078 N->getOperand(1).getOpcode() != ISD::XOR &&
13079 N->getOperand(1).getOpcode() != ISD::SELECT &&
13080 N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
13081 N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
13082 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
13083 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
13084 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
13085 return SDValue();
13086
13087 SmallVector<SDValue, 4> Inputs;
13088 SmallVector<SDValue, 8> BinOps, PromOps;
13089 SmallPtrSet<SDNode *, 16> Visited;
13090
13091 for (unsigned i = 0; i < 2; ++i) {
13092 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
13093 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
13094 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
13095 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
13096 isa<ConstantSDNode>(N->getOperand(i)))
13097 Inputs.push_back(N->getOperand(i));
13098 else
13099 BinOps.push_back(N->getOperand(i));
13100
13101 if (N->getOpcode() == ISD::TRUNCATE)
13102 break;
13103 }
13104
13105 // Visit all inputs, collect all binary operations (and, or, xor and
13106 // select) that are all fed by extensions.
13107 while (!BinOps.empty()) {
13108 SDValue BinOp = BinOps.back();
13109 BinOps.pop_back();
13110
13111 if (!Visited.insert(BinOp.getNode()).second)
13112 continue;
13113
13114 PromOps.push_back(BinOp);
13115
13116 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
13117 // The condition of the select is not promoted.
13118 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
13119 continue;
13120 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
13121 continue;
13122
13123 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
13124 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
13125 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
13126 BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
13127 isa<ConstantSDNode>(BinOp.getOperand(i))) {
13128 Inputs.push_back(BinOp.getOperand(i));
13129 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
13130 BinOp.getOperand(i).getOpcode() == ISD::OR ||
13131 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
13132 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
13133 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
13134 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
13135 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
13136 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
13137 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
13138 BinOps.push_back(BinOp.getOperand(i));
13139 } else {
13140 // We have an input that is not an extension or another binary
13141 // operation; we'll abort this transformation.
13142 return SDValue();
13143 }
13144 }
13145 }
13146
13147 // Make sure that this is a self-contained cluster of operations (which
13148 // is not quite the same thing as saying that everything has only one
13149 // use).
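  // Illustrative note: if one of these zext inputs also feeds a node outside
  // the cluster, replacing it below with its i1 operand would change the type
  // seen by that external user, so in that case we give up.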
13150 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13151 if (isa<ConstantSDNode>(Inputs[i]))
13152 continue;
13153
13154 for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
13155 UE = Inputs[i].getNode()->use_end();
13156 UI != UE; ++UI) {
13157 SDNode *User = *UI;
13158 if (User != N && !Visited.count(User))
13159 return SDValue();
13160
13161 // Make sure that we're not going to promote the non-output-value
13162      // operand(s) of SELECT or SELECT_CC.
13163 // FIXME: Although we could sometimes handle this, and it does occur in
13164 // practice that one of the condition inputs to the select is also one of
13165 // the outputs, we currently can't deal with this.
13166 if (User->getOpcode() == ISD::SELECT) {
13167 if (User->getOperand(0) == Inputs[i])
13168 return SDValue();
13169 } else if (User->getOpcode() == ISD::SELECT_CC) {
13170 if (User->getOperand(0) == Inputs[i] ||
13171 User->getOperand(1) == Inputs[i])
13172 return SDValue();
13173 }
13174 }
13175 }
13176
13177 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
13178 for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
13179 UE = PromOps[i].getNode()->use_end();
13180 UI != UE; ++UI) {
13181 SDNode *User = *UI;
13182 if (User != N && !Visited.count(User))
13183 return SDValue();
13184
13185 // Make sure that we're not going to promote the non-output-value
13186      // operand(s) of SELECT or SELECT_CC.
13187 // FIXME: Although we could sometimes handle this, and it does occur in
13188 // practice that one of the condition inputs to the select is also one of
13189 // the outputs, we currently can't deal with this.
13190 if (User->getOpcode() == ISD::SELECT) {
13191 if (User->getOperand(0) == PromOps[i])
13192 return SDValue();
13193 } else if (User->getOpcode() == ISD::SELECT_CC) {
13194 if (User->getOperand(0) == PromOps[i] ||
13195 User->getOperand(1) == PromOps[i])
13196 return SDValue();
13197 }
13198 }
13199 }
13200
13201 // Replace all inputs with the extension operand.
13202 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13203 // Constants may have users outside the cluster of to-be-promoted nodes,
13204 // and so we need to replace those as we do the promotions.
13205 if (isa<ConstantSDNode>(Inputs[i]))
13206 continue;
13207 else
13208 DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
13209 }
13210
13211 std::list<HandleSDNode> PromOpHandles;
13212 for (auto &PromOp : PromOps)
13213 PromOpHandles.emplace_back(PromOp);
13214
13215 // Replace all operations (these are all the same, but have a different
13216 // (i1) return type). DAG.getNode will validate that the types of
13217 // a binary operator match, so go through the list in reverse so that
13218 // we've likely promoted both operands first. Any intermediate truncations or
13219 // extensions disappear.
13220 while (!PromOpHandles.empty()) {
13221 SDValue PromOp = PromOpHandles.back().getValue();
13222 PromOpHandles.pop_back();
13223
13224 if (PromOp.getOpcode() == ISD::TRUNCATE ||
13225 PromOp.getOpcode() == ISD::SIGN_EXTEND ||
13226 PromOp.getOpcode() == ISD::ZERO_EXTEND ||
13227 PromOp.getOpcode() == ISD::ANY_EXTEND) {
13228 if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
13229 PromOp.getOperand(0).getValueType() != MVT::i1) {
13230 // The operand is not yet ready (see comment below).
13231 PromOpHandles.emplace_front(PromOp);
13232 continue;
13233 }
13234
13235 SDValue RepValue = PromOp.getOperand(0);
13236 if (isa<ConstantSDNode>(RepValue))
13237 RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
13238
13239 DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
13240 continue;
13241 }
13242
13243 unsigned C;
13244 switch (PromOp.getOpcode()) {
13245 default: C = 0; break;
13246 case ISD::SELECT: C = 1; break;
13247 case ISD::SELECT_CC: C = 2; break;
13248 }
13249
13250 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
13251 PromOp.getOperand(C).getValueType() != MVT::i1) ||
13252 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
13253 PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
13254 // The to-be-promoted operands of this node have not yet been
13255 // promoted (this should be rare because we're going through the
13256 // list backward, but if one of the operands has several users in
13257 // this cluster of to-be-promoted nodes, it is possible).
13258 PromOpHandles.emplace_front(PromOp);
13259 continue;
13260 }
13261
13262 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
13263 PromOp.getNode()->op_end());
13264
13265 // If there are any constant inputs, make sure they're replaced now.
13266 for (unsigned i = 0; i < 2; ++i)
13267 if (isa<ConstantSDNode>(Ops[C+i]))
13268 Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
13269
13270 DAG.ReplaceAllUsesOfValueWith(PromOp,
13271 DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
13272 }
13273
13274 // Now we're left with the initial truncation itself.
13275 if (N->getOpcode() == ISD::TRUNCATE)
13276 return N->getOperand(0);
13277
13278 // Otherwise, this is a comparison. The operands to be compared have just
13279 // changed type (to i1), but everything else is the same.
13280 return SDValue(N, 0);
13281}
13282
13283SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
13284 DAGCombinerInfo &DCI) const {
13285 SelectionDAG &DAG = DCI.DAG;
13286 SDLoc dl(N);
13287
13288 // If we're tracking CR bits, we need to be careful that we don't have:
13289 // zext(binary-ops(trunc(x), trunc(y)))
13290 // or
13291 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
13292 // such that we're unnecessarily moving things into CR bits that can more
13293 // efficiently stay in GPRs. Note that if we're not certain that the high
13294 // bits are set as required by the final extension, we still may need to do
13295 // some masking to get the proper behavior.
13296
13297 // This same functionality is important on PPC64 when dealing with
13298 // 32-to-64-bit extensions; these occur often when 32-bit values are used as
13299 // the return values of functions. Because it is so similar, it is handled
13300 // here as well.
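  // Illustrative example: on PPC64,
  //   (zext (and (trunc i64 %x), (trunc i64 %y)) to i64)
  // can typically be rewritten as (and i64 %x, %y), possibly followed by a
  // masking AND (or a shift pair for sign extension) when the high bits are
  // not already known to hold the value the extension requires.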
13301
13302 if (N->getValueType(0) != MVT::i32 &&
13303 N->getValueType(0) != MVT::i64)
13304 return SDValue();
13305
13306 if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
13307 (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
13308 return SDValue();
13309
13310 if (N->getOperand(0).getOpcode() != ISD::AND &&
13311 N->getOperand(0).getOpcode() != ISD::OR &&
13312 N->getOperand(0).getOpcode() != ISD::XOR &&
13313 N->getOperand(0).getOpcode() != ISD::SELECT &&
13314 N->getOperand(0).getOpcode() != ISD::SELECT_CC)
13315 return SDValue();
13316
13317 SmallVector<SDValue, 4> Inputs;
13318 SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
13319 SmallPtrSet<SDNode *, 16> Visited;
13320
13321 // Visit all inputs, collect all binary operations (and, or, xor and
13322 // select) that are all fed by truncations.
13323 while (!BinOps.empty()) {
13324 SDValue BinOp = BinOps.back();
13325 BinOps.pop_back();
13326
13327 if (!Visited.insert(BinOp.getNode()).second)
13328 continue;
13329
13330 PromOps.push_back(BinOp);
13331
13332 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
13333 // The condition of the select is not promoted.
13334 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
13335 continue;
13336 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
13337 continue;
13338
13339 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
13340 isa<ConstantSDNode>(BinOp.getOperand(i))) {
13341 Inputs.push_back(BinOp.getOperand(i));
13342 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
13343 BinOp.getOperand(i).getOpcode() == ISD::OR ||
13344 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
13345 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
13346 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
13347 BinOps.push_back(BinOp.getOperand(i));
13348 } else {
13349 // We have an input that is not a truncation or another binary
13350 // operation; we'll abort this transformation.
13351 return SDValue();
13352 }
13353 }
13354 }
13355
13356 // The operands of a select that must be truncated when the select is
13357 // promoted because the operand is actually part of the to-be-promoted set.
13358 DenseMap<SDNode *, EVT> SelectTruncOp[2];
13359
13360 // Make sure that this is a self-contained cluster of operations (which
13361 // is not quite the same thing as saying that everything has only one
13362 // use).
13363 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13364 if (isa<ConstantSDNode>(Inputs[i]))
13365 continue;
13366
13367 for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
13368 UE = Inputs[i].getNode()->use_end();
13369 UI != UE; ++UI) {
13370 SDNode *User = *UI;
13371 if (User != N && !Visited.count(User))
13372 return SDValue();
13373
13374      // If we're going to promote the non-output-value operand(s) of SELECT or
13375 // SELECT_CC, record them for truncation.
13376 if (User->getOpcode() == ISD::SELECT) {
13377 if (User->getOperand(0) == Inputs[i])
13378 SelectTruncOp[0].insert(std::make_pair(User,
13379 User->getOperand(0).getValueType()));
13380 } else if (User->getOpcode() == ISD::SELECT_CC) {
13381 if (User->getOperand(0) == Inputs[i])
13382 SelectTruncOp[0].insert(std::make_pair(User,
13383 User->getOperand(0).getValueType()));
13384 if (User->getOperand(1) == Inputs[i])
13385 SelectTruncOp[1].insert(std::make_pair(User,
13386 User->getOperand(1).getValueType()));
13387 }
13388 }
13389 }
13390
13391 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
13392 for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
13393 UE = PromOps[i].getNode()->use_end();
13394 UI != UE; ++UI) {
13395 SDNode *User = *UI;
13396 if (User != N && !Visited.count(User))
13397 return SDValue();
13398
13399      // If we're going to promote the non-output-value operand(s) of SELECT or
13400 // SELECT_CC, record them for truncation.
13401 if (User->getOpcode() == ISD::SELECT) {
13402 if (User->getOperand(0) == PromOps[i])
13403 SelectTruncOp[0].insert(std::make_pair(User,
13404 User->getOperand(0).getValueType()));
13405 } else if (User->getOpcode() == ISD::SELECT_CC) {
13406 if (User->getOperand(0) == PromOps[i])
13407 SelectTruncOp[0].insert(std::make_pair(User,
13408 User->getOperand(0).getValueType()));
13409 if (User->getOperand(1) == PromOps[i])
13410 SelectTruncOp[1].insert(std::make_pair(User,
13411 User->getOperand(1).getValueType()));
13412 }
13413 }
13414 }
13415
13416 unsigned PromBits = N->getOperand(0).getValueSizeInBits();
13417 bool ReallyNeedsExt = false;
13418 if (N->getOpcode() != ISD::ANY_EXTEND) {
13419    // If not all of the inputs are already sign/zero extended, then
13420    // we'll still need to do that at the end.
13421 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13422 if (isa<ConstantSDNode>(Inputs[i]))
13423 continue;
13424
13425 unsigned OpBits =
13426 Inputs[i].getOperand(0).getValueSizeInBits();
13427      assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
13428
13429 if ((N->getOpcode() == ISD::ZERO_EXTEND &&
13430 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
13431 APInt::getHighBitsSet(OpBits,
13432 OpBits-PromBits))) ||
13433 (N->getOpcode() == ISD::SIGN_EXTEND &&
13434 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
13435 (OpBits-(PromBits-1)))) {
13436 ReallyNeedsExt = true;
13437 break;
13438 }
13439 }
13440 }
13441
13442 // Replace all inputs, either with the truncation operand, or a
13443 // truncation or extension to the final output type.
13444 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13445 // Constant inputs need to be replaced with the to-be-promoted nodes that
13446 // use them because they might have users outside of the cluster of
13447 // promoted nodes.
13448 if (isa<ConstantSDNode>(Inputs[i]))
13449 continue;
13450
13451 SDValue InSrc = Inputs[i].getOperand(0);
13452 if (Inputs[i].getValueType() == N->getValueType(0))
13453 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
13454 else if (N->getOpcode() == ISD::SIGN_EXTEND)
13455 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13456 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
13457 else if (N->getOpcode() == ISD::ZERO_EXTEND)
13458 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13459 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
13460 else
13461 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13462 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
13463 }
13464
13465 std::list<HandleSDNode> PromOpHandles;
13466 for (auto &PromOp : PromOps)
13467 PromOpHandles.emplace_back(PromOp);
13468
13469 // Replace all operations (these are all the same, but have a different
13470 // (promoted) return type). DAG.getNode will validate that the types of
13471 // a binary operator match, so go through the list in reverse so that
13472 // we've likely promoted both operands first.
13473 while (!PromOpHandles.empty()) {
13474 SDValue PromOp = PromOpHandles.back().getValue();
13475 PromOpHandles.pop_back();
13476
13477 unsigned C;
13478 switch (PromOp.getOpcode()) {
13479 default: C = 0; break;
13480 case ISD::SELECT: C = 1; break;
13481 case ISD::SELECT_CC: C = 2; break;
13482 }
13483
13484 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
13485 PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
13486 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
13487 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
13488 // The to-be-promoted operands of this node have not yet been
13489 // promoted (this should be rare because we're going through the
13490 // list backward, but if one of the operands has several users in
13491 // this cluster of to-be-promoted nodes, it is possible).
13492 PromOpHandles.emplace_front(PromOp);
13493 continue;
13494 }
13495
13496 // For SELECT and SELECT_CC nodes, we do a similar check for any
13497 // to-be-promoted comparison inputs.
13498 if (PromOp.getOpcode() == ISD::SELECT ||
13499 PromOp.getOpcode() == ISD::SELECT_CC) {
13500 if ((SelectTruncOp[0].count(PromOp.getNode()) &&
13501 PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
13502 (SelectTruncOp[1].count(PromOp.getNode()) &&
13503 PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
13504 PromOpHandles.emplace_front(PromOp);
13505 continue;
13506 }
13507 }
13508
13509 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
13510 PromOp.getNode()->op_end());
13511
13512 // If this node has constant inputs, then they'll need to be promoted here.
13513 for (unsigned i = 0; i < 2; ++i) {
13514 if (!isa<ConstantSDNode>(Ops[C+i]))
13515 continue;
13516 if (Ops[C+i].getValueType() == N->getValueType(0))
13517 continue;
13518
13519 if (N->getOpcode() == ISD::SIGN_EXTEND)
13520 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13521 else if (N->getOpcode() == ISD::ZERO_EXTEND)
13522 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13523 else
13524 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13525 }
13526
13527 // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
13528 // truncate them again to the original value type.
13529 if (PromOp.getOpcode() == ISD::SELECT ||
13530 PromOp.getOpcode() == ISD::SELECT_CC) {
13531 auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
13532 if (SI0 != SelectTruncOp[0].end())
13533 Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
13534 auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
13535 if (SI1 != SelectTruncOp[1].end())
13536 Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
13537 }
13538
13539 DAG.ReplaceAllUsesOfValueWith(PromOp,
13540 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
13541 }
13542
13543 // Now we're left with the initial extension itself.
13544 if (!ReallyNeedsExt)
13545 return N->getOperand(0);
13546
13547 // To zero extend, just mask off everything except for the first bit (in the
13548 // i1 case).
13549 if (N->getOpcode() == ISD::ZERO_EXTEND)
13550 return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
13551 DAG.getConstant(APInt::getLowBitsSet(
13552 N->getValueSizeInBits(0), PromBits),
13553 dl, N->getValueType(0)));
13554
13555  assert(N->getOpcode() == ISD::SIGN_EXTEND &&
13556         "Invalid extension type");
13557 EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
13558 SDValue ShiftCst =
13559 DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
13560 return DAG.getNode(
13561 ISD::SRA, dl, N->getValueType(0),
13562 DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
13563 ShiftCst);
13564}
13565
13566SDValue PPCTargetLowering::combineSetCC(SDNode *N,
13567 DAGCombinerInfo &DCI) const {
13568  assert(N->getOpcode() == ISD::SETCC &&
13569         "Should be called with a SETCC node");
13570
13571 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13572 if (CC == ISD::SETNE || CC == ISD::SETEQ) {
13573 SDValue LHS = N->getOperand(0);
13574 SDValue RHS = N->getOperand(1);
13575
13576 // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
13577 if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
13578 LHS.hasOneUse())
13579 std::swap(LHS, RHS);
13580
13581 // x == 0-y --> x+y == 0
13582 // x != 0-y --> x+y != 0
13583 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
13584 RHS.hasOneUse()) {
13585 SDLoc DL(N);
13586 SelectionDAG &DAG = DCI.DAG;
13587 EVT VT = N->getValueType(0);
13588 EVT OpVT = LHS.getValueType();
13589 SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
13590 return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
13591 }
13592 }
13593
13594 return DAGCombineTruncBoolExt(N, DCI);
13595}
13596
13597// Is this an extending load from an f32 to an f64?
13598static bool isFPExtLoad(SDValue Op) {
13599 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
13600 return LD->getExtensionType() == ISD::EXTLOAD &&
13601 Op.getValueType() == MVT::f64;
13602 return false;
13603}
13604
13605/// Reduces the number of fp-to-int conversion when building a vector.
13606///
13607/// If this vector is built out of floating to integer conversions,
13608/// transform it to a vector built out of floating point values followed by a
13609/// single floating to integer conversion of the vector.
13610/// Namely (build_vector (fptosi $A), (fptosi $B), ...)
13611/// becomes (fptosi (build_vector ($A, $B, ...)))
13612SDValue PPCTargetLowering::
13613combineElementTruncationToVectorTruncation(SDNode *N,
13614 DAGCombinerInfo &DCI) const {
13615  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
13616         "Should be called with a BUILD_VECTOR node");
13617
13618 SelectionDAG &DAG = DCI.DAG;
13619 SDLoc dl(N);
13620
13621 SDValue FirstInput = N->getOperand(0);
13622  assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
13623         "The input operand must be an fp-to-int conversion.");
13624
13625  // This combine happens after legalization, so the fp_to_[su]i nodes are
13626  // already converted to PPCISD nodes.
13627 unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
13628 if (FirstConversion == PPCISD::FCTIDZ ||
13629 FirstConversion == PPCISD::FCTIDUZ ||
13630 FirstConversion == PPCISD::FCTIWZ ||
13631 FirstConversion == PPCISD::FCTIWUZ) {
13632 bool IsSplat = true;
13633 bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
13634 FirstConversion == PPCISD::FCTIWUZ;
13635 EVT SrcVT = FirstInput.getOperand(0).getValueType();
13636 SmallVector<SDValue, 4> Ops;
13637 EVT TargetVT = N->getValueType(0);
13638 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
13639 SDValue NextOp = N->getOperand(i);
13640 if (NextOp.getOpcode() != PPCISD::MFVSR)
13641 return SDValue();
13642 unsigned NextConversion = NextOp.getOperand(0).getOpcode();
13643 if (NextConversion != FirstConversion)
13644 return SDValue();
13645 // If we are converting to 32-bit integers, we need to add an FP_ROUND.
13646 // This is not valid if the input was originally double precision. It is
13647 // also not profitable to do unless this is an extending load in which
13648 // case doing this combine will allow us to combine consecutive loads.
13649 if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
13650 return SDValue();
13651 if (N->getOperand(i) != FirstInput)
13652 IsSplat = false;
13653 }
13654
13655 // If this is a splat, we leave it as-is since there will be only a single
13656 // fp-to-int conversion followed by a splat of the integer. This is better
13657 // for 32-bit and smaller ints and neutral for 64-bit ints.
13658 if (IsSplat)
13659 return SDValue();
13660
13661 // Now that we know we have the right type of node, get its operands
13662 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
13663 SDValue In = N->getOperand(i).getOperand(0);
13664 if (Is32Bit) {
13665 // For 32-bit values, we need to add an FP_ROUND node (if we made it
13666 // here, we know that all inputs are extending loads so this is safe).
13667 if (In.isUndef())
13668 Ops.push_back(DAG.getUNDEF(SrcVT));
13669 else {
13670 SDValue Trunc = DAG.getNode(ISD::FP_ROUND, dl,
13671 MVT::f32, In.getOperand(0),
13672 DAG.getIntPtrConstant(1, dl));
13673 Ops.push_back(Trunc);
13674 }
13675 } else
13676 Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
13677 }
13678
13679 unsigned Opcode;
13680 if (FirstConversion == PPCISD::FCTIDZ ||
13681 FirstConversion == PPCISD::FCTIWZ)
13682 Opcode = ISD::FP_TO_SINT;
13683 else
13684 Opcode = ISD::FP_TO_UINT;
13685
13686 EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
13687 SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
13688 return DAG.getNode(Opcode, dl, TargetVT, BV);
13689 }
13690 return SDValue();
13691}
13692
13693/// Reduce the number of loads when building a vector.
13694///
13695/// Building a vector out of multiple loads can be converted to a load
13696/// of the vector type if the loads are consecutive. If the loads are
13697/// consecutive but in descending order, a shuffle is added at the end
13698/// to reorder the vector.
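/// For example (illustrative), a v4i32 build_vector whose operands are four
/// i32 loads from A+12, A+8, A+4 and A+0 (reverse consecutive) becomes a
/// single v4i32 load from A followed by a vector_shuffle that reverses the
/// element order.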
13699static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
13700  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
13701         "Should be called with a BUILD_VECTOR node");
13702
13703 SDLoc dl(N);
13704
13705  // Return early for non byte-sized types, as they can't be consecutive.
13706 if (!N->getValueType(0).getVectorElementType().isByteSized())
13707 return SDValue();
13708
13709 bool InputsAreConsecutiveLoads = true;
13710 bool InputsAreReverseConsecutive = true;
13711 unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
13712 SDValue FirstInput = N->getOperand(0);
13713 bool IsRoundOfExtLoad = false;
13714
13715 if (FirstInput.getOpcode() == ISD::FP_ROUND &&
13716 FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
13717 LoadSDNode *LD = dyn_cast<LoadSDNode>(FirstInput.getOperand(0));
13718 IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD;
13719 }
13720 // Not a build vector of (possibly fp_rounded) loads.
13721 if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
13722 N->getNumOperands() == 1)
13723 return SDValue();
13724
13725 for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
13726 // If any inputs are fp_round(extload), they all must be.
13727 if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
13728 return SDValue();
13729
13730 SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
13731 N->getOperand(i);
13732 if (NextInput.getOpcode() != ISD::LOAD)
13733 return SDValue();
13734
13735 SDValue PreviousInput =
13736 IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
13737 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput);
13738 LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput);
13739
13740 // If any inputs are fp_round(extload), they all must be.
13741 if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
13742 return SDValue();
13743
13744 if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG))
13745 InputsAreConsecutiveLoads = false;
13746 if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG))
13747 InputsAreReverseConsecutive = false;
13748
13749 // Exit early if the loads are neither consecutive nor reverse consecutive.
13750 if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
13751 return SDValue();
13752 }
13753
13754  assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
13755         "The loads cannot be both consecutive and reverse consecutive.");
13756
13757 SDValue FirstLoadOp =
13758 IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput;
13759 SDValue LastLoadOp =
13760 IsRoundOfExtLoad ? N->getOperand(N->getNumOperands()-1).getOperand(0) :
13761 N->getOperand(N->getNumOperands()-1);
13762
13763 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp);
13764 LoadSDNode *LDL = dyn_cast<LoadSDNode>(LastLoadOp);
13765 if (InputsAreConsecutiveLoads) {
13766    assert(LD1 && "Input needs to be a LoadSDNode.");
13767 return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(),
13768 LD1->getBasePtr(), LD1->getPointerInfo(),
13769 LD1->getAlignment());
13770 }
13771 if (InputsAreReverseConsecutive) {
13772    assert(LDL && "Input needs to be a LoadSDNode.");
13773 SDValue Load = DAG.getLoad(N->getValueType(0), dl, LDL->getChain(),
13774 LDL->getBasePtr(), LDL->getPointerInfo(),
13775 LDL->getAlignment());
13776 SmallVector<int, 16> Ops;
13777 for (int i = N->getNumOperands() - 1; i >= 0; i--)
13778 Ops.push_back(i);
13779
13780 return DAG.getVectorShuffle(N->getValueType(0), dl, Load,
13781 DAG.getUNDEF(N->getValueType(0)), Ops);
13782 }
13783 return SDValue();
13784}
13785
13786// This function adds the required vector_shuffle needed to get
13787// the elements of the vector extract in the correct position
13788// as specified by the CorrectElems encoding.
13789static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
13790 SDValue Input, uint64_t Elems,
13791 uint64_t CorrectElems) {
13792 SDLoc dl(N);
13793
13794 unsigned NumElems = Input.getValueType().getVectorNumElements();
13795 SmallVector<int, 16> ShuffleMask(NumElems, -1);
13796
13797 // Knowing the element indices being extracted from the original
13798 // vector and the order in which they're being inserted, just put
13799 // them at element indices required for the instruction.
13800 for (unsigned i = 0; i < N->getNumOperands(); i++) {
13801 if (DAG.getDataLayout().isLittleEndian())
13802 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
13803 else
13804 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
13805 CorrectElems = CorrectElems >> 8;
13806 Elems = Elems >> 8;
13807 }
13808
13809 SDValue Shuffle =
13810 DAG.getVectorShuffle(Input.getValueType(), dl, Input,
13811 DAG.getUNDEF(Input.getValueType()), ShuffleMask);
13812
13813 EVT VT = N->getValueType(0);
13814 SDValue Conv = DAG.getBitcast(VT, Shuffle);
13815
13816 EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
13817 Input.getValueType().getVectorElementType(),
13818 VT.getVectorNumElements());
13819 return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
13820 DAG.getValueType(ExtVT));
13821}
13822
13823// Look for build vector patterns where input operands come from sign
13824// extended vector_extract elements of specific indices. If the correct indices
13825// aren't used, add a vector shuffle to fix up the indices and create
13826// SIGN_EXTEND_INREG node which selects the vector sign extend instructions
13827// during instruction selection.
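// For example (illustrative), on a little-endian subtarget a
//   (v4i32 build_vector (sext (extractelt v16i8 %v, 0)),
//                       (sext (extractelt v16i8 %v, 4)),
//                       (sext (extractelt v16i8 %v, 8)),
//                       (sext (extractelt v16i8 %v, 12)))
// already uses the indices the vector extend expects, so no shuffle is added
// here; any other index pattern drawn from the same input vector gets a
// vector_shuffle first so the bytes land where the extend instruction reads
// them.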
13828static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
13829 // This array encodes the indices that the vector sign extend instructions
13830 // extract from when extending from one type to another for both BE and LE.
13831  // The right nibble of each byte corresponds to the LE indices,
13832  // and the left nibble of each byte corresponds to the BE indices.
13833 // For example: 0x3074B8FC byte->word
13834 // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
13835 // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
13836 // For example: 0x000070F8 byte->double word
13837 // For LE: the allowed indices are: 0x0,0x8
13838 // For BE: the allowed indices are: 0x7,0xF
13839 uint64_t TargetElems[] = {
13840 0x3074B8FC, // b->w
13841 0x000070F8, // b->d
13842 0x10325476, // h->w
13843 0x00003074, // h->d
13844 0x00001032, // w->d
13845 };
13846
13847 uint64_t Elems = 0;
13848 int Index;
13849 SDValue Input;
13850
13851 auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
13852 if (!Op)
13853 return false;
13854 if (Op.getOpcode() != ISD::SIGN_EXTEND &&
13855 Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
13856 return false;
13857
13858 // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
13859 // of the right width.
13860 SDValue Extract = Op.getOperand(0);
13861 if (Extract.getOpcode() == ISD::ANY_EXTEND)
13862 Extract = Extract.getOperand(0);
13863 if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
13864 return false;
13865
13866 ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
13867 if (!ExtOp)
13868 return false;
13869
13870 Index = ExtOp->getZExtValue();
13871 if (Input && Input != Extract.getOperand(0))
13872 return false;
13873
13874 if (!Input)
13875 Input = Extract.getOperand(0);
13876
13877 Elems = Elems << 8;
13878 Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
13879 Elems |= Index;
13880
13881 return true;
13882 };
13883
13884  // If the build vector operands aren't sign-extended vector extracts
13885  // of the same input vector, then return.
13886 for (unsigned i = 0; i < N->getNumOperands(); i++) {
13887 if (!isSExtOfVecExtract(N->getOperand(i))) {
13888 return SDValue();
13889 }
13890 }
13891
13892  // If the vector extract indices are not correct, add the appropriate
13893 // vector_shuffle.
13894 int TgtElemArrayIdx;
13895 int InputSize = Input.getValueType().getScalarSizeInBits();
13896 int OutputSize = N->getValueType(0).getScalarSizeInBits();
13897 if (InputSize + OutputSize == 40)
13898 TgtElemArrayIdx = 0;
13899 else if (InputSize + OutputSize == 72)
13900 TgtElemArrayIdx = 1;
13901 else if (InputSize + OutputSize == 48)
13902 TgtElemArrayIdx = 2;
13903 else if (InputSize + OutputSize == 80)
13904 TgtElemArrayIdx = 3;
13905 else if (InputSize + OutputSize == 96)
13906 TgtElemArrayIdx = 4;
13907 else
13908 return SDValue();
13909
13910 uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
13911 CorrectElems = DAG.getDataLayout().isLittleEndian()
13912 ? CorrectElems & 0x0F0F0F0F0F0F0F0F
13913 : CorrectElems & 0xF0F0F0F0F0F0F0F0;
13914 if (Elems != CorrectElems) {
13915 return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
13916 }
13917
13918 // Regular lowering will catch cases where a shuffle is not needed.
13919 return SDValue();
13920}
13921
13922// Look for the pattern of a load from a narrow width to i128, feeding
13923// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
13924// (LXVRZX). This node represents a zero extending load that will be matched
13925// to the Load VSX Vector Rightmost instructions.
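// For example (illustrative), a pattern like
//   (v1i128 build_vector (i128 (zextload i32 from %p)))
// becomes an LXVRZX memory-intrinsic node, which loads the 32-bit value into
// the rightmost word of the vector register and zeroes the remaining bits.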
13926static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
13927 SDLoc DL(N);
13928
13929 // This combine is only eligible for a BUILD_VECTOR of v1i128.
13930 if (N->getValueType(0) != MVT::v1i128)
13931 return SDValue();
13932
13933 SDValue Operand = N->getOperand(0);
13934 // Proceed with the transformation if the operand to the BUILD_VECTOR
13935 // is a load instruction.
13936 if (Operand.getOpcode() != ISD::LOAD)
13937 return SDValue();
13938
13939 LoadSDNode *LD = dyn_cast<LoadSDNode>(Operand);
13940 EVT MemoryType = LD->getMemoryVT();
13941
13942  // This transformation is only valid if we are loading either a byte,
13943 // halfword, word, or doubleword.
13944 bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
13945 MemoryType == MVT::i32 || MemoryType == MVT::i64;
13946
13947 // Ensure that the load from the narrow width is being zero extended to i128.
13948 if (!ValidLDType ||
13949 (LD->getExtensionType() != ISD::ZEXTLOAD &&
13950 LD->getExtensionType() != ISD::EXTLOAD))
13951 return SDValue();
13952
13953 SDValue LoadOps[] = {
13954 LD->getChain(), LD->getBasePtr(),
13955 DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
13956
13957 return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
13958 DAG.getVTList(MVT::v1i128, MVT::Other),
13959 LoadOps, MemoryType, LD->getMemOperand());
13960}
13961
13962SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
13963 DAGCombinerInfo &DCI) const {
13964  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
13965         "Should be called with a BUILD_VECTOR node");
13966
13967 SelectionDAG &DAG = DCI.DAG;
13968 SDLoc dl(N);
13969
13970 if (!Subtarget.hasVSX())
13971 return SDValue();
13972
13973 // The target independent DAG combiner will leave a build_vector of
13974 // float-to-int conversions intact. We can generate MUCH better code for
13975 // a float-to-int conversion of a vector of floats.
13976 SDValue FirstInput = N->getOperand(0);
13977 if (FirstInput.getOpcode() == PPCISD::MFVSR) {
13978 SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
13979 if (Reduced)
13980 return Reduced;
13981 }
13982
13983 // If we're building a vector out of consecutive loads, just load that
13984 // vector type.
13985 SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
13986 if (Reduced)
13987 return Reduced;
13988
13989 // If we're building a vector out of extended elements from another vector
13990 // we have P9 vector integer extend instructions. The code assumes legal
13991 // input types (i.e. it can't handle things like v4i16) so do not run before
13992 // legalization.
13993 if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
13994 Reduced = combineBVOfVecSExt(N, DAG);
13995 if (Reduced)
13996 return Reduced;
13997 }
13998
13999 // On Power10, the Load VSX Vector Rightmost instructions can be utilized
14000 // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
14001 // is a load from <valid narrow width> to i128.
14002 if (Subtarget.isISA3_1()) {
14003 SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
14004 if (BVOfZLoad)
14005 return BVOfZLoad;
14006 }
14007
14008 if (N->getValueType(0) != MVT::v2f64)
14009 return SDValue();
14010
14011 // Looking for:
14012 // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
14013 if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
14014 FirstInput.getOpcode() != ISD::UINT_TO_FP)
14015 return SDValue();
14016 if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
14017 N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
14018 return SDValue();
14019 if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
14020 return SDValue();
14021
14022 SDValue Ext1 = FirstInput.getOperand(0);
14023 SDValue Ext2 = N->getOperand(1).getOperand(0);
14024 if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
14025 Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
14026 return SDValue();
14027
14028 ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
14029 ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
14030 if (!Ext1Op || !Ext2Op)
14031 return SDValue();
14032 if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
14033 Ext1.getOperand(0) != Ext2.getOperand(0))
14034 return SDValue();
14035
14036 int FirstElem = Ext1Op->getZExtValue();
14037 int SecondElem = Ext2Op->getZExtValue();
14038 int SubvecIdx;
14039 if (FirstElem == 0 && SecondElem == 1)
14040 SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
14041 else if (FirstElem == 2 && SecondElem == 3)
14042 SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
14043 else
14044 return SDValue();
14045
14046 SDValue SrcVec = Ext1.getOperand(0);
14047 auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
14048 PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
14049 return DAG.getNode(NodeType, dl, MVT::v2f64,
14050 SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
14051}
14052
14053SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
14054 DAGCombinerInfo &DCI) const {
14055  assert((N->getOpcode() == ISD::SINT_TO_FP ||
14056          N->getOpcode() == ISD::UINT_TO_FP) &&
14057         "Need an int -> FP conversion node here");
14058
14059 if (useSoftFloat() || !Subtarget.has64BitSupport())
14060 return SDValue();
14061
14062 SelectionDAG &DAG = DCI.DAG;
14063 SDLoc dl(N);
14064 SDValue Op(N, 0);
14065
14066 // Don't handle ppc_fp128 here or conversions that are out-of-range capable
14067 // from the hardware.
14068 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
14069 return SDValue();
14070 if (!Op.getOperand(0).getValueType().isSimple())
14071 return SDValue();
14072 if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
14073 Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
14074 return SDValue();
14075
14076 SDValue FirstOperand(Op.getOperand(0));
14077 bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
14078 (FirstOperand.getValueType() == MVT::i8 ||
14079 FirstOperand.getValueType() == MVT::i16);
14080 if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
14081 bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
14082 bool DstDouble = Op.getValueType() == MVT::f64;
14083 unsigned ConvOp = Signed ?
14084 (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
14085 (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
14086 SDValue WidthConst =
14087 DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
14088 dl, false);
14089 LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
14090 SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
14091 SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
14092 DAG.getVTList(MVT::f64, MVT::Other),
14093 Ops, MVT::i8, LDN->getMemOperand());
14094
14095 // For signed conversion, we need to sign-extend the value in the VSR
14096 if (Signed) {
14097 SDValue ExtOps[] = { Ld, WidthConst };
14098 SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
14099 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
14100 } else
14101 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
14102 }
14103
14104
14105  // For i32 intermediate values, unfortunately, the conversion functions
14106  // leave the upper 32 bits of the value undefined. Within the set of
14107 // scalar instructions, we have no method for zero- or sign-extending the
14108 // value. Thus, we cannot handle i32 intermediate values here.
14109 if (Op.getOperand(0).getValueType() == MVT::i32)
14110 return SDValue();
14111
14112  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
14113         "UINT_TO_FP is supported only with FPCVT");
14114
14115 // If we have FCFIDS, then use it when converting to single-precision.
14116 // Otherwise, convert to double-precision and then round.
14117 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
14118 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
14119 : PPCISD::FCFIDS)
14120 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
14121 : PPCISD::FCFID);
14122 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
14123 ? MVT::f32
14124 : MVT::f64;
14125
14126  // If we're converting from a float to an int and back to a float again,
14127 // then we don't need the store/load pair at all.
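  // Illustrative example: (f64 (sint_to_fp (fp_to_sint f64 %x))) can be
  // selected as an fctidz followed directly by an fcfid, keeping the value in
  // a floating-point register the whole time.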
14128 if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
14129 Subtarget.hasFPCVT()) ||
14130 (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
14131 SDValue Src = Op.getOperand(0).getOperand(0);
14132 if (Src.getValueType() == MVT::f32) {
14133 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
14134 DCI.AddToWorklist(Src.getNode());
14135 } else if (Src.getValueType() != MVT::f64) {
14136 // Make sure that we don't pick up a ppc_fp128 source value.
14137 return SDValue();
14138 }
14139
14140 unsigned FCTOp =
14141 Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
14142 PPCISD::FCTIDUZ;
14143
14144 SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
14145 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
14146
14147 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
14148 FP = DAG.getNode(ISD::FP_ROUND, dl,
14149 MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
14150 DCI.AddToWorklist(FP.getNode());
14151 }
14152
14153 return FP;
14154 }
14155
14156 return SDValue();
14157}
14158
14159// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
14160// builtins) into loads with swaps.
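// Illustrative example: on a little-endian subtarget that needs swaps, a
// v2f64 load becomes an LXVD2X (which yields the doublewords in big-endian
// element order) followed by an XXSWAPD that restores the element order the
// rest of the DAG expects; other vector types additionally get a bitcast
// from the v2f64 result.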
14161SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
14162 DAGCombinerInfo &DCI) const {
14163 SelectionDAG &DAG = DCI.DAG;
14164 SDLoc dl(N);
14165 SDValue Chain;
14166 SDValue Base;
14167 MachineMemOperand *MMO;
14168
14169 switch (N->getOpcode()) {
14170 default:
14171    llvm_unreachable("Unexpected opcode for little endian VSX load");
14172 case ISD::LOAD: {
14173 LoadSDNode *LD = cast<LoadSDNode>(N);
14174 Chain = LD->getChain();
14175 Base = LD->getBasePtr();
14176 MMO = LD->getMemOperand();
14177 // If the MMO suggests this isn't a load of a full vector, leave
14178 // things alone. For a built-in, we have to make the change for
14179 // correctness, so if there is a size problem that will be a bug.
14180 if (MMO->getSize() < 16)
14181 return SDValue();
14182 break;
14183 }
14184 case ISD::INTRINSIC_W_CHAIN: {
14185 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
14186 Chain = Intrin->getChain();
14187 // Similarly to the store case below, Intrin->getBasePtr() doesn't get
14188 // us what we want. Get operand 2 instead.
14189 Base = Intrin->getOperand(2);
14190 MMO = Intrin->getMemOperand();
14191 break;
14192 }
14193 }
14194
14195 MVT VecTy = N->getValueType(0).getSimpleVT();
14196
14197 // Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is
14198 // aligned and the type is a vector with elements up to 4 bytes
14199 if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&
14200 VecTy.getScalarSizeInBits() <= 32) {
14201 return SDValue();
14202 }
14203
14204 SDValue LoadOps[] = { Chain, Base };
14205 SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
14206 DAG.getVTList(MVT::v2f64, MVT::Other),
14207 LoadOps, MVT::v2f64, MMO);
14208
14209 DCI.AddToWorklist(Load.getNode());
14210 Chain = Load.getValue(1);
14211 SDValue Swap = DAG.getNode(
14212 PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
14213 DCI.AddToWorklist(Swap.getNode());
14214
14215 // Add a bitcast if the resulting load type doesn't match v2f64.
14216 if (VecTy != MVT::v2f64) {
14217 SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
14218 DCI.AddToWorklist(N.getNode());
14219 // Package {bitcast value, swap's chain} to match Load's shape.
14220 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
14221 N, Swap.getValue(1));
14222 }
14223
14224 return Swap;
14225}
14226
14227// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
14228// builtins) into stores with swaps.
14229SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
14230 DAGCombinerInfo &DCI) const {
14231 SelectionDAG &DAG = DCI.DAG;
14232 SDLoc dl(N);
14233 SDValue Chain;
14234 SDValue Base;
14235 unsigned SrcOpnd;
14236 MachineMemOperand *MMO;
14237
14238 switch (N->getOpcode()) {
14239 default:
14240    llvm_unreachable("Unexpected opcode for little endian VSX store");
14241 case ISD::STORE: {
14242 StoreSDNode *ST = cast<StoreSDNode>(N);
14243 Chain = ST->getChain();
14244 Base = ST->getBasePtr();
14245 MMO = ST->getMemOperand();
14246 SrcOpnd = 1;
14247 // If the MMO suggests this isn't a store of a full vector, leave
14248 // things alone. For a built-in, we have to make the change for
14249 // correctness, so if there is a size problem that will be a bug.
14250 if (MMO->getSize() < 16)
14251 return SDValue();
14252 break;
14253 }
14254 case ISD::INTRINSIC_VOID: {
14255 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
14256 Chain = Intrin->getChain();
14257 // Intrin->getBasePtr() oddly does not get what we want.
14258 Base = Intrin->getOperand(3);
14259 MMO = Intrin->getMemOperand();
14260 SrcOpnd = 2;
14261 break;
14262 }
14263 }
14264
14265 SDValue Src = N->getOperand(SrcOpnd);
14266 MVT VecTy = Src.getValueType().getSimpleVT();
14267
14268   // Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the store is
14269   // aligned and the type is a vector with elements up to 4 bytes
14270 if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&
14271 VecTy.getScalarSizeInBits() <= 32) {
14272 return SDValue();
14273 }
14274
14275   // All stores are done as v2f64, with a bitcast added if needed.
14276 if (VecTy != MVT::v2f64) {
14277 Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
14278 DCI.AddToWorklist(Src.getNode());
14279 }
14280
14281 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
14282 DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
14283 DCI.AddToWorklist(Swap.getNode());
14284 Chain = Swap.getValue(1);
14285 SDValue StoreOps[] = { Chain, Swap, Base };
14286 SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
14287 DAG.getVTList(MVT::Other),
14288 StoreOps, VecTy, MMO);
14289 DCI.AddToWorklist(Store.getNode());
14290 return Store;
14291}
14292
14293// Handle DAG combine for STORE (FP_TO_INT F).
14294SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
14295 DAGCombinerInfo &DCI) const {
14296
14297 SelectionDAG &DAG = DCI.DAG;
14298 SDLoc dl(N);
14299 unsigned Opcode = N->getOperand(1).getOpcode();
14300
14301  assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT)
14302         && "Not a FP_TO_INT Instruction!");
14303
14304 SDValue Val = N->getOperand(1).getOperand(0);
14305 EVT Op1VT = N->getOperand(1).getValueType();
14306 EVT ResVT = Val.getValueType();
14307
14308 if (!isTypeLegal(ResVT))
14309 return SDValue();
14310
14311 // Only perform combine for conversion to i64/i32 or power9 i16/i8.
14312 bool ValidTypeForStoreFltAsInt =
14313 (Op1VT == MVT::i32 || Op1VT == MVT::i64 ||
14314 (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
14315
14316 if (ResVT == MVT::ppcf128 || !Subtarget.hasP8Vector() ||
14317 cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
14318 return SDValue();
14319
14320 // Extend f32 values to f64
14321 if (ResVT.getScalarSizeInBits() == 32) {
14322 Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
14323 DCI.AddToWorklist(Val.getNode());
14324 }
14325
14326 // Set signed or unsigned conversion opcode.
14327 unsigned ConvOpcode = (Opcode == ISD::FP_TO_SINT) ?
14328 PPCISD::FP_TO_SINT_IN_VSR :
14329 PPCISD::FP_TO_UINT_IN_VSR;
14330
14331 Val = DAG.getNode(ConvOpcode,
14332 dl, ResVT == MVT::f128 ? MVT::f128 : MVT::f64, Val);
14333 DCI.AddToWorklist(Val.getNode());
14334
14335 // Set number of bytes being converted.
14336 unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
14337 SDValue Ops[] = { N->getOperand(0), Val, N->getOperand(2),
14338 DAG.getIntPtrConstant(ByteSize, dl, false),
14339 DAG.getValueType(Op1VT) };
14340
14341 Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
14342 DAG.getVTList(MVT::Other), Ops,
14343 cast<StoreSDNode>(N)->getMemoryVT(),
14344 cast<StoreSDNode>(N)->getMemOperand());
14345
14346 DCI.AddToWorklist(Val.getNode());
14347 return Val;
14348}
14349
14350static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
14351 // Check that the source of the element keeps flipping
14352   // (i.e. Mask[i] < NumElts -> Mask[i+1] >= NumElts).
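  // For example, <0,16,1,17,2,18,3,19> alternates and is accepted, while
  // <0,1,16,17,...> is rejected because two consecutive elements come from
  // the same source vector.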
14353 bool PrevElemFromFirstVec = Mask[0] < NumElts;
14354 for (int i = 1, e = Mask.size(); i < e; i++) {
14355 if (PrevElemFromFirstVec && Mask[i] < NumElts)
14356 return false;
14357 if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
14358 return false;
14359 PrevElemFromFirstVec = !PrevElemFromFirstVec;
14360 }
14361 return true;
14362}
14363
14364static bool isSplatBV(SDValue Op) {
14365 if (Op.getOpcode() != ISD::BUILD_VECTOR)
14366 return false;
14367 SDValue FirstOp;
14368
14369 // Find first non-undef input.
14370 for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
14371 FirstOp = Op.getOperand(i);
14372 if (!FirstOp.isUndef())
14373 break;
14374 }
14375
14376 // All inputs are undef or the same as the first non-undef input.
14377 for (int i = 1, e = Op.getNumOperands(); i < e; i++)
14378 if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
14379 return false;
14380 return true;
14381}
14382
14383static SDValue isScalarToVec(SDValue Op) {
14384 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
14385 return Op;
14386 if (Op.getOpcode() != ISD::BITCAST)
14387 return SDValue();
14388 Op = Op.getOperand(0);
14389 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
14390 return Op;
14391 return SDValue();
14392}
14393
14394static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
14395 int LHSMaxIdx, int RHSMinIdx,
14396 int RHSMaxIdx, int HalfVec) {
14397 for (int i = 0, e = ShuffV.size(); i < e; i++) {
14398 int Idx = ShuffV[i];
14399 if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
14400 ShuffV[i] += HalfVec;
14401 }
14402 return;
14403}
14404
14405// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
14406// the original is:
14407// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
14408// In such a case, just change the shuffle mask to extract the element
14409// from the permuted index.
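// For example, for v4i32 with C == 1 the mask produced below is
// <-1,-1,1,-1>: the extracted element lands at index NumElts / 2 (HalfVec),
// which is where the permuted form keeps the scalar rather than at element
// zero.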
14410static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG) {
14411 SDLoc dl(OrigSToV);
14412 EVT VT = OrigSToV.getValueType();
14413  assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
14414         "Expecting a SCALAR_TO_VECTOR here");
14415 SDValue Input = OrigSToV.getOperand(0);
14416
14417 if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
14418 ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
14419 SDValue OrigVector = Input.getOperand(0);
14420
14421 // Can't handle non-const element indices or different vector types
14422 // for the input to the extract and the output of the scalar_to_vector.
14423 if (Idx && VT == OrigVector.getValueType()) {
14424 SmallVector<int, 16> NewMask(VT.getVectorNumElements(), -1);
14425 NewMask[VT.getVectorNumElements() / 2] = Idx->getZExtValue();
14426 return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
14427 }
14428 }
14429 return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
14430 OrigSToV.getOperand(0));
14431}
14432
14433// On little endian subtargets, combine shuffles such as:
14434// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
14435// into:
14436// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
14437// because the latter can be matched to a single instruction merge.
14438// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
14439// to put the value into element zero. Adjust the shuffle mask so that the
14440// vector can remain in permuted form (to prevent a swap prior to a shuffle).
14441SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
14442 SelectionDAG &DAG) const {
14443 SDValue LHS = SVN->getOperand(0);
14444 SDValue RHS = SVN->getOperand(1);
14445 auto Mask = SVN->getMask();
14446 int NumElts = LHS.getValueType().getVectorNumElements();
14447 SDValue Res(SVN, 0);
14448 SDLoc dl(SVN);
14449
14450 // None of these combines are useful on big endian systems since the ISA
14451 // already has a big endian bias.
14452 if (!Subtarget.isLittleEndian() || !Subtarget.hasVSX())
14453 return Res;
14454
14455 // If this is not a shuffle of a shuffle and the first element comes from
14456 // the second vector, canonicalize to the commuted form. This will make it
14457 // more likely to match one of the single instruction patterns.
14458 if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
14459 RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
14460 std::swap(LHS, RHS);
14461 Res = DAG.getCommutedVectorShuffle(*SVN);
14462 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
14463 }
14464
14465 // Adjust the shuffle mask if either input vector comes from a
14466 // SCALAR_TO_VECTOR and keep the respective input vector in permuted
14467 // form (to prevent the need for a swap).
14468 SmallVector<int, 16> ShuffV(Mask.begin(), Mask.end());
14469 SDValue SToVLHS = isScalarToVec(LHS);
14470 SDValue SToVRHS = isScalarToVec(RHS);
14471 if (SToVLHS || SToVRHS) {
14472 int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
14473 : SToVRHS.getValueType().getVectorNumElements();
14474 int NumEltsOut = ShuffV.size();
14475
14476 // Initially assume that neither input is permuted. These will be adjusted
14477 // accordingly if either input is.
14478 int LHSMaxIdx = -1;
14479 int RHSMinIdx = -1;
14480 int RHSMaxIdx = -1;
14481 int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
14482
14483 // Get the permuted scalar to vector nodes for the source(s) that come from
14484 // ISD::SCALAR_TO_VECTOR.
14485 if (SToVLHS) {
14486 // Set up the values for the shuffle vector fixup.
14487 LHSMaxIdx = NumEltsOut / NumEltsIn;
14488 SToVLHS = getSToVPermuted(SToVLHS, DAG);
14489 if (SToVLHS.getValueType() != LHS.getValueType())
14490 SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
14491 LHS = SToVLHS;
14492 }
14493 if (SToVRHS) {
14494 RHSMinIdx = NumEltsOut;
14495 RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
14496 SToVRHS = getSToVPermuted(SToVRHS, DAG);
14497 if (SToVRHS.getValueType() != RHS.getValueType())
14498 SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
14499 RHS = SToVRHS;
14500 }
14501
14502 // Fix up the shuffle mask to reflect where the desired element actually is.
14503 // The minimum and maximum indices that correspond to element zero for both
14504 // the LHS and RHS are computed and will control which shuffle mask entries
14505 // are to be changed. For example, if the RHS is permuted, any shuffle mask
14506 // entries in the range [RHSMinIdx,RHSMaxIdx) will be incremented by
14507 // HalfVec to refer to the corresponding element in the permuted vector.
14508 fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
14509 HalfVec);
14510 Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
14511
14512 // We may have simplified away the shuffle. We won't be able to do anything
14513 // further with it here.
14514 if (!isa<ShuffleVectorSDNode>(Res))
14515 return Res;
14516 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
14517 }
14518
14519 // The common case after we commuted the shuffle is that the RHS is a splat
14520 // and we have elements coming in from the splat at indices that are not
14521 // conducive to using a merge.
14522 // Example:
14523 // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
14524 if (!isSplatBV(RHS))
14525 return Res;
14526
14527 // We are looking for a mask such that all even elements are from
14528 // one vector and all odd elements from the other.
14529 if (!isAlternatingShuffMask(Mask, NumElts))
14530 return Res;
14531
14532 // Adjust the mask so we are pulling in the same index from the splat
14533 // as the index from the interesting vector in consecutive elements.
14534 // Example (even elements from first vector):
14535 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
14536 if (Mask[0] < NumElts)
14537 for (int i = 1, e = Mask.size(); i < e; i += 2)
14538 ShuffV[i] = (ShuffV[i - 1] + NumElts);
14539 // Example (odd elements from first vector):
14540 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
14541 else
14542 for (int i = 0, e = Mask.size(); i < e; i += 2)
14543 ShuffV[i] = (ShuffV[i + 1] + NumElts);
14544
14545 // If the RHS has undefs, we need to remove them since we may have created
14546 // a shuffle that adds those instead of the splat value.
14547 SDValue SplatVal = cast<BuildVectorSDNode>(RHS.getNode())->getSplatValue();
14548 RHS = DAG.getSplatBuildVector(RHS.getValueType(), dl, SplatVal);
14549
14550 Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
14551 return Res;
14552}
14553
14554SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
14555 LSBaseSDNode *LSBase,
14556 DAGCombinerInfo &DCI) const {
14557  assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
14558         "Not a reverse memop pattern!");
14559
14560 auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
14561 auto Mask = SVN->getMask();
14562 int i = 0;
14563 auto I = Mask.rbegin();
14564 auto E = Mask.rend();
14565
14566 for (; I != E; ++I) {
14567 if (*I != i)
14568 return false;
14569 i++;
14570 }
14571 return true;
14572 };
14573
14574 SelectionDAG &DAG = DCI.DAG;
14575 EVT VT = SVN->getValueType(0);
14576
14577 if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
14578 return SDValue();
14579
14580   // Before P9, the PPCVSXSwapRemoval pass is responsible for fixing up the
14581   // element order (see the comment in PPCVSXSwapRemoval.cpp). This combine
14582   // conflicts with that optimization, so we don't do it before P9.
14583 if (!Subtarget.hasP9Vector())
14584 return SDValue();
14585
14586   if (!IsElementReverse(SVN))
14587 return SDValue();
14588
14589 if (LSBase->getOpcode() == ISD::LOAD) {
14590 SDLoc dl(SVN);
14591 SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
14592 return DAG.getMemIntrinsicNode(
14593 PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
14594 LSBase->getMemoryVT(), LSBase->getMemOperand());
14595 }
14596
14597 if (LSBase->getOpcode() == ISD::STORE) {
14598 SDLoc dl(LSBase);
14599 SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
14600 LSBase->getBasePtr()};
14601 return DAG.getMemIntrinsicNode(
14602 PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
14603 LSBase->getMemoryVT(), LSBase->getMemOperand());
14604 }
14605
14606  llvm_unreachable("Expected a load or store node here");
14607}
14608
14609SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
14610 DAGCombinerInfo &DCI) const {
14611 SelectionDAG &DAG = DCI.DAG;
14612 SDLoc dl(N);
14613 switch (N->getOpcode()) {
14614 default: break;
14615 case ISD::ADD:
14616 return combineADD(N, DCI);
14617 case ISD::SHL:
14618 return combineSHL(N, DCI);
14619 case ISD::SRA:
14620 return combineSRA(N, DCI);
14621 case ISD::SRL:
14622 return combineSRL(N, DCI);
14623 case ISD::MUL:
14624 return combineMUL(N, DCI);
14625 case ISD::FMA:
14626 case PPCISD::FNMSUB:
14627 return combineFMALike(N, DCI);
14628 case PPCISD::SHL:
14629 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
14630 return N->getOperand(0);
14631 break;
14632 case PPCISD::SRL:
14633 if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
14634 return N->getOperand(0);
14635 break;
14636 case PPCISD::SRA:
14637 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
14638 if (C->isNullValue() || // 0 >>s V -> 0.
14639 C->isAllOnesValue()) // -1 >>s V -> -1.
14640 return N->getOperand(0);
14641 }
14642 break;
14643 case ISD::SIGN_EXTEND:
14644 case ISD::ZERO_EXTEND:
14645 case ISD::ANY_EXTEND:
14646 return DAGCombineExtBoolTrunc(N, DCI);
14647 case ISD::TRUNCATE:
14648 return combineTRUNCATE(N, DCI);
14649 case ISD::SETCC:
14650 if (SDValue CSCC = combineSetCC(N, DCI))
14651 return CSCC;
14652    LLVM_FALLTHROUGH;
14653 case ISD::SELECT_CC:
14654 return DAGCombineTruncBoolExt(N, DCI);
14655 case ISD::SINT_TO_FP:
14656 case ISD::UINT_TO_FP:
14657 return combineFPToIntToFP(N, DCI);
14658 case ISD::VECTOR_SHUFFLE:
14659 if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
14660 LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
14661 return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
14662 }
14663 return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
14664 case ISD::STORE: {
14665
14666 EVT Op1VT = N->getOperand(1).getValueType();
14667 unsigned Opcode = N->getOperand(1).getOpcode();
14668
14669 if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT) {
14670 SDValue Val= combineStoreFPToInt(N, DCI);
14671 if (Val)
14672 return Val;
14673 }
14674
14675 if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
14676 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
14677 SDValue Val= combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
14678 if (Val)
14679 return Val;
14680 }
14681
14682 // Turn STORE (BSWAP) -> sthbrx/stwbrx.
14683 if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
14684 N->getOperand(1).getNode()->hasOneUse() &&
14685 (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
14686 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
14687
14688       // STBRX can only handle simple types, and it makes no sense to store
14689       // fewer than two bytes in byte-reversed order.
14690 EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
14691 if (mVT.isExtended() || mVT.getSizeInBits() < 16)
14692 break;
14693
14694 SDValue BSwapOp = N->getOperand(1).getOperand(0);
14695 // Do an any-extend to 32-bits if this is a half-word input.
14696 if (BSwapOp.getValueType() == MVT::i16)
14697 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
14698
14699       // If the type of the BSWAP operand is wider than the stored memory
14700       // width, it needs to be shifted right before the STBRX.
14701 if (Op1VT.bitsGT(mVT)) {
14702 int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
14703 BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
14704 DAG.getConstant(Shift, dl, MVT::i32));
14705 // Need to truncate if this is a bswap of i64 stored as i32/i16.
14706 if (Op1VT == MVT::i64)
14707 BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
14708 }
14709
14710 SDValue Ops[] = {
14711 N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
14712 };
14713 return
14714 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
14715 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
14716 cast<StoreSDNode>(N)->getMemOperand());
14717 }
14718
14719 // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
14720     // This increases the chance of CSE for constant construction.
14721 if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
14722 isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
14723       // Need to sign-extend to 64 bits to handle negative values.
14724 EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
14725 uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
14726 MemVT.getSizeInBits());
14727 SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
14728
14729 // DAG.getTruncStore() can't be used here because it doesn't accept
14730 // the general (base + offset) addressing mode.
14731 // So we use UpdateNodeOperands and setTruncatingStore instead.
14732 DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
14733 N->getOperand(3));
14734 cast<StoreSDNode>(N)->setTruncatingStore(true);
14735 return SDValue(N, 0);
14736 }
14737
14738 // For little endian, VSX stores require generating xxswapd/lxvd2x.
14739 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
14740 if (Op1VT.isSimple()) {
14741 MVT StoreVT = Op1VT.getSimpleVT();
14742 if (Subtarget.needsSwapsForVSXMemOps() &&
14743 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
14744 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
14745 return expandVSXStoreForLE(N, DCI);
14746 }
14747 break;
14748 }
14749 case ISD::LOAD: {
14750 LoadSDNode *LD = cast<LoadSDNode>(N);
14751 EVT VT = LD->getValueType(0);
14752
14753 // For little endian, VSX loads require generating lxvd2x/xxswapd.
14754 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
14755 if (VT.isSimple()) {
14756 MVT LoadVT = VT.getSimpleVT();
14757 if (Subtarget.needsSwapsForVSXMemOps() &&
14758 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
14759 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
14760 return expandVSXLoadForLE(N, DCI);
14761 }
14762
14763 // We sometimes end up with a 64-bit integer load, from which we extract
14764 // two single-precision floating-point numbers. This happens with
14765 // std::complex<float>, and other similar structures, because of the way we
14766 // canonicalize structure copies. However, if we lack direct moves,
14767 // then the final bitcasts from the extracted integer values to the
14768 // floating-point numbers turn into store/load pairs. Even with direct moves,
14769 // just loading the two floating-point numbers is likely better.
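    // As a sketch (assuming the structure copy is canonicalized into a single
    // i64 load), source like the following (names are illustrative) can
    // produce this pattern:
    //
    //   std::complex<float> copy(const std::complex<float> *P) {
    //     return *P; // i64 load; each f32 field is a truncate+bitcast of it
    //   }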
14770 auto ReplaceTwoFloatLoad = [&]() {
14771 if (VT != MVT::i64)
14772 return false;
14773
14774 if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
14775 LD->isVolatile())
14776 return false;
14777
14778 // We're looking for a sequence like this:
14779 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
14780 // t16: i64 = srl t13, Constant:i32<32>
14781 // t17: i32 = truncate t16
14782 // t18: f32 = bitcast t17
14783 // t19: i32 = truncate t13
14784 // t20: f32 = bitcast t19
14785
14786 if (!LD->hasNUsesOfValue(2, 0))
14787 return false;
14788
14789 auto UI = LD->use_begin();
14790 while (UI.getUse().getResNo() != 0) ++UI;
14791 SDNode *Trunc = *UI++;
14792 while (UI.getUse().getResNo() != 0) ++UI;
14793 SDNode *RightShift = *UI;
14794 if (Trunc->getOpcode() != ISD::TRUNCATE)
14795 std::swap(Trunc, RightShift);
14796
14797 if (Trunc->getOpcode() != ISD::TRUNCATE ||
14798 Trunc->getValueType(0) != MVT::i32 ||
14799 !Trunc->hasOneUse())
14800 return false;
14801 if (RightShift->getOpcode() != ISD::SRL ||
14802 !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
14803 RightShift->getConstantOperandVal(1) != 32 ||
14804 !RightShift->hasOneUse())
14805 return false;
14806
14807 SDNode *Trunc2 = *RightShift->use_begin();
14808 if (Trunc2->getOpcode() != ISD::TRUNCATE ||
14809 Trunc2->getValueType(0) != MVT::i32 ||
14810 !Trunc2->hasOneUse())
14811 return false;
14812
14813 SDNode *Bitcast = *Trunc->use_begin();
14814 SDNode *Bitcast2 = *Trunc2->use_begin();
14815
14816 if (Bitcast->getOpcode() != ISD::BITCAST ||
14817 Bitcast->getValueType(0) != MVT::f32)
14818 return false;
14819 if (Bitcast2->getOpcode() != ISD::BITCAST ||
14820 Bitcast2->getValueType(0) != MVT::f32)
14821 return false;
14822
14823 if (Subtarget.isLittleEndian())
14824 std::swap(Bitcast, Bitcast2);
14825
14826 // Bitcast has the second float (in memory-layout order) and Bitcast2
14827 // has the first one.
14828
14829 SDValue BasePtr = LD->getBasePtr();
14830 if (LD->isIndexed()) {
14831        assert(LD->getAddressingMode() == ISD::PRE_INC &&
14832               "Non-pre-inc AM on PPC?");
14833 BasePtr =
14834 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
14835 LD->getOffset());
14836 }
14837
14838 auto MMOFlags =
14839 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
14840 SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
14841 LD->getPointerInfo(), LD->getAlignment(),
14842 MMOFlags, LD->getAAInfo());
14843 SDValue AddPtr =
14844 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
14845 BasePtr, DAG.getIntPtrConstant(4, dl));
14846 SDValue FloatLoad2 = DAG.getLoad(
14847 MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
14848 LD->getPointerInfo().getWithOffset(4),
14849 MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo());
14850
14851 if (LD->isIndexed()) {
14852 // Note that DAGCombine should re-form any pre-increment load(s) from
14853 // what is produced here if that makes sense.
14854 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
14855 }
14856
14857 DCI.CombineTo(Bitcast2, FloatLoad);
14858 DCI.CombineTo(Bitcast, FloatLoad2);
14859
14860 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
14861 SDValue(FloatLoad2.getNode(), 1));
14862 return true;
14863 };
14864
14865 if (ReplaceTwoFloatLoad())
14866 return SDValue(N, 0);
14867
14868 EVT MemVT = LD->getMemoryVT();
14869 Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
14870 Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
14871 if (LD->isUnindexed() && VT.isVector() &&
14872 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
14873 // P8 and later hardware should just use LOAD.
14874 !Subtarget.hasP8Vector() &&
14875 (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
14876 VT == MVT::v4f32))) &&
14877 LD->getAlign() < ABIAlignment) {
14878 // This is a type-legal unaligned Altivec load.
14879 SDValue Chain = LD->getChain();
14880 SDValue Ptr = LD->getBasePtr();
14881 bool isLittleEndian = Subtarget.isLittleEndian();
14882
14883 // This implements the loading of unaligned vectors as described in
14884 // the venerable Apple Velocity Engine overview. Specifically:
14885 // https://developer.apple.com/hardwaredrivers/ve/alignment.html
14886 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
14887 //
14888 // The general idea is to expand a sequence of one or more unaligned
14889 // loads into an alignment-based permutation-control instruction (lvsl
14890 // or lvsr), a series of regular vector loads (which always truncate
14891 // their input address to an aligned address), and a series of
14892 // permutations. The results of these permutations are the requested
14893 // loaded values. The trick is that the last "extra" load is not taken
14894 // from the address you might suspect (sizeof(vector) bytes after the
14895 // last requested load), but rather sizeof(vector) - 1 bytes after the
14896 // last requested vector. The point of this is to avoid a page fault if
14897 // the base address happened to be aligned. This works because if the
14898 // base address is aligned, then adding less than a full vector length
14899 // will cause the last vector in the sequence to be (re)loaded.
14900 // Otherwise, the next vector will be fetched as you might suspect was
14901 // necessary.
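      // As a sketch of the sequence built below (big-endian operand order;
      // little endian uses lvsr and swaps the vperm inputs, as noted further
      // down):
      //   mask = lvsl(P)      ; permute control from the low bits of P
      //   lo   = lvx(P)       ; aligned load covering the start of the data
      //   hi   = lvx(P + 15)  ; 15 bytes later, to avoid a stray page fault
      //   val  = vperm(lo, hi, mask)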
14902
14903 // We might be able to reuse the permutation generation from
14904 // a different base address offset from this one by an aligned amount.
14905 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
14906 // optimization later.
14907 Intrinsic::ID Intr, IntrLD, IntrPerm;
14908 MVT PermCntlTy, PermTy, LDTy;
14909 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
14910 : Intrinsic::ppc_altivec_lvsl;
14911 IntrLD = Intrinsic::ppc_altivec_lvx;
14912 IntrPerm = Intrinsic::ppc_altivec_vperm;
14913 PermCntlTy = MVT::v16i8;
14914 PermTy = MVT::v4i32;
14915 LDTy = MVT::v4i32;
14916
14917 SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
14918
14919 // Create the new MMO for the new base load. It is like the original MMO,
14920 // but represents an area in memory almost twice the vector size centered
14921 // on the original address. If the address is unaligned, we might start
14922 // reading up to (sizeof(vector)-1) bytes below the address of the
14923 // original unaligned load.
14924 MachineFunction &MF = DAG.getMachineFunction();
14925 MachineMemOperand *BaseMMO =
14926 MF.getMachineMemOperand(LD->getMemOperand(),
14927 -(long)MemVT.getStoreSize()+1,
14928 2*MemVT.getStoreSize()-1);
14929
14930 // Create the new base load.
14931 SDValue LDXIntID =
14932 DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
14933 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
14934 SDValue BaseLoad =
14935 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
14936 DAG.getVTList(PermTy, MVT::Other),
14937 BaseLoadOps, LDTy, BaseMMO);
14938
14939 // Note that the value of IncOffset (which is provided to the next
14940 // load's pointer info offset value, and thus used to calculate the
14941 // alignment), and the value of IncValue (which is actually used to
14942 // increment the pointer value) are different! This is because we
14943 // require the next load to appear to be aligned, even though it
14944 // is actually offset from the base pointer by a lesser amount.
14945 int IncOffset = VT.getSizeInBits() / 8;
14946 int IncValue = IncOffset;
14947
14948 // Walk (both up and down) the chain looking for another load at the real
14949 // (aligned) offset (the alignment of the other load does not matter in
14950 // this case). If found, then do not use the offset reduction trick, as
14951 // that will prevent the loads from being later combined (as they would
14952 // otherwise be duplicates).
14953 if (!findConsecutiveLoad(LD, DAG))
14954 --IncValue;
14955
14956 SDValue Increment =
14957 DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
14958 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
14959
14960 MachineMemOperand *ExtraMMO =
14961 MF.getMachineMemOperand(LD->getMemOperand(),
14962 1, 2*MemVT.getStoreSize()-1);
14963 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
14964 SDValue ExtraLoad =
14965 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
14966 DAG.getVTList(PermTy, MVT::Other),
14967 ExtraLoadOps, LDTy, ExtraMMO);
14968
14969 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
14970 BaseLoad.getValue(1), ExtraLoad.getValue(1));
14971
14972 // Because vperm has a big-endian bias, we must reverse the order
14973 // of the input vectors and complement the permute control vector
14974 // when generating little endian code. We have already handled the
14975 // latter by using lvsr instead of lvsl, so just reverse BaseLoad
14976 // and ExtraLoad here.
14977 SDValue Perm;
14978 if (isLittleEndian)
14979 Perm = BuildIntrinsicOp(IntrPerm,
14980 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
14981 else
14982 Perm = BuildIntrinsicOp(IntrPerm,
14983 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
14984
14985 if (VT != PermTy)
14986 Perm = Subtarget.hasAltivec()
14987 ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
14988 : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
14989 DAG.getTargetConstant(1, dl, MVT::i64));
14990 // second argument is 1 because this rounding
14991 // is always exact.
14992
14993 // The output of the permutation is our loaded result, the TokenFactor is
14994 // our new chain.
14995 DCI.CombineTo(N, Perm, TF);
14996 return SDValue(N, 0);
14997 }
14998 }
14999 break;
15000 case ISD::INTRINSIC_WO_CHAIN: {
15001 bool isLittleEndian = Subtarget.isLittleEndian();
15002 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
15003 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
15004 : Intrinsic::ppc_altivec_lvsl);
15005 if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
15006 SDValue Add = N->getOperand(1);
15007
15008 int Bits = 4 /* 16 byte alignment */;
15009
15010 if (DAG.MaskedValueIsZero(Add->getOperand(1),
15011 APInt::getAllOnesValue(Bits /* alignment */)
15012 .zext(Add.getScalarValueSizeInBits()))) {
15013 SDNode *BasePtr = Add->getOperand(0).getNode();
15014 for (SDNode::use_iterator UI = BasePtr->use_begin(),
15015 UE = BasePtr->use_end();
15016 UI != UE; ++UI) {
15017 if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
15018 cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==
15019 IID) {
15020 // We've found another LVSL/LVSR, and this address is an aligned
15021 // multiple of that one. The results will be the same, so use the
15022 // one we've just found instead.
15023
15024 return SDValue(*UI, 0);
15025 }
15026 }
15027 }
15028
15029 if (isa<ConstantSDNode>(Add->getOperand(1))) {
15030 SDNode *BasePtr = Add->getOperand(0).getNode();
15031 for (SDNode::use_iterator UI = BasePtr->use_begin(),
15032 UE = BasePtr->use_end(); UI != UE; ++UI) {
15033 if (UI->getOpcode() == ISD::ADD &&
15034 isa<ConstantSDNode>(UI->getOperand(1)) &&
15035 (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
15036 cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) %
15037 (1ULL << Bits) == 0) {
15038 SDNode *OtherAdd = *UI;
15039 for (SDNode::use_iterator VI = OtherAdd->use_begin(),
15040 VE = OtherAdd->use_end(); VI != VE; ++VI) {
15041 if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
15042 cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) {
15043 return SDValue(*VI, 0);
15044 }
15045 }
15046 }
15047 }
15048 }
15049 }
15050
15051 // Combine vmaxsw/h/b(a, a's negation) to abs(a)
15052     // Expose the vabsduw/h/b opportunity for downstream combines.
15053 if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
15054 (IID == Intrinsic::ppc_altivec_vmaxsw ||
15055 IID == Intrinsic::ppc_altivec_vmaxsh ||
15056 IID == Intrinsic::ppc_altivec_vmaxsb)) {
15057 SDValue V1 = N->getOperand(1);
15058 SDValue V2 = N->getOperand(2);
15059 if ((V1.getSimpleValueType() == MVT::v4i32 ||
15060 V1.getSimpleValueType() == MVT::v8i16 ||
15061 V1.getSimpleValueType() == MVT::v16i8) &&
15062 V1.getSimpleValueType() == V2.getSimpleValueType()) {
15063 // (0-a, a)
15064 if (V1.getOpcode() == ISD::SUB &&
15065 ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
15066 V1.getOperand(1) == V2) {
15067 return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
15068 }
15069 // (a, 0-a)
15070 if (V2.getOpcode() == ISD::SUB &&
15071 ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
15072 V2.getOperand(1) == V1) {
15073 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
15074 }
15075 // (x-y, y-x)
15076 if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
15077 V1.getOperand(0) == V2.getOperand(1) &&
15078 V1.getOperand(1) == V2.getOperand(0)) {
15079 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
15080 }
15081 }
15082 }
15083 }
15084
15085 break;
15086 case ISD::INTRINSIC_W_CHAIN:
15087 // For little endian, VSX loads require generating lxvd2x/xxswapd.
15088 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
15089 if (Subtarget.needsSwapsForVSXMemOps()) {
15090 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
15091 default:
15092 break;
15093 case Intrinsic::ppc_vsx_lxvw4x:
15094 case Intrinsic::ppc_vsx_lxvd2x:
15095 return expandVSXLoadForLE(N, DCI);
15096 }
15097 }
15098 break;
15099 case ISD::INTRINSIC_VOID:
15100 // For little endian, VSX stores require generating xxswapd/stxvd2x.
15101 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
15102 if (Subtarget.needsSwapsForVSXMemOps()) {
15103 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
15104 default:
15105 break;
15106 case Intrinsic::ppc_vsx_stxvw4x:
15107 case Intrinsic::ppc_vsx_stxvd2x:
15108 return expandVSXStoreForLE(N, DCI);
15109 }
15110 }
15111 break;
15112 case ISD::BSWAP:
15113 // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
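    // For example, (i32 (bswap (i32 load p))) becomes a single lwbrx of p,
    // and an i16 case becomes an lhbrx whose i32 result is truncated back to
    // i16.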
15114 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
15115 N->getOperand(0).hasOneUse() &&
15116 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
15117 (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
15118 N->getValueType(0) == MVT::i64))) {
15119 SDValue Load = N->getOperand(0);
15120 LoadSDNode *LD = cast<LoadSDNode>(Load);
15121 // Create the byte-swapping load.
15122 SDValue Ops[] = {
15123 LD->getChain(), // Chain
15124 LD->getBasePtr(), // Ptr
15125 DAG.getValueType(N->getValueType(0)) // VT
15126 };
15127 SDValue BSLoad =
15128 DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
15129 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
15130 MVT::i64 : MVT::i32, MVT::Other),
15131 Ops, LD->getMemoryVT(), LD->getMemOperand());
15132
15133 // If this is an i16 load, insert the truncate.
15134 SDValue ResVal = BSLoad;
15135 if (N->getValueType(0) == MVT::i16)
15136 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
15137
15138 // First, combine the bswap away. This makes the value produced by the
15139 // load dead.
15140 DCI.CombineTo(N, ResVal);
15141
15142 // Next, combine the load away, we give it a bogus result value but a real
15143 // chain result. The result value is dead because the bswap is dead.
15144 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
15145
15146 // Return N so it doesn't get rechecked!
15147 return SDValue(N, 0);
15148 }
15149 break;
15150 case PPCISD::VCMP:
15151 // If a VCMPo node already exists with exactly the same operands as this
15152 // node, use its result instead of this node (VCMPo computes both a CR6 and
15153 // a normal output).
15154 //
15155 if (!N->getOperand(0).hasOneUse() &&
15156 !N->getOperand(1).hasOneUse() &&
15157 !N->getOperand(2).hasOneUse()) {
15158
15159 // Scan all of the users of the LHS, looking for VCMPo's that match.
15160 SDNode *VCMPoNode = nullptr;
15161
15162 SDNode *LHSN = N->getOperand(0).getNode();
15163 for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
15164 UI != E; ++UI)
15165 if (UI->getOpcode() == PPCISD::VCMPo &&
15166 UI->getOperand(1) == N->getOperand(1) &&
15167 UI->getOperand(2) == N->getOperand(2) &&
15168 UI->getOperand(0) == N->getOperand(0)) {
15169 VCMPoNode = *UI;
15170 break;
15171 }
15172
15173 // If there is no VCMPo node, or if the flag value has a single use, don't
15174 // transform this.
15175 if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
15176 break;
15177
15178 // Look at the (necessarily single) use of the flag value. If it has a
15179 // chain, this transformation is more complex. Note that multiple things
15180 // could use the value result, which we should ignore.
15181 SDNode *FlagUser = nullptr;
15182 for (SDNode::use_iterator UI = VCMPoNode->use_begin();
15183 FlagUser == nullptr; ++UI) {
15184        assert(UI != VCMPoNode->use_end() && "Didn't find user!");
15185 SDNode *User = *UI;
15186 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
15187 if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
15188 FlagUser = User;
15189 break;
15190 }
15191 }
15192 }
15193
15194 // If the user is a MFOCRF instruction, we know this is safe.
15195 // Otherwise we give up for right now.
15196 if (FlagUser->getOpcode() == PPCISD::MFOCRF)
15197 return SDValue(VCMPoNode, 0);
15198 }
15199 break;
15200 case ISD::BRCOND: {
15201 SDValue Cond = N->getOperand(1);
15202 SDValue Target = N->getOperand(2);
15203
15204 if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
15205 cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
15206 Intrinsic::loop_decrement) {
15207
15208 // We now need to make the intrinsic dead (it cannot be instruction
15209 // selected).
15210 DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
15211      assert(Cond.getNode()->hasOneUse() &&
15212             "Counter decrement has more than one use");
15213
15214 return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
15215 N->getOperand(0), Target);
15216 }
15217 }
15218 break;
15219 case ISD::BR_CC: {
15220 // If this is a branch on an altivec predicate comparison, lower this so
15221 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
15222 // lowering is done pre-legalize, because the legalizer lowers the predicate
15223 // compare down to code that is difficult to reassemble.
15224 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
15225 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
15226
15227 // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
15228 // value. If so, pass-through the AND to get to the intrinsic.
15229 if (LHS.getOpcode() == ISD::AND &&
15230 LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
15231 cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
15232 Intrinsic::loop_decrement &&
15233 isa<ConstantSDNode>(LHS.getOperand(1)) &&
15234 !isNullConstant(LHS.getOperand(1)))
15235 LHS = LHS.getOperand(0);
15236
15237 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
15238 cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
15239 Intrinsic::loop_decrement &&
15240 isa<ConstantSDNode>(RHS)) {
15241      assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
15242             "Counter decrement comparison is not EQ or NE");
15243
15244 unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
15245 bool isBDNZ = (CC == ISD::SETEQ && Val) ||
15246 (CC == ISD::SETNE && !Val);
15247
15248 // We now need to make the intrinsic dead (it cannot be instruction
15249 // selected).
15250 DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
15251      assert(LHS.getNode()->hasOneUse() &&
15252             "Counter decrement has more than one use");
15253
15254 return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
15255 N->getOperand(0), N->getOperand(4));
15256 }
15257
15258 int CompareOpc;
15259 bool isDot;
15260
15261 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
15262 isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
15263 getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
15264      assert(isDot && "Can't compare against a vector result!");
15265
15266 // If this is a comparison against something other than 0/1, then we know
15267 // that the condition is never/always true.
15268 unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
15269 if (Val != 0 && Val != 1) {
15270 if (CC == ISD::SETEQ) // Cond never true, remove branch.
15271 return N->getOperand(0);
15272 // Always !=, turn it into an unconditional branch.
15273 return DAG.getNode(ISD::BR, dl, MVT::Other,
15274 N->getOperand(0), N->getOperand(4));
15275 }
15276
15277 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
15278
15279 // Create the PPCISD altivec 'dot' comparison node.
15280 SDValue Ops[] = {
15281 LHS.getOperand(2), // LHS of compare
15282 LHS.getOperand(3), // RHS of compare
15283 DAG.getConstant(CompareOpc, dl, MVT::i32)
15284 };
15285 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
15286 SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
15287
15288 // Unpack the result based on how the target uses it.
15289 PPC::Predicate CompOpc;
15290 switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
15291 default: // Can't happen, don't crash on invalid number though.
15292 case 0: // Branch on the value of the EQ bit of CR6.
15293 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
15294 break;
15295 case 1: // Branch on the inverted value of the EQ bit of CR6.
15296 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
15297 break;
15298 case 2: // Branch on the value of the LT bit of CR6.
15299 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
15300 break;
15301 case 3: // Branch on the inverted value of the LT bit of CR6.
15302 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
15303 break;
15304 }
15305
15306 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
15307 DAG.getConstant(CompOpc, dl, MVT::i32),
15308 DAG.getRegister(PPC::CR6, MVT::i32),
15309 N->getOperand(4), CompNode.getValue(1));
15310 }
15311 break;
15312 }
15313 case ISD::BUILD_VECTOR:
15314 return DAGCombineBuildVector(N, DCI);
15315 case ISD::ABS:
15316 return combineABS(N, DCI);
15317 case ISD::VSELECT:
15318 return combineVSelect(N, DCI);
15319 }
15320
15321 return SDValue();
15322}
15323
15324SDValue
15325PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
15326 SelectionDAG &DAG,
15327 SmallVectorImpl<SDNode *> &Created) const {
15328 // fold (sdiv X, pow2)
15329 EVT VT = N->getValueType(0);
15330 if (VT == MVT::i64 && !Subtarget.isPPC64())
15331 return SDValue();
15332 if ((VT != MVT::i32 && VT != MVT::i64) ||
15333 !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
15334 return SDValue();
15335
15336 SDLoc DL(N);
15337 SDValue N0 = N->getOperand(0);
15338
15339 bool IsNegPow2 = (-Divisor).isPowerOf2();
15340 unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
15341 SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
15342
15343 SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
15344 Created.push_back(Op.getNode());
15345
15346 if (IsNegPow2) {
15347 Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
15348 Created.push_back(Op.getNode());
15349 }
15350
15351 return Op;
15352}
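// As an illustration of the SRA_ADDZE expansion above: an arithmetic shift
// right by Lg2 computes floor(X / 2^Lg2), and srawi/sradi set the carry bit
// when X is negative and nonzero remainder bits were shifted out; addze then
// adds that carry back, yielding the round-toward-zero quotient C requires.
// For example, X = -7, Lg2 = 1: the shift gives -4 with carry 1, and addze
// produces -4 + 1 = -3 == trunc(-7 / 2).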
15353
15354//===----------------------------------------------------------------------===//
15355// Inline Assembly Support
15356//===----------------------------------------------------------------------===//
15357
15358void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
15359 KnownBits &Known,
15360 const APInt &DemandedElts,
15361 const SelectionDAG &DAG,
15362 unsigned Depth) const {
15363 Known.resetAll();
15364 switch (Op.getOpcode()) {
15365 default: break;
15366 case PPCISD::LBRX: {
15367 // lhbrx is known to have the top bits cleared out.
15368 if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
15369 Known.Zero = 0xFFFF0000;
15370 break;
15371 }
15372 case ISD::INTRINSIC_WO_CHAIN: {
15373 switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
15374 default: break;
15375 case Intrinsic::ppc_altivec_vcmpbfp_p:
15376 case Intrinsic::ppc_altivec_vcmpeqfp_p:
15377 case Intrinsic::ppc_altivec_vcmpequb_p:
15378 case Intrinsic::ppc_altivec_vcmpequh_p:
15379 case Intrinsic::ppc_altivec_vcmpequw_p:
15380 case Intrinsic::ppc_altivec_vcmpequd_p:
15381 case Intrinsic::ppc_altivec_vcmpequq_p:
15382 case Intrinsic::ppc_altivec_vcmpgefp_p:
15383 case Intrinsic::ppc_altivec_vcmpgtfp_p:
15384 case Intrinsic::ppc_altivec_vcmpgtsb_p:
15385 case Intrinsic::ppc_altivec_vcmpgtsh_p:
15386 case Intrinsic::ppc_altivec_vcmpgtsw_p:
15387 case Intrinsic::ppc_altivec_vcmpgtsd_p:
15388 case Intrinsic::ppc_altivec_vcmpgtsq_p:
15389 case Intrinsic::ppc_altivec_vcmpgtub_p:
15390 case Intrinsic::ppc_altivec_vcmpgtuh_p:
15391 case Intrinsic::ppc_altivec_vcmpgtuw_p:
15392 case Intrinsic::ppc_altivec_vcmpgtud_p:
15393 case Intrinsic::ppc_altivec_vcmpgtuq_p:
15394 Known.Zero = ~1U; // All bits but the low one are known to be zero.
15395 break;
15396 }
15397 }
15398 }
15399}
15400
15401Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
15402 switch (Subtarget.getCPUDirective()) {
15403 default: break;
15404 case PPC::DIR_970:
15405 case PPC::DIR_PWR4:
15406 case PPC::DIR_PWR5:
15407 case PPC::DIR_PWR5X:
15408 case PPC::DIR_PWR6:
15409 case PPC::DIR_PWR6X:
15410 case PPC::DIR_PWR7:
15411 case PPC::DIR_PWR8:
15412 case PPC::DIR_PWR9:
15413 case PPC::DIR_PWR10:
15414 case PPC::DIR_PWR_FUTURE: {
15415 if (!ML)
15416 break;
15417
15418 if (!DisableInnermostLoopAlign32) {
15419       // If the nested loop is an innermost loop, prefer a 32-byte alignment,
15420 // so that we can decrease cache misses and branch-prediction misses.
15421 // Actual alignment of the loop will depend on the hotness check and other
15422 // logic in alignBlocks.
15423 if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
15424 return Align(32);
15425 }
15426
15427 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
15428
15429 // For small loops (between 5 and 8 instructions), align to a 32-byte
15430 // boundary so that the entire loop fits in one instruction-cache line.
15431 uint64_t LoopSize = 0;
15432 for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
15433 for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) {
15434 LoopSize += TII->getInstSizeInBytes(*J);
15435 if (LoopSize > 32)
15436 break;
15437 }
15438
15439 if (LoopSize > 16 && LoopSize <= 32)
15440 return Align(32);
15441
15442 break;
15443 }
15444 }
15445
15446 return TargetLowering::getPrefLoopAlignment(ML);
15447}
15448
15449/// getConstraintType - Given a constraint, return the type of
15450/// constraint it is for this target.
15451PPCTargetLowering::ConstraintType
15452PPCTargetLowering::getConstraintType(StringRef Constraint) const {
15453 if (Constraint.size() == 1) {
15454 switch (Constraint[0]) {
15455 default: break;
15456 case 'b':
15457 case 'r':
15458 case 'f':
15459 case 'd':
15460 case 'v':
15461 case 'y':
15462 return C_RegisterClass;
15463 case 'Z':
15464 // FIXME: While Z does indicate a memory constraint, it specifically
15465 // indicates an r+r address (used in conjunction with the 'y' modifier
15466 // in the replacement string). Currently, we're forcing the base
15467 // register to be r0 in the asm printer (which is interpreted as zero)
15468 // and forming the complete address in the second register. This is
15469 // suboptimal.
15470 return C_Memory;
15471 }
15472 } else if (Constraint == "wc") { // individual CR bits.
15473 return C_RegisterClass;
15474 } else if (Constraint == "wa" || Constraint == "wd" ||
15475 Constraint == "wf" || Constraint == "ws" ||
15476 Constraint == "wi" || Constraint == "ww") {
15477 return C_RegisterClass; // VSX registers.
15478 }
15479 return TargetLowering::getConstraintType(Constraint);
15480}
15481
15482/// Examine constraint type and operand type and determine a weight value.
15483/// This object must already have been set up with the operand type
15484/// and the current alternative constraint selected.
15485TargetLowering::ConstraintWeight
15486PPCTargetLowering::getSingleConstraintMatchWeight(
15487 AsmOperandInfo &info, const char *constraint) const {
15488 ConstraintWeight weight = CW_Invalid;
15489 Value *CallOperandVal = info.CallOperandVal;
15490 // If we don't have a value, we can't do a match,
15491 // but allow it at the lowest weight.
15492 if (!CallOperandVal)
15493 return CW_Default;
15494 Type *type = CallOperandVal->getType();
15495
15496 // Look at the constraint type.
15497 if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
15498 return CW_Register; // an individual CR bit.
15499 else if ((StringRef(constraint) == "wa" ||
15500 StringRef(constraint) == "wd" ||
15501 StringRef(constraint) == "wf") &&
15502 type->isVectorTy())
15503 return CW_Register;
15504 else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
15505     return CW_Register; // just holds 64-bit integer data.
15506 else if (StringRef(constraint) == "ws" && type->isDoubleTy())
15507 return CW_Register;
15508 else if (StringRef(constraint) == "ww" && type->isFloatTy())
15509 return CW_Register;
15510
15511 switch (*constraint) {
15512 default:
15513 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
15514 break;
15515 case 'b':
15516 if (type->isIntegerTy())
15517 weight = CW_Register;
15518 break;
15519 case 'f':
15520 if (type->isFloatTy())
15521 weight = CW_Register;
15522 break;
15523 case 'd':
15524 if (type->isDoubleTy())
15525 weight = CW_Register;
15526 break;
15527 case 'v':
15528 if (type->isVectorTy())
15529 weight = CW_Register;
15530 break;
15531 case 'y':
15532 weight = CW_Register;
15533 break;
15534 case 'Z':
15535 weight = CW_Memory;
15536 break;
15537 }
15538 return weight;
15539}
15540
15541std::pair<unsigned, const TargetRegisterClass *>
15542PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
15543 StringRef Constraint,
15544 MVT VT) const {
15545 if (Constraint.size() == 1) {
15546 // GCC RS6000 Constraint Letters
15547 switch (Constraint[0]) {
15548 case 'b': // R1-R31
15549 if (VT == MVT::i64 && Subtarget.isPPC64())
15550 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
15551 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
15552 case 'r': // R0-R31
15553 if (VT == MVT::i64 && Subtarget.isPPC64())
15554 return std::make_pair(0U, &PPC::G8RCRegClass);
15555 return std::make_pair(0U, &PPC::GPRCRegClass);
15556 // 'd' and 'f' constraints are both defined to be "the floating point
15557 // registers", where one is for 32-bit and the other for 64-bit. We don't
15558 // really care overly much here so just give them all the same reg classes.
15559 case 'd':
15560 case 'f':
15561 if (Subtarget.hasSPE()) {
15562 if (VT == MVT::f32 || VT == MVT::i32)
15563 return std::make_pair(0U, &PPC::GPRCRegClass);
15564 if (VT == MVT::f64 || VT == MVT::i64)
15565 return std::make_pair(0U, &PPC::SPERCRegClass);
15566 } else {
15567 if (VT == MVT::f32 || VT == MVT::i32)
15568 return std::make_pair(0U, &PPC::F4RCRegClass);
15569 if (VT == MVT::f64 || VT == MVT::i64)
15570 return std::make_pair(0U, &PPC::F8RCRegClass);
15571 }
15572 break;
15573 case 'v':
15574 if (Subtarget.hasAltivec())
15575 return std::make_pair(0U, &PPC::VRRCRegClass);
15576 break;
15577 case 'y': // crrc
15578 return std::make_pair(0U, &PPC::CRRCRegClass);
15579 }
15580 } else if (Constraint == "wc" && Subtarget.useCRBits()) {
15581 // An individual CR bit.
15582 return std::make_pair(0U, &PPC::CRBITRCRegClass);
15583 } else if ((Constraint == "wa" || Constraint == "wd" ||
15584 Constraint == "wf" || Constraint == "wi") &&
15585 Subtarget.hasVSX()) {
15586 return std::make_pair(0U, &PPC::VSRCRegClass);
15587 } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
15588 if (VT == MVT::f32 && Subtarget.hasP8Vector())
15589 return std::make_pair(0U, &PPC::VSSRCRegClass);
15590 else
15591 return std::make_pair(0U, &PPC::VSFRCRegClass);
15592 }
15593
15594 // If we name a VSX register, we can't defer to the base class because it
15595 // will not recognize the correct register (their names will be VSL{0-31}
15596 // and V{0-31} so they won't match). So we match them here.
15597 if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
15598 int VSNum = atoi(Constraint.data() + 3);
15599 assert(VSNum >= 0 && VSNum <= 63 &&
15600        "Attempted to access a vsr out of range");
15601 if (VSNum < 32)
15602 return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
15603 return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
15604 }
15605 std::pair<unsigned, const TargetRegisterClass *> R =
15606 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
15607
15608 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
15609 // (which we call X[0-9]+). If a 64-bit value has been requested, and a
15610 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
15611 // register.
15612 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
15613 // the AsmName field from *RegisterInfo.td, then this would not be necessary.
15614 if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
15615 PPC::GPRCRegClass.contains(R.first))
15616 return std::make_pair(TRI->getMatchingSuperReg(R.first,
15617 PPC::sub_32, &PPC::G8RCRegClass),
15618 &PPC::G8RCRegClass);
15619
15620 // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
15621 if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {
15622 R.first = PPC::CR0;
15623 R.second = &PPC::CRRCRegClass;
15624 }
15625
15626 return R;
15627}
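// [Illustrative addition, not part of the original source] The "wa" constraint
// mapped to VSRCRegClass above can be used from inline asm together with the
// '%x' modifier (which prints the full 0-63 VSX register number); an explicit
// register name such as "{vs34}" instead takes the Constraint[1]=='v' path
// above. A minimal sketch assuming a VSX-enabled compile and the __vector
// keyword:
static __vector double exampleVSXAdd(__vector double A, __vector double B) {
  __vector double C;
  __asm__("xvadddp %x0, %x1, %x2" : "=wa"(C) : "wa"(A), "wa"(B));
  return C;
}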
15628
15629/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
15630/// vector. If it is invalid, don't add anything to Ops.
15631void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
15632 std::string &Constraint,
15633 std::vector<SDValue>&Ops,
15634 SelectionDAG &DAG) const {
15635 SDValue Result;
15636
15637 // Only support length 1 constraints.
15638 if (Constraint.length() > 1) return;
15639
15640 char Letter = Constraint[0];
15641 switch (Letter) {
15642 default: break;
15643 case 'I':
15644 case 'J':
15645 case 'K':
15646 case 'L':
15647 case 'M':
15648 case 'N':
15649 case 'O':
15650 case 'P': {
15651 ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
15652 if (!CST) return; // Must be an immediate to match.
15653 SDLoc dl(Op);
15654 int64_t Value = CST->getSExtValue();
15655 EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
15656 // numbers are printed as such.
15657 switch (Letter) {
15658 default: llvm_unreachable("Unknown constraint letter!");
15659 case 'I': // "I" is a signed 16-bit constant.
15660 if (isInt<16>(Value))
15661 Result = DAG.getTargetConstant(Value, dl, TCVT);
15662 break;
15663 case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
15664 if (isShiftedUInt<16, 16>(Value))
15665 Result = DAG.getTargetConstant(Value, dl, TCVT);
15666 break;
15667 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
15668 if (isShiftedInt<16, 16>(Value))
15669 Result = DAG.getTargetConstant(Value, dl, TCVT);
15670 break;
15671 case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
15672 if (isUInt<16>(Value))
15673 Result = DAG.getTargetConstant(Value, dl, TCVT);
15674 break;
15675 case 'M': // "M" is a constant that is greater than 31.
15676 if (Value > 31)
15677 Result = DAG.getTargetConstant(Value, dl, TCVT);
15678 break;
15679 case 'N': // "N" is a positive constant that is an exact power of two.
15680 if (Value > 0 && isPowerOf2_64(Value))
15681 Result = DAG.getTargetConstant(Value, dl, TCVT);
15682 break;
15683 case 'O': // "O" is the constant zero.
15684 if (Value == 0)
15685 Result = DAG.getTargetConstant(Value, dl, TCVT);
15686 break;
15687 case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
15688 if (isInt<16>(-Value))
15689 Result = DAG.getTargetConstant(Value, dl, TCVT);
15690 break;
15691 }
15692 break;
15693 }
15694 }
15695
15696 if (Result.getNode()) {
15697 Ops.push_back(Result);
15698 return;
15699 }
15700
15701 // Handle standard constraint letters.
15702 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
15703}
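// [Illustrative addition, not part of the original source] The letters handled
// above are the classic rs6000 immediate constraints; for example 'I' only
// accepts a signed 16-bit literal and 'b' requires a base register other than
// r0. A minimal sketch (the asm template and helper name are illustrative):
static int exampleAddImmediate(int X) {
  int R;
  __asm__("addi %0, %1, %2" : "=r"(R) : "b"(X), "I"(42));
  return R;
}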
15704
15705// isLegalAddressingMode - Return true if the addressing mode represented
15706// by AM is legal for this target, for a load/store of the specified type.
15707bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
15708 const AddrMode &AM, Type *Ty,
15709 unsigned AS,
15710 Instruction *I) const {
15711 // The vector-type r+i form is supported since Power9 as the DQ form. We don't
15712 // check the DQ-form offset requirement (off % 16 == 0) because, on PowerPC,
15713 // the immediate form is preferred and the offset can be adjusted to use it
15714 // later in the PPCLoopInstrFormPrep pass. Also, in LSR a single LSRUse only
15715 // checks its min and max offsets for legality, so we should be a little
15716 // aggressive and accept the other offsets of that LSRUse as well.
15717 if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
15718 return false;
15719
15720 // PPC allows a sign-extended 16-bit immediate field.
15721 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
15722 return false;
15723
15724 // No global is ever allowed as a base.
15725 if (AM.BaseGV)
15726 return false;
15727
15728 // PPC only supports r+r,
15729 switch (AM.Scale) {
15730 case 0: // "r+i" or just "i", depending on HasBaseReg.
15731 break;
15732 case 1:
15733 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
15734 return false;
15735 // Otherwise we have r+r or r+i.
15736 break;
15737 case 2:
15738 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
15739 return false;
15740 // Allow 2*r as r+r.
15741 break;
15742 default:
15743 // No other scales are supported.
15744 return false;
15745 }
15746
15747 return true;
15748}
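// [Illustrative addition, not part of the original source] Address forms the
// hook above accepts, written as plain C accesses purely for illustration
// (the helper name is made up):
static int exampleAddrModes(int *B, long I) {
  int S = 0;
  S += B[8191]; // r+i: byte offset 32764 fits the signed 16-bit field.
  S += B[I];    // r+r: scale 1 with no extra offset.
  return S;     // r+r+i and scaled-index-plus-base forms are rejected above.
}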
15749
15750SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
15751 SelectionDAG &DAG) const {
15752 MachineFunction &MF = DAG.getMachineFunction();
15753 MachineFrameInfo &MFI = MF.getFrameInfo();
15754 MFI.setReturnAddressIsTaken(true);
15755
15756 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
15757 return SDValue();
15758
15759 SDLoc dl(Op);
15760 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
15761
15762 // Make sure the function does not optimize away the store of the RA to
15763 // the stack.
15764 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
15765 FuncInfo->setLRStoreRequired();
15766 bool isPPC64 = Subtarget.isPPC64();
15767 auto PtrVT = getPointerTy(MF.getDataLayout());
15768
15769 if (Depth > 0) {
15770 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
15771 SDValue Offset =
15772 DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
15773 isPPC64 ? MVT::i64 : MVT::i32);
15774 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
15775 DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
15776 MachinePointerInfo());
15777 }
15778
15779 // Just load the return address off the stack.
15780 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
15781 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
15782 MachinePointerInfo());
15783}
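// [Illustrative addition, not part of the original source] LowerRETURNADDR
// above services __builtin_return_address: depth 0 loads the saved LR slot,
// while a non-zero depth first walks frames through LowerFRAMEADDR. A minimal
// usage sketch:
static void *exampleCallerPC(void) {
  return __builtin_return_address(0);
}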
15784
15785SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
15786 SelectionDAG &DAG) const {
15787 SDLoc dl(Op);
15788 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
15789
15790 MachineFunction &MF = DAG.getMachineFunction();
15791 MachineFrameInfo &MFI = MF.getFrameInfo();
15792 MFI.setFrameAddressIsTaken(true);
15793
15794 EVT PtrVT = getPointerTy(MF.getDataLayout());
15795 bool isPPC64 = PtrVT == MVT::i64;
15796
15797 // Naked functions never have a frame pointer, and so we use r1. For all
15798 // other functions, this decision must be delayed until during PEI.
15799 unsigned FrameReg;
15800 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
15801 FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
15802 else
15803 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
15804
15805 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
15806 PtrVT);
15807 while (Depth--)
15808 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
15809 FrameAddr, MachinePointerInfo());
15810 return FrameAddr;
15811}
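// [Illustrative addition, not part of the original source] LowerFRAMEADDR
// above backs __builtin_frame_address; each additional level of depth becomes
// one load in the while loop. A minimal usage sketch:
static void *exampleParentFrame(void) {
  return __builtin_frame_address(1);
}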
15812
15813// FIXME? Maybe this could be a TableGen attribute on some registers and
15814// this table could be generated automatically from RegInfo.
15815Register PPCTargetLowering::getRegisterByName(const char* RegName, LLT VT,
15816 const MachineFunction &MF) const {
15817 bool isPPC64 = Subtarget.isPPC64();
15818
15819 bool is64Bit = isPPC64 && VT == LLT::scalar(64);
15820 if (!is64Bit && VT != LLT::scalar(32))
15821 report_fatal_error("Invalid register global variable type");
15822
15823 Register Reg = StringSwitch<Register>(RegName)
15824 .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
15825 .Case("r2", isPPC64 ? Register() : PPC::R2)
15826 .Case("r13", (is64Bit ? PPC::X13 : PPC::R13))
15827 .Default(Register());
15828
15829 if (Reg)
15830 return Reg;
15831 report_fatal_error("Invalid register name global variable");
15832}
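// [Illustrative addition, not part of the original source] getRegisterByName
// above backs named global register variables (and the llvm.read_register
// intrinsic); only r1, r2 and r13 are accepted, and the width must match the
// target. A minimal sketch assuming a 64-bit target so unsigned long matches
// r1's width (names are illustrative):
register unsigned long ExampleStackPointer __asm__("r1");
static unsigned long exampleReadStackPointer(void) {
  return ExampleStackPointer;
}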
15833
15834bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
15835 // The 32-bit SVR4 ABI accesses everything as got-indirect.
15836 if (Subtarget.is32BitELFABI())
15837 return true;
15838
15839 // AIX accesses everything indirectly through the TOC, which is similar to
15840 // the GOT.
15841 if (Subtarget.isAIXABI())
15842 return true;
15843
15844 CodeModel::Model CModel = getTargetMachine().getCodeModel();
15845 // Under the small or large code model, module locals are accessed
15846 // indirectly by loading their address from .toc/.got.
15847 if (CModel == CodeModel::Small || CModel == CodeModel::Large)
15848 return true;
15849
15850 // JumpTable and BlockAddress are accessed as got-indirect.
15851 if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
15852 return true;
15853
15854 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
15855 return Subtarget.isGVIndirectSymbol(G->getGlobal());
15856
15857 return false;
15858}
15859
15860bool
15861PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
15862 // The PowerPC target isn't yet aware of offsets.
15863 return false;
15864}
15865
15866bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
15867 const CallInst &I,
15868 MachineFunction &MF,
15869 unsigned Intrinsic) const {
15870 switch (Intrinsic) {
15871 case Intrinsic::ppc_altivec_lvx:
15872 case Intrinsic::ppc_altivec_lvxl:
15873 case Intrinsic::ppc_altivec_lvebx:
15874 case Intrinsic::ppc_altivec_lvehx:
15875 case Intrinsic::ppc_altivec_lvewx:
15876 case Intrinsic::ppc_vsx_lxvd2x:
15877 case Intrinsic::ppc_vsx_lxvw4x:
15878 case Intrinsic::ppc_vsx_lxvd2x_be:
15879 case Intrinsic::ppc_vsx_lxvw4x_be:
15880 case Intrinsic::ppc_vsx_lxvl:
15881 case Intrinsic::ppc_vsx_lxvll: {
15882 EVT VT;
15883 switch (Intrinsic) {
15884 case Intrinsic::ppc_altivec_lvebx:
15885 VT = MVT::i8;
15886 break;
15887 case Intrinsic::ppc_altivec_lvehx:
15888 VT = MVT::i16;
15889 break;
15890 case Intrinsic::ppc_altivec_lvewx:
15891 VT = MVT::i32;
15892 break;
15893 case Intrinsic::ppc_vsx_lxvd2x:
15894 case Intrinsic::ppc_vsx_lxvd2x_be:
15895 VT = MVT::v2f64;
15896 break;
15897 default:
15898 VT = MVT::v4i32;
15899 break;
15900 }
15901
15902 Info.opc = ISD::INTRINSIC_W_CHAIN;
15903 Info.memVT = VT;
15904 Info.ptrVal = I.getArgOperand(0);
15905 Info.offset = -VT.getStoreSize()+1;
15906 Info.size = 2*VT.getStoreSize()-1;
15907 Info.align = Align(1);
15908 Info.flags = MachineMemOperand::MOLoad;
15909 return true;
15910 }
15911 case Intrinsic::ppc_altivec_stvx:
15912 case Intrinsic::ppc_altivec_stvxl:
15913 case Intrinsic::ppc_altivec_stvebx:
15914 case Intrinsic::ppc_altivec_stvehx:
15915 case Intrinsic::ppc_altivec_stvewx:
15916 case Intrinsic::ppc_vsx_stxvd2x:
15917 case Intrinsic::ppc_vsx_stxvw4x:
15918 case Intrinsic::ppc_vsx_stxvd2x_be:
15919 case Intrinsic::ppc_vsx_stxvw4x_be:
15920 case Intrinsic::ppc_vsx_stxvl:
15921 case Intrinsic::ppc_vsx_stxvll: {
15922 EVT VT;
15923 switch (Intrinsic) {
15924 case Intrinsic::ppc_altivec_stvebx:
15925 VT = MVT::i8;
15926 break;
15927 case Intrinsic::ppc_altivec_stvehx:
15928 VT = MVT::i16;
15929 break;
15930 case Intrinsic::ppc_altivec_stvewx:
15931 VT = MVT::i32;
15932 break;
15933 case Intrinsic::ppc_vsx_stxvd2x:
15934 case Intrinsic::ppc_vsx_stxvd2x_be:
15935 VT = MVT::v2f64;
15936 break;
15937 default:
15938 VT = MVT::v4i32;
15939 break;
15940 }
15941
15942 Info.opc = ISD::INTRINSIC_VOID;
15943 Info.memVT = VT;
15944 Info.ptrVal = I.getArgOperand(1);
15945 Info.offset = -VT.getStoreSize()+1;
15946 Info.size = 2*VT.getStoreSize()-1;
15947 Info.align = Align(1);
15948 Info.flags = MachineMemOperand::MOStore;
15949 return true;
15950 }
15951 default:
15952 break;
15953 }
15954
15955 return false;
15956}
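// [Illustrative addition, not part of the original source] The memory info
// filled in above spans [ptr - 15, ptr + 15] because lvx-class accesses ignore
// the low address bits, so any byte of the containing 16-byte block may be
// touched. A minimal sketch of a load that reaches this hook, assuming an
// Altivec-enabled compile:
#include <altivec.h>
static vector unsigned int exampleLoadQuad(const unsigned int *P) {
  return vec_ld(0, P); // lowers to the ppc_altivec_lvx intrinsic
}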
15957
15958/// It returns EVT::Other if the type should be determined using generic
15959/// target-independent logic.
15960EVT PPCTargetLowering::getOptimalMemOpType(
15961 const MemOp &Op, const AttributeList &FuncAttributes) const {
15962 if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
15963 // We should use Altivec/VSX loads and stores when available. For unaligned
15964 // addresses, unaligned VSX loads are only fast starting with the P8.
15965 if (Subtarget.hasAltivec() && Op.size() >= 16 &&
15966 (Op.isAligned(Align(16)) ||
15967 ((Op.isMemset() && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
15968 return MVT::v4i32;
15969 }
15970
15971 if (Subtarget.isPPC64()) {
15972 return MVT::i64;
15973 }
15974
15975 return MVT::i32;
15976}
15977
15978/// Returns true if it is beneficial to convert a load of a constant
15979/// to just the constant itself.
15980bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
15981 Type *Ty) const {
15982 assert(Ty->isIntegerTy());
15983
15984 unsigned BitSize = Ty->getPrimitiveSizeInBits();
15985 return !(BitSize == 0 || BitSize > 64);
15986}
15987
15988bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
15989 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
15990 return false;
15991 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
15992 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
15993 return NumBits1 == 64 && NumBits2 == 32;
15994}
15995
15996bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
15997 if (!VT1.isInteger() || !VT2.isInteger())
15998 return false;
15999 unsigned NumBits1 = VT1.getSizeInBits();
16000 unsigned NumBits2 = VT2.getSizeInBits();
16001 return NumBits1 == 64 && NumBits2 == 32;
16002}
16003
16004bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
16005 // Generally speaking, zexts are not free, but they are free when they can be
16006 // folded with other operations.
16007 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
16008 EVT MemVT = LD->getMemoryVT();
16009 if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
16010 (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
16011 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
16012 LD->getExtensionType() == ISD::ZEXTLOAD))
16013 return true;
16014 }
16015
16016 // FIXME: Add other cases...
16017 // - 32-bit shifts with a zext to i64
16018 // - zext after ctlz, bswap, etc.
16019 // - zext after and by a constant mask
16020
16021 return TargetLowering::isZExtFree(Val, VT2);
16022}
16023
16024bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
16025 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
16026        "invalid fpext types");
16027 // Extending to float128 is not free.
16028 if (DestVT == MVT::f128)
16029 return false;
16030 return true;
16031}
16032
16033bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
16034 return isInt<16>(Imm) || isUInt<16>(Imm);
16035}
16036
16037bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
16038 return isInt<16>(Imm) || isUInt<16>(Imm);
16039}
16040
16041bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
16042 unsigned,
16043 unsigned,
16044 MachineMemOperand::Flags,
16045 bool *Fast) const {
16046 if (DisablePPCUnaligned)
16047 return false;
16048
16049 // PowerPC supports unaligned memory access for simple non-vector types.
16050 // Although accessing unaligned addresses is not as efficient as accessing
16051 // aligned addresses, it is generally more efficient than manual expansion,
16052 // and generally only traps for software emulation when crossing page
16053 // boundaries.
16054
16055 if (!VT.isSimple())
16056 return false;
16057
16058 if (VT.isFloatingPoint() && !VT.isVector() &&
16059 !Subtarget.allowsUnalignedFPAccess())
16060 return false;
16061
16062 if (VT.getSimpleVT().isVector()) {
16063 if (Subtarget.hasVSX()) {
16064 if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
16065 VT != MVT::v4f32 && VT != MVT::v4i32)
16066 return false;
16067 } else {
16068 return false;
16069 }
16070 }
16071
16072 if (VT == MVT::ppcf128)
16073 return false;
16074
16075 if (Fast)
16076 *Fast = true;
16077
16078 return true;
16079}
16080
16081bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
16082 SDValue C) const {
16083 // Check integral scalar types.
16084 if (!VT.isScalarInteger())
16085 return false;
16086 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
16087 if (!ConstNode->getAPIntValue().isSignedIntN(64))
16088 return false;
16089 // This transformation will generate >= 2 operations. But the following
16090 // cases will generate <= 2 instructions during ISEL, so exclude them:
16091 // 1. If the constant multiplier fits 16 bits, it can be handled by one
16092 // HW instruction, i.e. MULLI.
16093 // 2. If the multiplier fits 16 bits after shifting out its trailing zeros,
16094 // only one more instruction than case 1 is needed, i.e. MULLI and RLDICR.
16095 int64_t Imm = ConstNode->getSExtValue();
16096 unsigned Shift = countTrailingZeros<uint64_t>(Imm);
16097 Imm >>= Shift;
16098 if (isInt<16>(Imm))
16099 return false;
16100 uint64_t UImm = static_cast<uint64_t>(Imm);
16101 if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
16102 isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
16103 return true;
16104 }
16105 return false;
16106}
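// [Illustrative addition, not part of the original source] A sketch of the
// same arithmetic as the hook above, with two worked constants; the helper
// name and the use of __builtin_ctzll are assumptions for illustration:
static bool exampleWouldDecomposeMul(long long Imm) {
  unsigned Shift = __builtin_ctzll((unsigned long long)Imm);
  Imm >>= Shift;
  if (Imm >= -32768 && Imm <= 32767) // handled by MULLI (plus RLDICR)
    return false;
  unsigned long long U = (unsigned long long)Imm;
  auto IsPow2 = [](unsigned long long V) { return V && !(V & (V - 1)); };
  return IsPow2(U + 1) || IsPow2(U - 1) || IsPow2(1 - U) || IsPow2(-1 - U);
}
// e.g. exampleWouldDecomposeMul(20)     == false (20 = 5 << 2; 5 fits 16 bits)
//      exampleWouldDecomposeMul(131071) == true  (2^17 - 1: shift then subtract)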
16107
16108bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
16109 EVT VT) const {
16110 return isFMAFasterThanFMulAndFAdd(
16111 MF.getFunction(), VT.getTypeForEVT(MF.getFunction().getContext()));
16112}
16113
16114bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
16115 Type *Ty) const {
16116 switch (Ty->getScalarType()->getTypeID()) {
16117 case Type::FloatTyID:
16118 case Type::DoubleTyID:
16119 return true;
16120 case Type::FP128TyID:
16121 return Subtarget.hasP9Vector();
16122 default:
16123 return false;
16124 }
16125}
16126
16127// FIXME: add more patterns which are not profitable to hoist.
16128bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
16129 if (!I->hasOneUse())
16130 return true;
16131
16132 Instruction *User = I->user_back();
16133 assert(User && "A single use instruction with no uses.");
16134
16135 switch (I->getOpcode()) {
16136 case Instruction::FMul: {
16137 // Don't break FMA, PowerPC prefers FMA.
16138 if (User->getOpcode() != Instruction::FSub &&
16139 User->getOpcode() != Instruction::FAdd)
16140 return true;
16141
16142 const TargetOptions &Options = getTargetMachine().Options;
16143 const Function *F = I->getFunction();
16144 const DataLayout &DL = F->getParent()->getDataLayout();
16145 Type *Ty = User->getOperand(0)->getType();
16146
16147 return !(
16148 isFMAFasterThanFMulAndFAdd(*F, Ty) &&
16149 isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
16150 (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
16151 }
16152 case Instruction::Load: {
16153 // Don't break the "store (load float*)" pattern; it will be combined into
16154 // "store (load int32)" by a later InstCombine pass (see the function
16155 // combineLoadToOperationType). On PowerPC, loading a floating-point value
16156 // takes more cycles than loading a 32-bit integer.
16157 LoadInst *LI = cast<LoadInst>(I);
16158 // For loads that combineLoadToOperationType leaves alone, such as ordered
16159 // loads, hoisting should still be profitable.
16160 // A swifterror load can only be of pointer-to-pointer type, so the type
16161 // check below already rules that case out.
16162 if (!LI->isUnordered())
16163 return true;
16164
16165 if (User->getOpcode() != Instruction::Store)
16166 return true;
16167
16168 if (I->getType()->getTypeID() != Type::FloatTyID)
16169 return true;
16170
16171 return false;
16172 }
16173 default:
16174 return true;
16175 }
16176 return true;
16177}
16178
16179const MCPhysReg *
16180PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
16181 // LR is a callee-save register, but we must treat it as clobbered by any call
16182 // site. Hence we include LR in the scratch registers, which are in turn added
16183 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
16184 // to CTR, which is used by any indirect call.
16185 static const MCPhysReg ScratchRegs[] = {
16186 PPC::X12, PPC::LR8, PPC::CTR8, 0
16187 };
16188
16189 return ScratchRegs;
16190}
16191
16192Register PPCTargetLowering::getExceptionPointerRegister(
16193 const Constant *PersonalityFn) const {
16194 return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
16195}
16196
16197Register PPCTargetLowering::getExceptionSelectorRegister(
16198 const Constant *PersonalityFn) const {
16199 return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
16200}
16201
16202bool
16203PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
16204 EVT VT , unsigned DefinedValues) const {
16205 if (VT == MVT::v2i64)
16206 return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
16207
16208 if (Subtarget.hasVSX())
16209 return true;
16210
16211 return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
16212}
16213
16214Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
16215 if (DisableILPPref || Subtarget.enableMachineScheduler())
16216 return TargetLowering::getSchedulingPreference(N);
16217
16218 return Sched::ILP;
16219}
16220
16221// Create a fast isel object.
16222FastISel *
16223PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
16224 const TargetLibraryInfo *LibInfo) const {
16225 return PPC::createFastISel(FuncInfo, LibInfo);
16226}
16227
16228// 'Inverted' means the FMA opcode after negating one multiplicand.
16229// For example, (fma -a b c) = (fnmsub a b c)
16230static unsigned invertFMAOpcode(unsigned Opc) {
16231 switch (Opc) {
16232 default:
16233 llvm_unreachable("Invalid FMA opcode for PowerPC!");
16234 case ISD::FMA:
16235 return PPCISD::FNMSUB;
16236 case PPCISD::FNMSUB:
16237 return ISD::FMA;
16238 }
16239}
16240
16241SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
16242 bool LegalOps, bool OptForSize,
16243 NegatibleCost &Cost,
16244 unsigned Depth) const {
16245 if (Depth > SelectionDAG::MaxRecursionDepth)
16246 return SDValue();
16247
16248 unsigned Opc = Op.getOpcode();
16249 EVT VT = Op.getValueType();
16250 SDNodeFlags Flags = Op.getNode()->getFlags();
16251
16252 switch (Opc) {
16253 case PPCISD::FNMSUB:
16254 if (!Op.hasOneUse() || !isTypeLegal(VT))
16255 break;
16256
16257 const TargetOptions &Options = getTargetMachine().Options;
16258 SDValue N0 = Op.getOperand(0);
16259 SDValue N1 = Op.getOperand(1);
16260 SDValue N2 = Op.getOperand(2);
16261 SDLoc Loc(Op);
16262
16263 NegatibleCost N2Cost = NegatibleCost::Expensive;
16264 SDValue NegN2 =
16265 getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
16266
16267 if (!NegN2)
16268 return SDValue();
16269
16270 // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
16271 // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
16272 // These transformations may change sign of zeroes. For example,
16273 // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
16274 if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
16275 // Try and choose the cheaper one to negate.
16276 NegatibleCost N0Cost = NegatibleCost::Expensive;
16277 SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
16278 N0Cost, Depth + 1);
16279
16280 NegatibleCost N1Cost = NegatibleCost::Expensive;
16281 SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
16282 N1Cost, Depth + 1);
16283
16284 if (NegN0 && N0Cost <= N1Cost) {
16285 Cost = std::min(N0Cost, N2Cost);
16286 return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
16287 } else if (NegN1) {
16288 Cost = std::min(N1Cost, N2Cost);
16289 return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
16290 }
16291 }
16292
16293 // (fneg (fnmsub a b c)) => (fma a b (fneg c))
16294 if (isOperationLegal(ISD::FMA, VT)) {
16295 Cost = N2Cost;
16296 return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
16297 }
16298
16299 break;
16300 }
16301
16302 return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
16303 Cost, Depth);
16304}
16305
16306// Override to enable LOAD_STACK_GUARD lowering on Linux.
16307bool PPCTargetLowering::useLoadStackGuardNode() const {
16308 if (!Subtarget.isTargetLinux())
16309 return TargetLowering::useLoadStackGuardNode();
16310 return true;
16311}
16312
16313// Override to disable global variable loading on Linux.
16314void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
16315 if (!Subtarget.isTargetLinux())
16316 return TargetLowering::insertSSPDeclarations(M);
16317}
16318
16319bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
16320 bool ForCodeSize) const {
16321 if (!VT.isSimple() || !Subtarget.hasVSX())
16322 return false;
16323
16324 switch(VT.getSimpleVT().SimpleTy) {
16325 default:
16326 // For FP types that are currently not supported by PPC backend, return
16327 // false. Examples: f16, f80.
16328 return false;
16329 case MVT::f32:
16330 case MVT::f64:
16331 if (Subtarget.hasPrefixInstrs()) {
16332 // With prefixed instructions, we can materialize anything that can be
16333 // represented with a 32-bit immediate, not just positive zero.
16334 APFloat APFloatOfImm = Imm;
16335 return convertToNonDenormSingle(APFloatOfImm);
16336 }
16337 LLVM_FALLTHROUGH;
16338 case MVT::ppcf128:
16339 return Imm.isPosZero();
16340 }
16341}
16342
16343// For vector shift operation op, fold
16344// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
16345static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
16346 SelectionDAG &DAG) {
16347 SDValue N0 = N->getOperand(0);
16348 SDValue N1 = N->getOperand(1);
16349 EVT VT = N0.getValueType();
16350 unsigned OpSizeInBits = VT.getScalarSizeInBits();
16351 unsigned Opcode = N->getOpcode();
16352 unsigned TargetOpcode;
16353
16354 switch (Opcode) {
16355 default:
16356 llvm_unreachable("Unexpected shift operation");
16357 case ISD::SHL:
16358 TargetOpcode = PPCISD::SHL;
16359 break;
16360 case ISD::SRL:
16361 TargetOpcode = PPCISD::SRL;
16362 break;
16363 case ISD::SRA:
16364 TargetOpcode = PPCISD::SRA;
16365 break;
16366 }
16367
16368 if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
16369 N1->getOpcode() == ISD::AND)
16370 if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
16371 if (Mask->getZExtValue() == OpSizeInBits - 1)
16372 return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
16373
16374 return SDValue();
16375}
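// [Illustrative addition, not part of the original source] The modulo mask
// stripped above corresponds to source like the following, where the explicit
// "& 31" is redundant because the vector shift instructions already use the
// amount modulo the element width (the typedef and helper are illustrative,
// assuming GNU vector extensions):
typedef unsigned int example_v4u32 __attribute__((vector_size(16)));
static example_v4u32 exampleShlModulo(example_v4u32 X, example_v4u32 Y) {
  example_v4u32 M = {31, 31, 31, 31};
  return X << (Y & M);
}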
16376
16377SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
16378 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
16379 return Value;
16380
16381 SDValue N0 = N->getOperand(0);
16382 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
16383 if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
16384 N0.getOpcode() != ISD::SIGN_EXTEND ||
16385 N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
16386 N->getValueType(0) != MVT::i64)
16387 return SDValue();
16388
16389 // We can't save an operation here if the value is already extended, and
16390 // the existing shift is easier to combine.
16391 SDValue ExtsSrc = N0.getOperand(0);
16392 if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
16393 ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
16394 return SDValue();
16395
16396 SDLoc DL(N0);
16397 SDValue ShiftBy = SDValue(CN1, 0);
16398 // We want the shift amount to be i32 on the extswli, but the shift could
16399 // have an i64.
16400 if (ShiftBy.getValueType() == MVT::i64)
16401 ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
16402
16403 return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
16404 ShiftBy);
16405}
16406
16407SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
16408 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
16409 return Value;
16410
16411 return SDValue();
16412}
16413
16414SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
16415 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
16416 return Value;
16417
16418 return SDValue();
16419}
16420
16421// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
16422// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
16423// When C is zero, the equation (addi Z, -C) can be simplified to Z
16424// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
16425static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
16426 const PPCSubtarget &Subtarget) {
16427 if (!Subtarget.isPPC64())
16428 return SDValue();
16429
16430 SDValue LHS = N->getOperand(0);
16431 SDValue RHS = N->getOperand(1);
16432
16433 auto isZextOfCompareWithConstant = [](SDValue Op) {
16434 if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
16435 Op.getValueType() != MVT::i64)
16436 return false;
16437
16438 SDValue Cmp = Op.getOperand(0);
16439 if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
16440 Cmp.getOperand(0).getValueType() != MVT::i64)
16441 return false;
16442
16443 if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
16444 int64_t NegConstant = 0 - Constant->getSExtValue();
16445 // Due to the limitations of the addi instruction,
16446 // -C is required to be in [-32768, 32767].
16447 return isInt<16>(NegConstant);
16448 }
16449
16450 return false;
16451 };
16452
16453 bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
16454 bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
16455
16456 // If there is a pattern, canonicalize a zext operand to the RHS.
16457 if (LHSHasPattern && !RHSHasPattern)
16458 std::swap(LHS, RHS);
16459 else if (!LHSHasPattern && !RHSHasPattern)
16460 return SDValue();
16461
16462 SDLoc DL(N);
16463 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
16464 SDValue Cmp = RHS.getOperand(0);
16465 SDValue Z = Cmp.getOperand(0);
16466 auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1));
16467
16468 assert(Constant && "Constant Should not be a null pointer.");
16469 int64_t NegConstant = 0 - Constant->getSExtValue();
16470
16471 switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
16472 default: break;
16473 case ISD::SETNE: {
16474 // when C == 0
16475 // --> addze X, (addic Z, -1).carry
16476 // /
16477 // add X, (zext(setne Z, C))--
16478 // \ when -32768 <= -C <= 32767 && C != 0
16479 // --> addze X, (addic (addi Z, -C), -1).carry
16480 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
16481 DAG.getConstant(NegConstant, DL, MVT::i64));
16482 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
16483 SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
16484 AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
16485 return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
16486 SDValue(Addc.getNode(), 1));
16487 }
16488 case ISD::SETEQ: {
16489 // when C == 0
16490 // --> addze X, (subfic Z, 0).carry
16491 // /
16492 // add X, (zext(sete Z, C))--
16493 // \ when -32768 <= -C <= 32767 && C != 0
16494 // --> addze X, (subfic (addi Z, -C), 0).carry
16495 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
16496 DAG.getConstant(NegConstant, DL, MVT::i64));
16497 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
16498 SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
16499 DAG.getConstant(0, DL, MVT::i64), AddOrZ);
16500 return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
16501 SDValue(Subc.getNode(), 1));
16502 }
16503 }
16504
16505 return SDValue();
16506}
16507
16508// Transform
16509// (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
16510// (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
16511// In this case both C1 and C2 must be known constants.
16512// C1+C2 must fit into a 34 bit signed integer.
16513static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
16514 const PPCSubtarget &Subtarget) {
16515 if (!Subtarget.isUsingPCRelativeCalls())
16516 return SDValue();
16517
16518 // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
16519 // If we find that node try to cast the Global Address and the Constant.
16520 SDValue LHS = N->getOperand(0);
16521 SDValue RHS = N->getOperand(1);
16522
16523 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
16524 std::swap(LHS, RHS);
16525
16526 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
16527 return SDValue();
16528
16529 // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
16530 GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
16531 ConstantSDNode* ConstNode = dyn_cast<ConstantSDNode>(RHS);
16532
16533 // Check that both casts succeeded.
16534 if (!GSDN || !ConstNode)
16535 return SDValue();
16536
16537 int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
16538 SDLoc DL(GSDN);
16539
16540 // The signed int offset needs to fit in 34 bits.
16541 if (!isInt<34>(NewOffset))
16542 return SDValue();
16543
16544 // The new global address is a copy of the old global address except
16545 // that it has the updated Offset.
16546 SDValue GA =
16547 DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
16548 NewOffset, GSDN->getTargetFlags());
16549 SDValue MatPCRel =
16550 DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
16551 return MatPCRel;
16552}
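// [Illustrative addition, not part of the original source] A worked instance
// of the fold above: with a pc-relative global G,
//   (add 8, (MAT_PCREL_ADDR G+16)) --> (MAT_PCREL_ADDR G+24)
// provided the combined offset (24 here) still fits the 34-bit signed
// displacement checked above.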
16553
16554SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
16555 if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
16556 return Value;
16557
16558 if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
16559 return Value;
16560
16561 return SDValue();
16562}
16563
16564// Detect TRUNCATE operations on bitcasts of float128 values.
16565// What we are looking for here is the situation where we extract a subset
16566// of bits from a 128-bit float.
16567// This can be of two forms:
16568// 1) BITCAST of f128 feeding TRUNCATE
16569// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
16570// The reason this is required is because we do not have a legal i128 type
16571// and so we want to prevent having to store the f128 and then reload part
16572// of it.
16573SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
16574 DAGCombinerInfo &DCI) const {
16575 // If we are using CRBits then try that first.
16576 if (Subtarget.useCRBits()) {
16577 // Check if CRBits did anything and return that if it did.
16578 if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
16579 return CRTruncValue;
16580 }
16581
16582 SDLoc dl(N);
16583 SDValue Op0 = N->getOperand(0);
16584
16585 // fold (truncate (abs (sub (zext a), (zext b)))) -> (vabsd a, b)
16586 if (Subtarget.hasP9Altivec() && Op0.getOpcode() == ISD::ABS) {
16587 EVT VT = N->getValueType(0);
16588 if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
16589 return SDValue();
16590 SDValue Sub = Op0.getOperand(0);
16591 if (Sub.getOpcode() == ISD::SUB) {
16592 SDValue SubOp0 = Sub.getOperand(0);
16593 SDValue SubOp1 = Sub.getOperand(1);
16594 if ((SubOp0.getOpcode() == ISD::ZERO_EXTEND) &&
16595 (SubOp1.getOpcode() == ISD::ZERO_EXTEND)) {
16596 return DCI.DAG.getNode(PPCISD::VABSD, dl, VT, SubOp0.getOperand(0),
16597 SubOp1.getOperand(0),
16598 DCI.DAG.getTargetConstant(0, dl, MVT::i32));
16599 }
16600 }
16601 }
16602
16603 // Looking for a truncate of i128 to i64.
16604 if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
16605 return SDValue();
16606
16607 int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
16608
16609 // SRL feeding TRUNCATE.
16610 if (Op0.getOpcode() == ISD::SRL) {
16611 ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
16612 // The right shift has to be by 64 bits.
16613 if (!ConstNode || ConstNode->getZExtValue() != 64)
16614 return SDValue();
16615
16616 // Switch the element number to extract.
16617 EltToExtract = EltToExtract ? 0 : 1;
16618 // Update Op0 past the SRL.
16619 Op0 = Op0.getOperand(0);
16620 }
16621
16622 // BITCAST feeding a TRUNCATE possibly via SRL.
16623 if (Op0.getOpcode() == ISD::BITCAST &&
16624 Op0.getValueType() == MVT::i128 &&
16625 Op0.getOperand(0).getValueType() == MVT::f128) {
16626 SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
16627 return DCI.DAG.getNode(
16628 ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
16629 DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
16630 }
16631 return SDValue();
16632}
16633
16634SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
16635 SelectionDAG &DAG = DCI.DAG;
16636
16637 ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
16638 if (!ConstOpOrElement)
16639 return SDValue();
16640
16641 // An imul is usually smaller than the alternative sequence for legal type.
16642 if (DAG.getMachineFunction().getFunction().hasMinSize() &&
16643 isOperationLegal(ISD::MUL, N->getValueType(0)))
16644 return SDValue();
16645
16646 auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
16647 switch (this->Subtarget.getCPUDirective()) {
16648 default:
16649 // TODO: enhance the condition for subtarget before pwr8
16650 return false;
16651 case PPC::DIR_PWR8:
16652 // type mul add shl
16653 // scalar 4 1 1
16654 // vector 7 2 2
16655 return true;
16656 case PPC::DIR_PWR9:
16657 case PPC::DIR_PWR10:
16658 case PPC::DIR_PWR_FUTURE:
16659 // type mul add shl
16660 // scalar 5 2 2
16661 // vector 7 2 2
16662
16663 // The cycle ratios of the related operations are shown in the table above.
16664 // Because mul is 5 (scalar) / 7 (vector) while add/sub/shl are all 2 for
16665 // both scalar and vector types, the two-instruction patterns (add/sub + shl,
16666 // cost 4) are always profitable; but for the three-instruction pattern
16667 // (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl cost 6.
16668 // So we should only do that one for vector types.
16669 return IsAddOne && IsNeg ? VT.isVector() : true;
16670 }
16671 };
16672
16673 EVT VT = N->getValueType(0);
16674 SDLoc DL(N);
16675
16676 const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
16677 bool IsNeg = MulAmt.isNegative();
16678 APInt MulAmtAbs = MulAmt.abs();
16679
16680 if ((MulAmtAbs - 1).isPowerOf2()) {
16681 // (mul x, 2^N + 1) => (add (shl x, N), x)
16682 // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
16683
16684 if (!IsProfitable(IsNeg, true, VT))
16685 return SDValue();
16686
16687 SDValue Op0 = N->getOperand(0);
16688 SDValue Op1 =
16689 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16690 DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
16691 SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
16692
16693 if (!IsNeg)
16694 return Res;
16695
16696 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
16697 } else if ((MulAmtAbs + 1).isPowerOf2()) {
16698 // (mul x, 2^N - 1) => (sub (shl x, N), x)
16699 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
16700
16701 if (!IsProfitable(IsNeg, false, VT))
16702 return SDValue();
16703
16704 SDValue Op0 = N->getOperand(0);
16705 SDValue Op1 =
16706 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16707 DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
16708
16709 if (!IsNeg)
16710 return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
16711 else
16712 return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
16713
16714 } else {
16715 return SDValue();
16716 }
16717}
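// [Illustrative addition, not part of the original source] The scalar algebra
// behind the (2^N +/- 1) folds above, checked at compile time:
static_assert(13 * 5 == (13 << 2) + 13, "mul by 2^N + 1 -> shl + add");
static_assert(13 * 7 == (13 << 3) - 13, "mul by 2^N - 1 -> shl + sub");
static_assert(13 * -7 == 13 - (13 << 3), "mul by -(2^N - 1) -> sub from x");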
16718
16719// Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
16720// in combiner since we need to check SD flags and other subtarget features.
16721SDValue PPCTargetLowering::combineFMALike(SDNode *N,
16722 DAGCombinerInfo &DCI) const {
16723 SDValue N0 = N->getOperand(0);
16724 SDValue N1 = N->getOperand(1);
16725 SDValue N2 = N->getOperand(2);
16726 SDNodeFlags Flags = N->getFlags();
16727 EVT VT = N->getValueType(0);
16728 SelectionDAG &DAG = DCI.DAG;
16729 const TargetOptions &Options = getTargetMachine().Options;
16730 unsigned Opc = N->getOpcode();
16731 bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
16732 bool LegalOps = !DCI.isBeforeLegalizeOps();
16733 SDLoc Loc(N);
16734
16735 if (!isOperationLegal(ISD::FMA, VT))
16736 return SDValue();
16737
16738 // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
16739 // since (fnmsub a b c)=-0 while c-ab=+0.
16740 if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
16741 return SDValue();
16742
16743 // (fma (fneg a) b c) => (fnmsub a b c)
16744 // (fnmsub (fneg a) b c) => (fma a b c)
16745 if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
16746 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
16747
16748 // (fma a (fneg b) c) => (fnmsub a b c)
16749 // (fnmsub a (fneg b) c) => (fma a b c)
16750 if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
16751 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
16752
16753 return SDValue();
16754}
16755
16756bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
16757 // Only duplicate to increase tail calls for the 64-bit SysV ABIs.
16758 if (!Subtarget.is64BitELFABI())
16759 return false;
16760
16761 // If not a tail call then no need to proceed.
16762 if (!CI->isTailCall())
16763 return false;
16764
16765 // If sibling calls have been disabled and tail-calls aren't guaranteed
16766 // there is no reason to duplicate.
16767 auto &TM = getTargetMachine();
16768 if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
16769 return false;
16770
16771 // Can't tail call a function called indirectly, or if it has variadic args.
16772 const Function *Callee = CI->getCalledFunction();
16773 if (!Callee || Callee->isVarArg())
16774 return false;
16775
16776 // Make sure the callee and caller calling conventions are eligible for tco.
16777 const Function *Caller = CI->getParent()->getParent();
16778 if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
16779 CI->getCallingConv()))
16780 return false;
16781
16782 // If the function is local then we have a good chance at tail-calling it
16783 return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee);
16784}
16785
16786bool PPCTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
16787 if (!Subtarget.hasVSX())
16788 return false;
16789 if (Subtarget.hasP9Vector() && VT == MVT::f128)
16790 return true;
16791 return VT == MVT::f32 || VT == MVT::f64 ||
16792 VT == MVT::v4f32 || VT == MVT::v2f64;
16793}
16794
16795bool PPCTargetLowering::
16796isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
16797 const Value *Mask = AndI.getOperand(1);
16798 // If the mask is suitable for andi. or andis. we should sink the and.
16799 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
16800 // Can't handle constants wider than 64-bits.
16801 if (CI->getBitWidth() > 64)
16802 return false;
16803 int64_t ConstVal = CI->getZExtValue();
16804 return isUInt<16>(ConstVal) ||
16805 (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
16806 }
16807
16808 // For non-constant masks, we can always use the record-form and.
16809 return true;
16810}
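// [Illustrative addition, not part of the original source] Example masks for
// the hook above: 0x0000FFFF qualifies because andi. covers it, 0xFFFF0000
// qualifies because andis. covers it, while 0x00012345 has bits in both
// halves and therefore does not fit a single record-form and-immediate.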
16811
16812// Transform (abs (sub (zext a), (zext b))) to (vabsd a b 0)
16813// Transform (abs (sub (zext a), (zext_invec b))) to (vabsd a b 0)
16814// Transform (abs (sub (zext_invec a), (zext_invec b))) to (vabsd a b 0)
16815// Transform (abs (sub (zext_invec a), (zext b))) to (vabsd a b 0)
16816// Transform (abs (sub a, b)) to (vabsd a b 1) if a and b are of type v4i32
16817SDValue PPCTargetLowering::combineABS(SDNode *N, DAGCombinerInfo &DCI) const {
16818 assert((N->getOpcode() == ISD::ABS) && "Need ABS node here");
16819 assert(Subtarget.hasP9Altivec() &&
16820        "Only combine this when P9 altivec supported!");
16821 EVT VT = N->getValueType(0);
16822 if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
16823 return SDValue();
16824
16825 SelectionDAG &DAG = DCI.DAG;
16826 SDLoc dl(N);
16827 if (N->getOperand(0).getOpcode() == ISD::SUB) {
16828 // Even for signed element types, both operands are known to be
16829 // non-negative (as signed values) because they are zero-extended.
16830 unsigned SubOpcd0 = N->getOperand(0)->getOperand(0).getOpcode();
16831 unsigned SubOpcd1 = N->getOperand(0)->getOperand(1).getOpcode();
16832 if ((SubOpcd0 == ISD::ZERO_EXTEND ||
16833 SubOpcd0 == ISD::ZERO_EXTEND_VECTOR_INREG) &&
16834 (SubOpcd1 == ISD::ZERO_EXTEND ||
16835 SubOpcd1 == ISD::ZERO_EXTEND_VECTOR_INREG)) {
16836 return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
16837 N->getOperand(0)->getOperand(0),
16838 N->getOperand(0)->getOperand(1),
16839 DAG.getTargetConstant(0, dl, MVT::i32));
16840 }
16841
16842 // For type v4i32, it can be optimized with xvnegsp + vabsduw
16843 if (N->getOperand(0).getValueType() == MVT::v4i32 &&
16844 N->getOperand(0).hasOneUse()) {
16845 return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
16846 N->getOperand(0)->getOperand(0),
16847 N->getOperand(0)->getOperand(1),
16848 DAG.getTargetConstant(1, dl, MVT::i32));
16849 }
16850 }
16851
16852 return SDValue();
16853}
16854
16855// For type v4i32/v8i16/v16i8, transform
16856// from (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) to (vabsd a, b)
16857// from (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) to (vabsd a, b)
16858// from (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) to (vabsd a, b)
16859// from (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) to (vabsd a, b)
16860SDValue PPCTargetLowering::combineVSelect(SDNode *N,
16861 DAGCombinerInfo &DCI) const {
16862 assert((N->getOpcode() == ISD::VSELECT) && "Need VSELECT node here");
16863 assert(Subtarget.hasP9Altivec() &&
16864        "Only combine this when P9 altivec supported!");
16865
16866 SelectionDAG &DAG = DCI.DAG;
16867 SDLoc dl(N);
16868 SDValue Cond = N->getOperand(0);
16869 SDValue TrueOpnd = N->getOperand(1);
16870 SDValue FalseOpnd = N->getOperand(2);
16871 EVT VT = N->getOperand(1).getValueType();
16872
16873 if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB ||
16874 FalseOpnd.getOpcode() != ISD::SUB)
16875 return SDValue();
16876
16877 // ABSD is only available for types v4i32/v8i16/v16i8
16878 if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
16879 return SDValue();
16880
16881 // Require at least one single-use operand so the fold saves a dependent computation
16882 if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse()))
16883 return SDValue();
16884
16885 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
16886
16887 // Can only handle unsigned comparison here
16888 switch (CC) {
16889 default:
16890 return SDValue();
16891 case ISD::SETUGT:
16892 case ISD::SETUGE:
16893 break;
16894 case ISD::SETULT:
16895 case ISD::SETULE:
16896 std::swap(TrueOpnd, FalseOpnd);
16897 break;
16898 }
16899
16900 SDValue CmpOpnd1 = Cond.getOperand(0);
16901 SDValue CmpOpnd2 = Cond.getOperand(1);
16902
16903 // SETCC CmpOpnd1 CmpOpnd2 cond
16904 // TrueOpnd = CmpOpnd1 - CmpOpnd2
16905 // FalseOpnd = CmpOpnd2 - CmpOpnd1
16906 if (TrueOpnd.getOperand(0) == CmpOpnd1 &&
16907 TrueOpnd.getOperand(1) == CmpOpnd2 &&
16908 FalseOpnd.getOperand(0) == CmpOpnd2 &&
16909 FalseOpnd.getOperand(1) == CmpOpnd1) {
16910 return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(1).getValueType(),
16911 CmpOpnd1, CmpOpnd2,
16912 DAG.getTargetConstant(0, dl, MVT::i32));
16913 }
16914
16915 return SDValue();
16916}